mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Note: this version of mlrun has been flagged as a potentially problematic release.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/config.py
CHANGED
@@ -30,7 +30,6 @@ import typing
 import warnings
 from collections.abc import Mapping
 from datetime import timedelta
-from distutils.util import strtobool
 from os.path import expanduser
 from threading import Lock
 
@@ -83,8 +82,8 @@ default_config = {
     "images_to_enrich_registry": "^mlrun/*,python:3.9",
     "kfp_url": "",
     "kfp_ttl": "14400",  # KFP ttl in sec, after that completed PODs will be deleted
-    "kfp_image": "mlrun/mlrun",  # image to use for KFP runner
-    "dask_kfp_image": "mlrun/ml-base",  # image to use for dask KFP runner
+    "kfp_image": "mlrun/mlrun-kfp",  # image to use for KFP runner
+    "dask_kfp_image": "mlrun/ml-base",  # image to use for dask KFP runner
     "igz_version": "",  # the version of the iguazio system the API is running on
     "iguazio_api_url": "",  # the url to iguazio api
     "spark_app_image": "",  # image to use for spark operator app runtime
@@ -102,7 +101,10 @@ default_config = {
     "log_level": "INFO",
     # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
-
+    # custom logger format, workes only with log_formatter: custom
+    # Note that your custom format must include those 4 fields - timestamp, level, message and more
+    "log_format_override": None,
+    "submit_timeout": "280",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
     "runtimes_cleanup_interval": "300",
     "monitoring": {
@@ -120,14 +122,6 @@ default_config = {
         "projects": {
             "summaries": {
                 "cache_interval": "30",
-                "feature_gates": {
-                    "artifacts": "enabled",
-                    "schedules": "enabled",
-                    "feature_sets": "enabled",
-                    "models": "enabled",
-                    "runs": "enabled",
-                    "pipelines": "enabled",
-                },
             },
         },
     },
@@ -140,6 +134,12 @@ default_config = {
            "delete_crd_resources_timeout": "5 minutes",
        },
    },
+    "object_retentions": {
+        "alert_activations": 14 * 7,  # days
+    },
+    # A safety margin to account for delays
+    # This ensures that extra partitions are available beyond the specified retention period
+    "partitions_buffer_multiplier": 3,
     # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
     # before deleting them (4 hours)
     "runtime_resources_deletion_grace_period": "14400",
@@ -159,6 +159,7 @@ default_config = {
         # migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
         # migration progress.
         "artifact_migration_batch_size": 200,
+        "artifact_migration_v9_batch_size": 30000,
         "artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
         "datasets": {
             "max_preview_columns": 100,
@@ -167,6 +168,7 @@ default_config = {
             "max_chunk_size": 1024 * 1024 * 1,  # 1MB
             "max_preview_size": 1024 * 1024 * 10,  # 10MB
             "max_download_size": 1024 * 1024 * 100,  # 100MB
+            "max_deletions": 200,
         },
     },
     # FIXME: Adding these defaults here so we won't need to patch the "installing component" (provazio-controller) to
@@ -229,8 +231,11 @@ default_config = {
                 "abort_grace_period": "10",
                 "delete_project": "900",
                 "delete_function": "900",
+                "model_endpoint_creation": "600",
+                "model_endpoint_tsdb_leftovers": "900",
             },
             "runtimes": {"dask": "600"},
+            "push_notifications": "60",
         },
     },
     "function": {
@@ -262,6 +267,7 @@ default_config = {
         # When the module is reloaded, the maximum depth recursion configuration for the recursive reload
         # function is used to prevent infinite loop
         "reload_max_recursion_depth": 100,
+        "source_code_max_bytes": 10000,
     },
     "databricks": {
         "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
@@ -314,7 +320,7 @@ default_config = {
                },
                "request_timeout": 45,  # seconds
            },
-            # see server.api.utils.helpers.ensure_running_on_chief
+            # see server.py.services.api.utils.helpers.ensure_running_on_chief
            "ensure_function_running_on_chief_mode": "enabled",
        },
        "port": 8080,
@@ -480,6 +486,10 @@ default_config = {
            "iguazio_client_job_cache_ttl": "20 minutes",
            "nuclio_project_deletion_verification_timeout": "300 seconds",
            "nuclio_project_deletion_verification_interval": "5 seconds",
+            "summaries": {
+                # Number of days back to include when calculating the project pipeline summary.
+                "list_pipelines_time_period_in_days": 7,
+            },
        },
        # The API needs to know what is its k8s svc url so it could enrich it in the jobs it creates
        "api_url": "",
@@ -532,7 +542,9 @@ default_config = {
            "verbose": True,
        },
        "pagination": {
-            "default_page_size":
+            "default_page_size": 200,
+            "page_limit": 1000000,
+            "page_size_limit": 1000000,
            "pagination_cache": {
                "interval": 60,
                "ttl": 3600,
@@ -541,6 +553,10 @@ default_config = {
        },
    },
    "model_endpoint_monitoring": {
+        # Scaling Rule
+        # The fundamental scaling rule to maintain is: Shards/Partitions = Replicas * Workers
+        # In other words, the number of shards (V3IO) or partitions (Kafka) must be equal to the
+        # total number of worker processes across all pods.
        "serving_stream": {
            "v3io": {
                "shard_count": 2,
@@ -559,33 +575,49 @@ default_config = {
        },
        "application_stream_args": {
            "v3io": {
-                "shard_count":
+                "shard_count": 4,
                "retention_period_hours": 24,
-                "num_workers":
+                "num_workers": 4,
                "min_replicas": 1,
                "max_replicas": 1,
            },
            "kafka": {
-                "partition_count":
+                "partition_count": 4,
                "replication_factor": 1,
-                "num_workers":
+                "num_workers": 4,
                "min_replicas": 1,
                "max_replicas": 1,
            },
        },
        "writer_stream_args": {
            "v3io": {
-                "shard_count":
+                "shard_count": 4,
                "retention_period_hours": 24,
-                "num_workers":
+                "num_workers": 4,
                "min_replicas": 1,
                "max_replicas": 1,
            },
            "kafka": {
-                "partition_count":
+                "partition_count": 4,
                # TODO: add retention period configuration
                "replication_factor": 1,
-                "num_workers":
+                "num_workers": 4,
+                "min_replicas": 1,
+                "max_replicas": 1,
+            },
+        },
+        "controller_stream_args": {
+            "v3io": {
+                "shard_count": 10,
+                "retention_period_hours": 24,
+                "num_workers": 10,
+                "min_replicas": 1,
+                "max_replicas": 1,
+            },
+            "kafka": {
+                "partition_count": 10,
+                "replication_factor": 1,
+                "num_workers": 10,
                "min_replicas": 1,
                "max_replicas": 1,
            },
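
The comment block added under model_endpoint_monitoring states the sizing invariant behind all of these stream defaults: shards (V3IO) or partitions (Kafka) must equal replicas times workers. A quick arithmetic check of the new defaults (an illustrative sketch, not mlrun code):

# Scaling rule from the new config comment:
#   shards/partitions = replicas * workers
def required_shards(max_replicas: int, num_workers: int) -> int:
    return max_replicas * num_workers

# The 1.8.0 defaults are consistent with the rule:
assert required_shards(1, 4) == 4    # application/writer stream args
assert required_shards(1, 10) == 10  # controller stream args
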
@@ -600,22 +632,8 @@ default_config = {
        # Offline storage path can be either relative or a full path. This path is used for general offline data
        # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
        "offline_storage_path": "model-endpoints/{kind}",
-        # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
-        # when the user is working in CE environment and has not provided any stream path.
-        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
-        "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
-        "endpoint_store_connection": "",
-        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-        "tsdb_connection": "",
-        # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
-        "stream_connection": "",
-        "tdengine": {
-            "timeout": 10,
-            "retries": 1,
-        },
    },
    "secret_stores": {
        # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -644,7 +662,7 @@ default_config = {
            "auto_add_project_secrets": True,
            "project_secret_name": "mlrun-project-secrets-{project}",
            "auth_secret_name": "mlrun-auth-secrets.{hashed_access_key}",
-            "env_variable_prefix": "
+            "env_variable_prefix": "",
            "global_function_env_secret_name": None,
        },
    },
@@ -729,6 +747,7 @@ default_config = {
    },
    "workflows": {
        "default_workflow_runner_name": "workflow-runner-{}",
+        "concurrent_delete_worker_count": 20,
        # Default timeout seconds for retrieving workflow id after execution
        # Remote workflow timeout is the maximum between remote and the inner engine timeout
        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
@@ -794,17 +813,44 @@ default_config = {
    "grafana_url": "",
    "alerts": {
        # supported modes: "enabled", "disabled".
-        "mode": "
+        "mode": "enabled",
        # maximum number of alerts we allow to be configured.
        # user will get an error when exceeding this
-        "max_allowed":
+        "max_allowed": 20000,
        # maximum allowed value for count in criteria field inside AlertConfig
        "max_criteria_count": 100,
+        # interval for periodic events generation job
+        "events_generation_interval": 30,  # seconds
+        # number of alerts to delete in each chunk
+        "chunk_size_during_project_deletion": 100,
+        # maximum allowed alert config cache size in alert's CRUD
+        # for the best performance, it is recommended to set this value to the maximum number of alerts
+        "max_allowed_cache_size": 20000,
+        # default limit for listing alert configs
+        "default_list_alert_configs_limit": 2000,
    },
    "auth_with_client_id": {
        "enabled": False,
        "request_timeout": 5,
    },
+    "services": {
+        # The running service name. One of: "api", "alerts"
+        "service_name": "api",
+        "hydra": {
+            # Comma separated list of services to run on the instance.
+            # Currently, this is only considered when the service_name is "api".
+            # "*" starts all services on the same instance,
+            # other options are considered as running only the api service.
+            "services": "*",
+        },
+    },
+    "notifications": {
+        "smtp": {
+            "config_secret_name": "mlrun-smtp-config",
+            "refresh_interval": "30",
+        }
+    },
+    "system_id": "",
 }
 _is_running_as_api = None
 
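
The new alerts, services, and notifications defaults surface through the usual mlrun.mlconf accessor; a minimal sketch (the attribute paths mirror the keys above, and the printed values are the shipped defaults):

import mlrun

print(mlrun.mlconf.alerts.mode)            # "enabled"
print(mlrun.mlconf.alerts.max_allowed)     # 20000
print(mlrun.mlconf.services.service_name)  # "api"
print(mlrun.mlconf.notifications.smtp.config_secret_name)  # "mlrun-smtp-config"
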
@@ -837,6 +883,14 @@ class Config:
             return self.__class__(val)
         return val
 
+    def __deepcopy__(self, memo):
+        cls = self.__class__
+        # create a new Config without calling __init__ (avoid recursion)
+        result = cls.__new__(cls)
+        # manually deep-copy _cfg
+        object.__setattr__(result, "_cfg", copy.deepcopy(self._cfg, memo))
+        return result
+
     def __setattr__(self, attr, value):
         # in order for the dbpath setter to work
         if attr == "dbpath":
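
With __deepcopy__ defined, copy.deepcopy on a Config no longer recurses through __getattr__ while the new instance is half-initialized; a short usage sketch:

import copy

import mlrun

# Deep-copying a Config (or a nested Config node) now yields an
# independent object; mutating the copy leaves mlrun.mlconf untouched.
cfg_copy = copy.deepcopy(mlrun.mlconf)
cfg_copy.log_level = "DEBUG"
# the original keeps its prior value (the default is "INFO")
print(mlrun.mlconf.log_level)
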
@@ -851,6 +905,22 @@ class Config:
         name = self.__class__.__name__
         return f"{name}({self._cfg!r})"
 
+    def __iter__(self):
+        if isinstance(self._cfg, Mapping):
+            return self._cfg.__iter__()
+
+    def items(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self._cfg.items())
+
+    def keys(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self.data.keys())
+
+    def values(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self.data.values())
+
     def update(self, cfg, skip_errors=False):
         for key, value in cfg.items():
             if hasattr(self, key):
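
The new __iter__/items/keys/values methods make a Config node behave like a read-only mapping, so nested sections can be iterated or materialized without reaching into the private _cfg attribute; for example (a sketch):

import mlrun

# Iterate the keys of a nested config section directly
for key in mlrun.mlconf.httpdb.pagination:
    print(key)  # default_page_size, page_limit, ...

# items() lets a section be materialized as a plain dict
pagination = dict(mlrun.mlconf.httpdb.pagination.items())
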
@@ -1043,6 +1113,17 @@
                 f"is not allowed for iguazio version: {igz_version} < 3.5.1"
             )
 
+    def validate_object_retentions(self):
+        for table_name, retention_days in self.object_retentions.items():
+            if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"{table_name} partition interval must be greater than a week"
+                )
+            elif retention_days > 53 * 7:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"{table_name} partition interval must be less than a year"
+                )
+
     def resolve_chief_api_url(self) -> str:
         if self.httpdb.clusterization.chief.url:
             return self.httpdb.clusterization.chief.url
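
validate_object_retentions, now invoked from _validate_config (see the hunk further below), bounds every entry of the new object_retentions mapping to between one week and 53 weeks (371 days), unless a PARTITION_INTERVAL environment variable relaxes the lower bound. The shipped default passes both checks:

# Bounds enforced by validate_object_retentions, in days:
#   7 <= retention_days <= 53 * 7 (= 371)
retention_days = 14 * 7  # default for "alert_activations" (98 days)
assert 7 <= retention_days <= 53 * 7
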
@@ -1201,9 +1282,9 @@
 
     def get_model_monitoring_file_target_path(
         self,
-        project: str
-        kind: str
-        target:
+        project: str,
+        kind: str,
+        target: typing.Literal["online", "offline"] = "online",
         artifact_path: typing.Optional[str] = None,
         function_name: typing.Optional[str] = None,
         **kwargs,
@@ -1237,24 +1318,39 @@
             function_name
             and function_name
             != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
         ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
                 kind=kind
                 if function_name is None
-                else f"{kind}-{function_name.lower()}",
+                else f"{kind}-{function_name.lower()}-v1",
             )
-        elif
+        elif (
+            kind == "stream"
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+        ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
-                kind=kind,
+                kind=f"{kind}-v1",
             )
-
+        elif (
+            function_name
+            == mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+            and kind == "stream"
+        ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                 project=project,
-                kind=kind,
+                kind=f"{kind}-{function_name.lower()}-v1",
             )
 
+        return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+            project=project,
+            kind=kind,
+        )
+
         # Get the current offline path from the configuration
         file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
             project=project, kind=kind
@@ -1282,35 +1378,6 @@
             ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
         )
 
-    def get_s3_storage_options(self) -> dict[str, typing.Any]:
-        """
-        Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
-        graph uses this method for generating the storage options for S3 parquet target path.
-        :return: A storage options dictionary in which each key-value pair represents a particular configuration,
-                 such as endpoint_url or aws access key.
-        """
-        key = mlrun.get_secret_or_env("AWS_ACCESS_KEY_ID")
-        secret = mlrun.get_secret_or_env("AWS_SECRET_ACCESS_KEY")
-
-        force_non_anonymous = mlrun.get_secret_or_env("S3_NON_ANONYMOUS")
-        profile = mlrun.get_secret_or_env("AWS_PROFILE")
-
-        storage_options = dict(
-            anon=not (force_non_anonymous or (key and secret)),
-            key=key,
-            secret=secret,
-        )
-
-        endpoint_url = mlrun.get_secret_or_env("S3_ENDPOINT_URL")
-        if endpoint_url:
-            client_kwargs = {"endpoint_url": endpoint_url}
-            storage_options["client_kwargs"] = client_kwargs
-
-        if profile:
-            storage_options["profile"] = profile
-
-        return storage_options
-
     def is_explicit_ack_enabled(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
             not self.nuclio_version
@@ -1381,9 +1448,12 @@ def _validate_config(config):
         pass
 
     config.verify_security_context_enrichment_mode_is_allowed()
+    config.validate_object_retentions()
 
 
-def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
+def _verify_gpu_requests_and_limits(
+    requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
+):
     # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
     if requests_gpu and not limits_gpu:
         raise mlrun.errors.MLRunConflictError(
@@ -1396,7 +1466,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
         )
 
 
-def _convert_resources_to_str(config: dict = None):
+def _convert_resources_to_str(config: typing.Optional[dict] = None):
     resources_types = ["cpu", "memory", "gpu"]
     resource_requirements = ["requests", "limits"]
     if not config.get("default_function_pod_resources"):
@@ -1414,17 +1484,6 @@ def _convert_resources_to_str(config: dict = None):
                 resource_requirement[resource_type] = str(value)
 
 
-def _convert_str(value, typ):
-    if typ in (str, _none_type):
-        return value
-
-    if typ is bool:
-        return strtobool(value)
-
-    # e.g. int('8080') → 8080
-    return typ(value)
-
-
 def _configure_ssl_verification(verify_ssl: bool) -> None:
     """Configure SSL verification warnings based on the setting."""
     if not verify_ssl:
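
The dropped _convert_str helper leaned on distutils.util.strtobool, and distutils was removed from the standard library in Python 3.12 (its import is also deleted at the top of this file). If the old conversion is still needed elsewhere, an equivalent is a few lines; a sketch, not part of mlrun, returning bool rather than distutils' 0/1:

def strtobool(value: str) -> bool:
    # same accepted spellings as the removed distutils.util.strtobool
    value = value.strip().lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return True
    if value in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {value!r}")
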
@@ -1532,6 +1591,15 @@ def read_env(env=None, prefix=env_prefix):
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
+
+    # If the environment variable MLRUN_HTTPDB__HTTP__VERIFY is set, we ensure SSL verification settings take precedence
+    # by moving the 'httpdb' configuration to the beginning of the config dictionary.
+    # This ensures that SSL verification is applied before other settings.
+    if "MLRUN_HTTPDB__HTTP__VERIFY" in env:
+        httpdb = config.pop("httpdb", None)
+        if httpdb:
+            config = {"httpdb": httpdb, **config}
+
     return config
 
 
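
The read_env addition only reorders the parsed overrides; the override syntax itself is unchanged — an MLRUN_-prefixed variable with double underscores as the nesting separator. A sketch:

import os

# "__" maps to nesting: this targets config.httpdb.http.verify
os.environ["MLRUN_HTTPDB__HTTP__VERIFY"] = "false"

import mlrun  # on import, read_env() now applies the httpdb subtree first
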
mlrun/data_types/__init__.py
CHANGED
@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
 from .data_types import (
     InferOptions,
@@ -29,8 +27,12 @@ class BaseDataInfer:
     get_stats = None
 
 
+def is_spark_dataframe(df) -> bool:
+    return "rdd" in dir(df)
+
+
 def get_infer_interface(df) -> BaseDataInfer:
-    if
+    if is_spark_dataframe(df):
         from .spark import SparkDataInfer
 
         return SparkDataInfer
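
The rewritten dispatch relies on duck typing: is_spark_dataframe probes for an rdd attribute instead of importing pyspark, so pandas-only environments never pay the Spark import cost. A standalone sketch of the same check:

import pandas as pd

def is_spark_dataframe(df) -> bool:
    # Spark DataFrames expose an `rdd` attribute; pandas frames do not
    return "rdd" in dir(df)

assert not is_spark_dataframe(pd.DataFrame({"a": [1]}))
# a pyspark.sql.DataFrame would return True -- without importing pyspark here
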
mlrun/data_types/infer.py
CHANGED
@@ -12,12 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from typing import Optional
+
 import numpy as np
 import packaging.version
 import pandas as pd
 import pyarrow
 from pandas.io.json._table_schema import convert_pandas_type_to_json_field
 
+import mlrun.features
+from mlrun.model import ObjectList
 from mlrun.utils import logger
 
 from .data_types import InferOptions, pa_type_to_value_type, pd_schema_to_value_type
@@ -27,17 +31,19 @@ default_num_bins = 20
 
 def infer_schema_from_df(
     df: pd.DataFrame,
-    features,
+    features: ObjectList,
     entities,
-    timestamp_key: str = None,
+    timestamp_key: Optional[str] = None,
     entity_columns=None,
     options: InferOptions = InferOptions.Null,
+    push_at_start: Optional[bool] = False,
 ):
     """infer feature set schema from dataframe"""
     timestamp_fields = []
     current_entities = list(entities.keys())
     entity_columns = entity_columns or []
     index_columns = dict()
+    temp_features = ObjectList(mlrun.features.Feature)
 
     def upsert_entity(name, value_type):
         if name in current_entities:
@@ -72,10 +78,14 @@
         if column in features.keys():
             features[column].value_type = value_type
         else:
-
+            temp_features[column] = {"name": column, "value_type": value_type}
         if value_type == "datetime" and not is_entity:
             timestamp_fields.append(column)
 
+    features.update_list(
+        object_list=temp_features, push_at_start=push_at_start
+    )  # Push to start of the Object list
+
     index_type = None
     if InferOptions.get_common_options(options, InferOptions.Index):
         # infer types of index fields
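
infer_schema_from_df now accumulates newly discovered columns in a temporary ObjectList and merges them through features.update_list(...), so the new push_at_start flag controls whether inferred features are prepended or appended. A hedged usage sketch (ingestion normally calls this internally; `fset` and `df` are assumed to be an existing FeatureSet and pandas DataFrame):

from mlrun.data_types.infer import infer_schema_from_df

# Assumption: `fset` is an mlrun FeatureSet and `df` a pandas DataFrame.
infer_schema_from_df(
    df,
    fset.spec.features,
    fset.spec.entities,
    push_at_start=True,  # new in 1.8.0: prepend inferred features to the list
)
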
mlrun/data_types/spark.py
CHANGED
@@ -14,6 +14,7 @@
 #
 from datetime import datetime
 from os import environ
+from typing import Optional
 
 import numpy as np
 import pytz
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
     df,
     features,
     entities,
-    timestamp_key: str = None,
+    timestamp_key: Optional[str] = None,
     entity_columns=None,
     options: InferOptions = InferOptions.Null,
 ):
mlrun/datastore/__init__.py
CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 __all__ = [
     "DataItem",
     "get_store_resource",
@@ -32,11 +30,21 @@ __all__ = [
     "DatabricksFileSystemDisableCache",
     "DatabricksFileBugFixed",
     "get_stream_pusher",
+    "ConfigProfile",
+    "VectorStoreCollection",
 ]
 
+from urllib.parse import urlparse
+
 import fsspec
+from mergedeep import merge
 
 import mlrun.datastore.wasbfs
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileV3io,
+)
 from mlrun.platforms.iguazio import (
     HTTPOutputStream,
     KafkaOutputStream,
@@ -106,23 +114,56 @@ def get_stream_pusher(stream_path: str, **kwargs):
 
     :param stream_path: path/url of stream
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if stream_path.startswith("ds://"):
+        datastore_profile = mlrun.datastore.datastore_profile.datastore_profile_read(
+            stream_path
+        )
+        if isinstance(
+            datastore_profile,
+            (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+        ):
+            attributes = datastore_profile.attributes()
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed_url = urlparse(stream_path)
+            topic = (
+                parsed_url.path.strip("/")
+                if parsed_url.path
+                else datastore_profile.get_topic()
+            )
+            producer_options = mlrun.datastore.utils.KafkaParameters(
+                attributes
+            ).producer()
+            return KafkaOutputStream(topic, brokers, producer_options=producer_options)
+
+        elif isinstance(datastore_profile, DatastoreProfileV3io):
+            parsed_url = urlparse(stream_path)
+            stream_path = datastore_profile.url(parsed_url.path)
+            endpoint, stream_path = parse_path(stream_path)
+            return OutputStream(stream_path, endpoint=endpoint, **kwargs)
+        else:
+            raise ValueError(
+                f"Unsupported datastore profile type: {type(datastore_profile)}"
+            )
     else:
-
+        kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+        if stream_path.startswith("kafka://") or kafka_brokers:
+            topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+            return KafkaOutputStream(
+                topic, brokers, kwargs.get("kafka_producer_options")
+            )
+        elif stream_path.startswith("http://") or stream_path.startswith("https://"):
+            return HTTPOutputStream(stream_path=stream_path)
+        elif "://" not in stream_path:
+            return OutputStream(stream_path, **kwargs)
+        elif stream_path.startswith("v3io"):
+            endpoint, stream_path = parse_path(stream_path)
+            endpoint = kwargs.pop("endpoint", None) or endpoint
+            return OutputStream(stream_path, endpoint=endpoint, **kwargs)
+        elif stream_path.startswith("dummy://"):
+            return _DummyStream(**kwargs)
+        else:
+            raise ValueError(f"unsupported stream path {stream_path}")
 
 
 class _DummyStream:
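
get_stream_pusher now resolves ds:// datastore-profile URLs ahead of the legacy kafka://, http(s)://, and v3io dispatch, and any path component of the ds:// URL overrides the profile's topic. A hedged sketch of the new path (the profile name, brokers, and topic below are illustrative, and the registration helper follows mlrun's datastore-profile API):

from mlrun.datastore import get_stream_pusher
from mlrun.datastore.datastore_profile import (
    DatastoreProfileKafkaTarget,
    register_temporary_client_datastore_profile,
)

# Illustrative profile; field names follow those referenced in the diff
profile = DatastoreProfileKafkaTarget(
    name="my-kafka", brokers="localhost:9092", topic="monitoring"
)
register_temporary_client_datastore_profile(profile)

# "ds://<profile>[/<topic>]" -- the path part overrides the profile topic
pusher = get_stream_pusher("ds://my-kafka/alt-topic")
pusher.push({"event": "hello"})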