mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/tag.py
CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import typing
 
 import pydantic
 
@@ -29,4 +28,4 @@ class TagObjects(pydantic.BaseModel):
 
     kind: str
     # TODO: Add more types to the list for new supported tagged objects
-    identifiers: typing.List[ArtifactIdentifier]
+    identifiers: list[ArtifactIdentifier]
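
Both hunks are one cleanup: with PEP 585 builtin generics (Python 3.9+), `list[ArtifactIdentifier]` replaces `typing.List[...]`, so the `typing` import becomes dead. A minimal sketch of the updated model in use; the `ArtifactIdentifier` stand-in below is hypothetical, the real schema defines its own fields:

    import pydantic


    class ArtifactIdentifier(pydantic.BaseModel):
        # hypothetical stand-in; the real schema carries more identifier fields
        key: str


    class TagObjects(pydantic.BaseModel):
        kind: str
        identifiers: list[ArtifactIdentifier]  # builtin generic, no typing.List


    print(TagObjects(kind="artifact", identifiers=[ArtifactIdentifier(key="model")]))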
mlrun/common/schemas/workflow.py
CHANGED
@@ -36,12 +36,12 @@ class WorkflowSpec(pydantic.BaseModel):
 
 class WorkflowRequest(pydantic.BaseModel):
     spec: typing.Optional[WorkflowSpec] = None
-    arguments: typing.Optional[typing.Dict] = None
+    arguments: typing.Optional[dict] = None
     artifact_path: typing.Optional[str] = None
     source: typing.Optional[str] = None
     run_name: typing.Optional[str] = None
     namespace: typing.Optional[str] = None
-    notifications: typing.Optional[typing.List[Notification]] = None
+    notifications: typing.Optional[list[Notification]] = None
 
 
 class WorkflowResponse(pydantic.BaseModel):
mlrun/common/types.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
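
Because `StrEnum` mixes in `str`, the new `HTTPMethod` members behave as plain strings wherever an HTTP verb is expected. A small sketch of that behavior (standard `str`-mixin enum semantics):

    from mlrun.common.types import HTTPMethod

    assert HTTPMethod.GET == "GET"  # str subclass: compares equal to the raw string
    assert HTTPMethod.POST.value == "POST"
    # so members can be passed wherever a verb string is expected, e.g. (illustrative):
    # requests.request(HTTPMethod.GET.value, "http://mlrun-api:8080/api/v1/healthz")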
mlrun/config.py
CHANGED
@@ -17,7 +17,7 @@ Configuration system.
 Configuration can be in either a configuration file specified by
 MLRUN_CONFIG_FILE environment variable or by environment variables.
 
-Environment variables are in the format "MLRUN_httpdb__port=8080". This will be
+Environment variables are in the format "MLRUN_HTTPDB__PORT=8080". This will be
 mapped to config.httpdb.port. Values should be in JSON format.
 """
 
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
    "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
    "ipython_widget": True,
    "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
    "log_formatter": "human",
    "submit_timeout": "180",  # timeout when submitting a new k8s resource
    # runtimes cleanup interval in seconds
@@ -149,7 +150,6 @@
        "url": "",
    },
    "v3io_framesd": "http://framesd:8080",
-    "datastore": {"async_source_mode": "disabled"},
    # default node selector to be applied to all functions - json string base64 encoded format
    "default_function_node_selector": "e30=",
    # default priority class to be applied to functions running on k8s cluster
@@ -189,6 +189,7 @@
    "background_tasks": {
        # enabled / disabled
        "timeout_mode": "enabled",
+        "function_deletion_batch_size": 10,
        # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
        "default_timeouts": {
            "operations": {
@@ -197,6 +198,7 @@
                "run_abortion": "600",
                "abort_grace_period": "10",
                "delete_project": "900",
+                "delete_function": "900",
            },
            "runtimes": {"dask": "600"},
        },
@@ -231,6 +233,10 @@
        "databricks": {
            "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
        },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
    },
    # TODO: function defaults should be moved to the function spec config above
    "function_defaults": {
@@ -241,6 +247,7 @@
            "remote": "mlrun/mlrun",
            "dask": "mlrun/ml-base",
            "mpijob": "mlrun/mlrun",
+            "application": "python:3.9-slim",
        },
        # see enrich_function_preemption_spec for more info,
        # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -325,7 +332,13 @@
            # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
            #
            # if set to "nil" or "none", nothing would be set
-            "modes": "STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION",
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION"
+            ),
        },
    },
    "jobs": {
@@ -353,10 +366,12 @@
        # is set to ClusterIP
        # ---------------------------------------------------------------------
        # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
-        # - mlrun.runtimes.function.enrich_function_with_ingress
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
        "add_templated_ingress_host_mode": "never",
        "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 0,
    },
    "logs": {
        "decode": {
@@ -474,6 +489,14 @@
        # if set to true, will log a warning for trying to use run db functionality while in nop db mode
        "verbose": True,
    },
+    "pagination": {
+        "default_page_size": 20,
+        "pagination_cache": {
+            "interval": 60,
+            "ttl": 3600,
+            "max_size": 10000,
+        },
+    },
 },
 "model_endpoint_monitoring": {
    "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -485,6 +508,7 @@
        "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
        "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
        "stream": "",
+        "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
    },
    # Offline storage path can be either relative or a full path. This path is used for general offline data
    # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -493,12 +517,14 @@
    # when the user is working in CE environment and has not provided any stream path.
    "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
    "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-    "batch_processing_function_branch": "master",
    "parquet_batching_max_events": 10_000,
    "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-    # See mlrun.model_monitoring.stores.ObjectStoreFactory for available options
+    # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
    "store_type": "v3io-nosql",
    "endpoint_store_connection": "",
+    # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+    "tsdb_connector_type": "v3io-tsdb",
+    "tsdb_connection": "",
 },
 "secret_stores": {
    # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -534,9 +560,10 @@
 "feature_store": {
    "data_prefixes": {
        "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
-        "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+        "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
        # "authority" is optional and generalizes [userinfo "@"] host [":" port]
-        "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+        "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
+        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
    },
    "default_targets": "parquet,nosql",
    "default_job_image": "mlrun/mlrun",
@@ -672,6 +699,10 @@
        "access_key": "",
    },
    "grafana_url": "",
+    "alerts": {
+        # supported modes: "enabled", "disabled".
+        "mode": "enabled"
+    },
    "auth_with_client_id": {
        "enabled": False,
        "request_timeout": 5,
@@ -937,6 +968,10 @@ class Config:
            self.httpdb.clusterization.chief.url = chief_api_url
        return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
    @staticmethod
    def get_storage_auto_mount_params():
        auto_mount_params = {}
@@ -1065,7 +1100,8 @@
        kind: str = "",
        target: str = "online",
        artifact_path: str = None,
-        application_name: str = None,
+        function_name: str = None,
+        **kwargs,
    ) -> typing.Union[str, list[str]]:
        """Get the full path from the configuration based on the provided project and kind.
 
@@ -1080,7 +1116,7 @@
                             artifact path instead.
        :param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
                             relative artifact path will be taken from the global MLRun artifact path.
-        :param application_name: Application name, None for model_monitoring_stream.
+        :param function_name: Application name, None for model_monitoring_stream.
 
        :return: Full configured path for the provided kind. Can be either a single path
                 or a list of paths in the case of the online model monitoring stream path.
@@ -1092,17 +1128,18 @@
        )
        if store_prefix_dict.get(kind):
            # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
        if (
-            application_name
+            function_name
+            and function_name
            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
        ):
            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                project=project,
                kind=kind
-                if application_name is None
-                else f"{kind}-{application_name.lower()}",
+                if function_name is None
+                else f"{kind}-{function_name.lower()}",
            )
        elif kind == "stream":  # return list for mlrun<1.6.3 BC
            return [
@@ -1148,7 +1185,7 @@
            ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
        )
 
-    def get_s3_storage_options(self) -> typing.Dict[str, typing.Any]:
+    def get_s3_storage_options(self) -> dict[str, typing.Any]:
        """
        Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
        graph uses this method for generating the storage options for S3 parquet target path.
mlrun/data_types/to_pandas.py
CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
                msg = (
                    "toPandas attempted Arrow optimization because "
                    "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n  %s\n"
+                    f"failed by the reason below:\n  {e}\n"
                    "Attempting non-optimization as "
                    "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true." % str(e)
+                    "true."
                )
                warnings.warn(msg)
                use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                "reached the error below and will not continue because automatic fallback "
                "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n  %s" % str(e)
+                f"false.\n  {e}"
            )
            warnings.warn(msg)
            raise
@@ -94,9 +94,7 @@ def toPandas(spark_df):
            )
 
            # Rename columns to avoid duplicated column names.
-            tmp_column_names = [
-                "col_{}".format(i) for i in range(len(spark_df.columns))
-            ]
+            tmp_column_names = [f"col_{i}" for i in range(len(spark_df.columns))]
            self_destruct = spark_df.sql_ctx._conf.arrowPySparkSelfDestructEnabled()
            batches = spark_df.toDF(*tmp_column_names)._collect_as_arrow(
                split_batches=self_destruct
@@ -146,7 +144,7 @@ def toPandas(spark_df):
                "reached the error below and can not continue. Note that "
                "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                "effect on failures in the middle of "
-                "computation.\n  %s" % str(e)
+                f"computation.\n  {e}"
            )
            warnings.warn(msg)
            raise
@@ -156,10 +154,10 @@ def toPandas(spark_df):
        column_counter = Counter(spark_df.columns)
 
        dtype = [None] * len(spark_df.schema)
-        for fieldIdx, field in enumerate(spark_df.schema):
+        for field_idx, field in enumerate(spark_df.schema):
            # For duplicate column name, we use `iloc` to access it.
            if column_counter[field.name] > 1:
-                pandas_col = pdf.iloc[:, fieldIdx]
+                pandas_col = pdf.iloc[:, field_idx]
            else:
                pandas_col = pdf[field.name]
 
@@ -173,12 +171,12 @@ def toPandas(spark_df):
                and field.nullable
                and pandas_col.isnull().any()
            ):
-                dtype[fieldIdx] = pandas_type
+                dtype[field_idx] = pandas_type
            # Ensure we fall back to nullable numpy types, even when whole column is null:
            if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = np.float64
+                dtype[field_idx] = np.float64
            if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = object
+                dtype[field_idx] = object
 
        df = pd.DataFrame()
        for index, t in enumerate(dtype):
mlrun/datastore/__init__.py
CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
    parse_store_uri,
 )
 from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
-from .utils import parse_kafka_url
+from .utils import get_kafka_brokers_from_dict, parse_kafka_url
 
 store_manager = StoreManager()
 
@@ -107,13 +107,10 @@ def get_stream_pusher(stream_path: str, **kwargs):
    :param stream_path: path/url of stream
    """
 
-    if stream_path.startswith("kafka://") or "kafka_bootstrap_servers" in kwargs:
-        topic, bootstrap_servers = parse_kafka_url(
-            stream_path, kwargs.get("kafka_bootstrap_servers")
-        )
-        return KafkaOutputStream(
-            topic, bootstrap_servers, kwargs.get("kafka_producer_options")
-        )
+    kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+    if stream_path.startswith("kafka://") or kafka_brokers:
+        topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+        return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
    elif stream_path.startswith("http://") or stream_path.startswith("https://"):
        return HTTPOutputStream(stream_path=stream_path)
    elif "://" not in stream_path:
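
The refactor routes broker discovery through the new `get_kafka_brokers_from_dict` helper, so brokers embedded in the URL and brokers passed via kwargs take the same code path. A hedged usage sketch, assuming the `kafka://<broker>/<topic>` URL form accepted by `parse_kafka_url`:

    from mlrun.datastore import get_stream_pusher

    # broker taken from the URL netloc; producer options are passed through as-is
    pusher = get_stream_pusher(
        "kafka://localhost:9092/my-topic",
        kafka_producer_options={"acks": "all"},  # illustrative producer option
    )
    pusher.push({"id": 1, "value": 0.5})  # assumption: push() accepts dict records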
mlrun/datastore/alibaba_oss.py
ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
mlrun/datastore/azure_blob.py
CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
            st[key] = parsed_value
 
        account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
        if primary_url:
            if primary_url.startswith("http://"):
                primary_url = primary_url[len("http://") :]
            if primary_url.startswith("https://"):
                primary_url = primary_url[len("https://") :]
            host = primary_url
-        else:
+        elif account_name:
            host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
        if "account_key" in st:
            res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
@@ -199,3 +198,16 @@
            )
            res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
        return res
+
+    @property
+    def spark_url(self):
+        spark_options = self.get_spark_options()
+        url = f"wasbs://{self.endpoint}"
+        prefix = "spark.hadoop.fs.azure.account.key."
+        if spark_options:
+            for key in spark_options:
+                if key.startswith(prefix):
+                    account_key = key[len(prefix) :]
+                    url += f"@{account_key}"
+                    break
+        return url