mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/config.py
CHANGED
|
@@ -17,7 +17,7 @@ Configuration system.
|
|
|
17
17
|
Configuration can be in either a configuration file specified by
|
|
18
18
|
MLRUN_CONFIG_FILE environment variable or by environment variables.
|
|
19
19
|
|
|
20
|
-
Environment variables are in the format "
|
|
20
|
+
Environment variables are in the format "MLRUN_HTTPDB__PORT=8080". This will be
|
|
21
21
|
mapped to config.httpdb.port. Values should be in JSON format.
|
|
22
22
|
"""
|
|
23
23
|
|
|
@@ -27,6 +27,7 @@ import copy
|
|
|
27
27
|
import json
|
|
28
28
|
import os
|
|
29
29
|
import typing
|
|
30
|
+
import warnings
|
|
30
31
|
from collections.abc import Mapping
|
|
31
32
|
from datetime import timedelta
|
|
32
33
|
from distutils.util import strtobool
|
|
@@ -35,8 +36,10 @@ from threading import Lock
|
|
|
35
36
|
|
|
36
37
|
import dotenv
|
|
37
38
|
import semver
|
|
39
|
+
import urllib3.exceptions
|
|
38
40
|
import yaml
|
|
39
41
|
|
|
42
|
+
import mlrun.common.constants
|
|
40
43
|
import mlrun.common.schemas
|
|
41
44
|
import mlrun.errors
|
|
42
45
|
|
|
@@ -51,6 +54,11 @@ default_config = {
|
|
|
51
54
|
"kubernetes": {
|
|
52
55
|
"kubeconfig_path": "", # local path to kubeconfig file (for development purposes),
|
|
53
56
|
# empty by default as the API already running inside k8s cluster
|
|
57
|
+
"pagination": {
|
|
58
|
+
# pagination config for interacting with k8s API
|
|
59
|
+
"list_pods_limit": 200,
|
|
60
|
+
"list_crd_objects_limit": 200,
|
|
61
|
+
},
|
|
54
62
|
},
|
|
55
63
|
"dbpath": "", # db/api url
|
|
56
64
|
# url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)
|
|
@@ -63,11 +71,15 @@ default_config = {
|
|
|
63
71
|
"api_base_version": "v1",
|
|
64
72
|
"version": "", # will be set to current version
|
|
65
73
|
"images_tag": "", # tag to use with mlrun images e.g. mlrun/mlrun (defaults to version)
|
|
66
|
-
|
|
74
|
+
# registry to use with mlrun images that start with "mlrun/" e.g. quay.io/ (defaults to empty, for dockerhub)
|
|
75
|
+
"images_registry": "",
|
|
76
|
+
# registry to use with non-mlrun images (don't start with "mlrun/") specified in 'images_to_enrich_registry'
|
|
77
|
+
# defaults to empty, for dockerhub
|
|
78
|
+
"vendor_images_registry": "",
|
|
67
79
|
# comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
|
|
68
80
|
# registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
|
|
69
81
|
# mlrun/ml-base, etc...)
|
|
70
|
-
"images_to_enrich_registry": "^mlrun
|
|
82
|
+
"images_to_enrich_registry": "^mlrun/*,python:3.9",
|
|
71
83
|
"kfp_url": "",
|
|
72
84
|
"kfp_ttl": "14400", # KFP ttl in sec, after that completed PODs will be deleted
|
|
73
85
|
"kfp_image": "mlrun/mlrun", # image to use for KFP runner (defaults to mlrun/mlrun)
|
|
@@ -87,7 +99,7 @@ default_config = {
|
|
|
87
99
|
"mpijob_crd_version": "", # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
|
|
88
100
|
"ipython_widget": True,
|
|
89
101
|
"log_level": "INFO",
|
|
90
|
-
# log formatter (options: human | json)
|
|
102
|
+
# log formatter (options: human | human_extended | json)
|
|
91
103
|
"log_formatter": "human",
|
|
92
104
|
"submit_timeout": "180", # timeout when submitting a new k8s resource
|
|
93
105
|
# runtimes cleanup interval in seconds
|
|
@@ -103,7 +115,12 @@ default_config = {
|
|
|
103
115
|
# max number of parallel abort run jobs in runs monitoring
|
|
104
116
|
"concurrent_abort_stale_runs_workers": 10,
|
|
105
117
|
"list_runs_time_period_in_days": 7, # days
|
|
106
|
-
}
|
|
118
|
+
},
|
|
119
|
+
"projects": {
|
|
120
|
+
"summaries": {
|
|
121
|
+
"cache_interval": "30",
|
|
122
|
+
},
|
|
123
|
+
},
|
|
107
124
|
},
|
|
108
125
|
"crud": {
|
|
109
126
|
"runs": {
|
|
@@ -137,6 +154,11 @@ default_config = {
|
|
|
137
154
|
"datasets": {
|
|
138
155
|
"max_preview_columns": 100,
|
|
139
156
|
},
|
|
157
|
+
"limits": {
|
|
158
|
+
"max_chunk_size": 1024 * 1024 * 1, # 1MB
|
|
159
|
+
"max_preview_size": 1024 * 1024 * 10, # 10MB
|
|
160
|
+
"max_download_size": 1024 * 1024 * 100, # 100MB
|
|
161
|
+
},
|
|
140
162
|
},
|
|
141
163
|
# FIXME: Adding these defaults here so we won't need to patch the "installing component" (provazio-controller) to
|
|
142
164
|
# configure this values on field systems, for newer system this will be configured correctly
|
|
@@ -149,7 +171,6 @@ default_config = {
|
|
|
149
171
|
"url": "",
|
|
150
172
|
},
|
|
151
173
|
"v3io_framesd": "http://framesd:8080",
|
|
152
|
-
"datastore": {"async_source_mode": "disabled"},
|
|
153
174
|
# default node selector to be applied to all functions - json string base64 encoded format
|
|
154
175
|
"default_function_node_selector": "e30=",
|
|
155
176
|
# default priority class to be applied to functions running on k8s cluster
|
|
@@ -189,6 +210,7 @@ default_config = {
|
|
|
189
210
|
"background_tasks": {
|
|
190
211
|
# enabled / disabled
|
|
191
212
|
"timeout_mode": "enabled",
|
|
213
|
+
"function_deletion_batch_size": 10,
|
|
192
214
|
# timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
|
|
193
215
|
"default_timeouts": {
|
|
194
216
|
"operations": {
|
|
@@ -197,6 +219,7 @@ default_config = {
|
|
|
197
219
|
"run_abortion": "600",
|
|
198
220
|
"abort_grace_period": "10",
|
|
199
221
|
"delete_project": "900",
|
|
222
|
+
"delete_function": "900",
|
|
200
223
|
},
|
|
201
224
|
"runtimes": {"dask": "600"},
|
|
202
225
|
},
|
|
@@ -227,10 +250,17 @@ default_config = {
|
|
|
227
250
|
"executing": "24h",
|
|
228
251
|
}
|
|
229
252
|
},
|
|
253
|
+
# When the module is reloaded, the maximum depth recursion configuration for the recursive reload
|
|
254
|
+
# function is used to prevent infinite loop
|
|
255
|
+
"reload_max_recursion_depth": 100,
|
|
230
256
|
},
|
|
231
257
|
"databricks": {
|
|
232
258
|
"artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
|
|
233
259
|
},
|
|
260
|
+
"application": {
|
|
261
|
+
"default_sidecar_internal_port": 8050,
|
|
262
|
+
"default_authentication_mode": mlrun.common.schemas.APIGatewayAuthenticationMode.none,
|
|
263
|
+
},
|
|
234
264
|
},
|
|
235
265
|
# TODO: function defaults should be moved to the function spec config above
|
|
236
266
|
"function_defaults": {
|
|
@@ -241,6 +271,7 @@ default_config = {
|
|
|
241
271
|
"remote": "mlrun/mlrun",
|
|
242
272
|
"dask": "mlrun/ml-base",
|
|
243
273
|
"mpijob": "mlrun/mlrun",
|
|
274
|
+
"application": "python:3.9",
|
|
244
275
|
},
|
|
245
276
|
# see enrich_function_preemption_spec for more info,
|
|
246
277
|
# and mlrun.common.schemas.function.PreemptionModes for available options
|
|
@@ -255,6 +286,16 @@ default_config = {
|
|
|
255
286
|
"url": "",
|
|
256
287
|
"service": "mlrun-api-chief",
|
|
257
288
|
"port": 8080,
|
|
289
|
+
"feature_gates": {
|
|
290
|
+
"scheduler": "enabled",
|
|
291
|
+
"project_sync": "enabled",
|
|
292
|
+
"cleanup": "enabled",
|
|
293
|
+
"runs_monitoring": "enabled",
|
|
294
|
+
"pagination_cache": "enabled",
|
|
295
|
+
"project_summaries": "enabled",
|
|
296
|
+
"start_logs": "enabled",
|
|
297
|
+
"stop_logs": "enabled",
|
|
298
|
+
},
|
|
258
299
|
},
|
|
259
300
|
"worker": {
|
|
260
301
|
"sync_with_chief": {
|
|
@@ -292,7 +333,7 @@ default_config = {
|
|
|
292
333
|
"http": {
|
|
293
334
|
# when True, the client will verify the server's TLS
|
|
294
335
|
# set to False for backwards compatibility.
|
|
295
|
-
"verify":
|
|
336
|
+
"verify": True,
|
|
296
337
|
},
|
|
297
338
|
"db": {
|
|
298
339
|
"commit_retry_timeout": 30,
|
|
@@ -325,7 +366,13 @@ default_config = {
|
|
|
325
366
|
# optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
|
|
326
367
|
#
|
|
327
368
|
# if set to "nil" or "none", nothing would be set
|
|
328
|
-
"modes":
|
|
369
|
+
"modes": (
|
|
370
|
+
"STRICT_TRANS_TABLES"
|
|
371
|
+
",NO_ZERO_IN_DATE"
|
|
372
|
+
",NO_ZERO_DATE"
|
|
373
|
+
",ERROR_FOR_DIVISION_BY_ZERO"
|
|
374
|
+
",NO_ENGINE_SUBSTITUTION",
|
|
375
|
+
)
|
|
329
376
|
},
|
|
330
377
|
},
|
|
331
378
|
"jobs": {
|
|
@@ -353,10 +400,12 @@ default_config = {
|
|
|
353
400
|
# is set to ClusterIP
|
|
354
401
|
# ---------------------------------------------------------------------
|
|
355
402
|
# Note: adding a mode requires special handling on
|
|
356
|
-
# - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
|
|
357
|
-
# - mlrun.runtimes.function.enrich_function_with_ingress
|
|
403
|
+
# - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
|
|
404
|
+
# - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
|
|
358
405
|
"add_templated_ingress_host_mode": "never",
|
|
359
406
|
"explicit_ack": "enabled",
|
|
407
|
+
# size of serving spec to move to config maps
|
|
408
|
+
"serving_spec_env_cutoff": 0,
|
|
360
409
|
},
|
|
361
410
|
"logs": {
|
|
362
411
|
"decode": {
|
|
@@ -415,7 +464,6 @@ default_config = {
|
|
|
415
464
|
"followers": "",
|
|
416
465
|
# This is used as the interval for the sync loop both when mlrun is leader and follower
|
|
417
466
|
"periodic_sync_interval": "1 minute",
|
|
418
|
-
"counters_cache_ttl": "2 minutes",
|
|
419
467
|
"project_owners_cache_ttl": "30 seconds",
|
|
420
468
|
# access key to be used when the leader is iguazio and polling is done from it
|
|
421
469
|
"iguazio_access_key": "",
|
|
@@ -444,10 +492,10 @@ default_config = {
|
|
|
444
492
|
# pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
|
|
445
493
|
# git+https://github.com/mlrun/mlrun@development. by default uses the version
|
|
446
494
|
"mlrun_version_specifier": "",
|
|
447
|
-
"kaniko_image": "gcr.io/kaniko-project/executor:v1.
|
|
495
|
+
"kaniko_image": "gcr.io/kaniko-project/executor:v1.23.2", # kaniko builder image
|
|
448
496
|
"kaniko_init_container_image": "alpine:3.18",
|
|
449
497
|
# image for kaniko init container when docker registry is ECR
|
|
450
|
-
"kaniko_aws_cli_image": "amazon/aws-cli:2.
|
|
498
|
+
"kaniko_aws_cli_image": "amazon/aws-cli:2.17.16",
|
|
451
499
|
# kaniko sometimes fails to get filesystem from image, this is a workaround to retry the process
|
|
452
500
|
# a known issue in Kaniko - https://github.com/GoogleContainerTools/kaniko/issues/1717
|
|
453
501
|
"kaniko_image_fs_extraction_retries": "3",
|
|
@@ -474,17 +522,24 @@ default_config = {
|
|
|
474
522
|
# if set to true, will log a warning for trying to use run db functionality while in nop db mode
|
|
475
523
|
"verbose": True,
|
|
476
524
|
},
|
|
525
|
+
"pagination": {
|
|
526
|
+
"default_page_size": 20,
|
|
527
|
+
"pagination_cache": {
|
|
528
|
+
"interval": 60,
|
|
529
|
+
"ttl": 3600,
|
|
530
|
+
"max_size": 10000,
|
|
531
|
+
},
|
|
532
|
+
},
|
|
477
533
|
},
|
|
478
534
|
"model_endpoint_monitoring": {
|
|
479
535
|
"serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
|
|
480
536
|
"application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
|
|
481
|
-
"drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
|
|
482
537
|
# Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
|
|
483
538
|
# stream, and endpoints.
|
|
484
539
|
"store_prefixes": {
|
|
485
540
|
"default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
|
|
486
541
|
"user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
|
|
487
|
-
"
|
|
542
|
+
"monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
|
|
488
543
|
},
|
|
489
544
|
# Offline storage path can be either relative or a full path. This path is used for general offline data
|
|
490
545
|
# storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
|
|
@@ -493,12 +548,14 @@ default_config = {
|
|
|
493
548
|
# when the user is working in CE environment and has not provided any stream path.
|
|
494
549
|
"default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
|
|
495
550
|
"default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
|
|
496
|
-
"batch_processing_function_branch": "master",
|
|
497
551
|
"parquet_batching_max_events": 10_000,
|
|
498
552
|
"parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
|
|
499
|
-
# See mlrun.model_monitoring.stores.
|
|
500
|
-
"store_type": "v3io-nosql",
|
|
553
|
+
# See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
|
|
501
554
|
"endpoint_store_connection": "",
|
|
555
|
+
# See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
|
|
556
|
+
"tsdb_connection": "",
|
|
557
|
+
# See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
|
|
558
|
+
"stream_connection": "",
|
|
502
559
|
},
|
|
503
560
|
"secret_stores": {
|
|
504
561
|
# Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
|
|
@@ -534,9 +591,10 @@ default_config = {
|
|
|
534
591
|
"feature_store": {
|
|
535
592
|
"data_prefixes": {
|
|
536
593
|
"default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
|
|
537
|
-
"nosql": "v3io:///projects/{project}/FeatureStore/{name}/
|
|
594
|
+
"nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
|
|
538
595
|
# "authority" is optional and generalizes [userinfo "@"] host [":" port]
|
|
539
|
-
"redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/
|
|
596
|
+
"redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
|
|
597
|
+
"dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
|
|
540
598
|
},
|
|
541
599
|
"default_targets": "parquet,nosql",
|
|
542
600
|
"default_job_image": "mlrun/mlrun",
|
|
@@ -630,7 +688,9 @@ default_config = {
|
|
|
630
688
|
"failed_runs_grace_period": 3600,
|
|
631
689
|
"verbose": True,
|
|
632
690
|
# the number of workers which will be used to trigger the start log collection
|
|
633
|
-
"concurrent_start_logs_workers":
|
|
691
|
+
"concurrent_start_logs_workers": 50,
|
|
692
|
+
# the number of runs for which to start logs on api startup
|
|
693
|
+
"start_logs_startup_run_limit": 150,
|
|
634
694
|
# the time in hours in which to start log collection from.
|
|
635
695
|
# after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
|
|
636
696
|
# we want to collect their logs in the new log collection method (sidecar)
|
|
@@ -672,6 +732,15 @@ default_config = {
|
|
|
672
732
|
"access_key": "",
|
|
673
733
|
},
|
|
674
734
|
"grafana_url": "",
|
|
735
|
+
"alerts": {
|
|
736
|
+
# supported modes: "enabled", "disabled".
|
|
737
|
+
"mode": "disabled",
|
|
738
|
+
# maximum number of alerts we allow to be configured.
|
|
739
|
+
# user will get an error when exceeding this
|
|
740
|
+
"max_allowed": 10000,
|
|
741
|
+
# maximum allowed value for count in criteria field inside AlertConfig
|
|
742
|
+
"max_criteria_count": 100,
|
|
743
|
+
},
|
|
675
744
|
"auth_with_client_id": {
|
|
676
745
|
"enabled": False,
|
|
677
746
|
"request_timeout": 5,
|
|
@@ -727,7 +796,21 @@ class Config:
|
|
|
727
796
|
for key, value in cfg.items():
|
|
728
797
|
if hasattr(self, key):
|
|
729
798
|
if isinstance(value, dict):
|
|
730
|
-
|
|
799
|
+
# ignore the `skip_errors` flag here
|
|
800
|
+
# if the key does not align with what mlrun config expects it is a user
|
|
801
|
+
# input error that can lead to unexpected behavior.
|
|
802
|
+
# raise the exception to ensure configuration is loaded correctly and do not
|
|
803
|
+
# ignore any errors.
|
|
804
|
+
config_value = getattr(self, key)
|
|
805
|
+
try:
|
|
806
|
+
config_value.update(value)
|
|
807
|
+
except AttributeError as exc:
|
|
808
|
+
if not isinstance(config_value, (dict, Config)):
|
|
809
|
+
raise ValueError(
|
|
810
|
+
f"Can not update `{key}` config. "
|
|
811
|
+
f"Expected a configuration but received {type(value)}"
|
|
812
|
+
) from exc
|
|
813
|
+
raise exc
|
|
731
814
|
else:
|
|
732
815
|
try:
|
|
733
816
|
setattr(self, key, value)
|
|
@@ -775,6 +858,7 @@ class Config:
|
|
|
775
858
|
):
|
|
776
859
|
"""
|
|
777
860
|
decodes and loads the config attribute to expected type
|
|
861
|
+
|
|
778
862
|
:param attribute_path: the path in the default_config e.g. preemptible_nodes.node_selector
|
|
779
863
|
:param expected_type: the object type valid values are : `dict`, `list` etc...
|
|
780
864
|
:return: the expected type instance
|
|
@@ -798,7 +882,7 @@ class Config:
|
|
|
798
882
|
f"Unable to decode {attribute_path}"
|
|
799
883
|
)
|
|
800
884
|
parsed_attribute_value = json.loads(decoded_attribute_value)
|
|
801
|
-
if
|
|
885
|
+
if not isinstance(parsed_attribute_value, expected_type):
|
|
802
886
|
raise mlrun.errors.MLRunInvalidArgumentTypeError(
|
|
803
887
|
f"Expected type {expected_type}, got {type(parsed_attribute_value)}"
|
|
804
888
|
)
|
|
@@ -900,24 +984,6 @@ class Config:
|
|
|
900
984
|
f"is not allowed for iguazio version: {igz_version} < 3.5.1"
|
|
901
985
|
)
|
|
902
986
|
|
|
903
|
-
def resolve_kfp_url(self, namespace=None):
|
|
904
|
-
if config.kfp_url:
|
|
905
|
-
return config.kfp_url
|
|
906
|
-
igz_version = self.get_parsed_igz_version()
|
|
907
|
-
# TODO: When Iguazio 3.4 will deprecate we can remove this line
|
|
908
|
-
if igz_version and igz_version <= semver.VersionInfo.parse("3.6.0-b1"):
|
|
909
|
-
if namespace is None:
|
|
910
|
-
if not config.namespace:
|
|
911
|
-
raise mlrun.errors.MLRunNotFoundError(
|
|
912
|
-
"For KubeFlow Pipelines to function, a namespace must be configured"
|
|
913
|
-
)
|
|
914
|
-
namespace = config.namespace
|
|
915
|
-
# When instead of host we provided namespace we tackled this issue
|
|
916
|
-
# https://github.com/canonical/bundle-kubeflow/issues/412
|
|
917
|
-
# TODO: When we'll move to kfp 1.4.0 (server side) it should be resolved
|
|
918
|
-
return f"http://ml-pipeline.{namespace}.svc.cluster.local:8888"
|
|
919
|
-
return None
|
|
920
|
-
|
|
921
987
|
def resolve_chief_api_url(self) -> str:
|
|
922
988
|
if self.httpdb.clusterization.chief.url:
|
|
923
989
|
return self.httpdb.clusterization.chief.url
|
|
@@ -937,6 +1003,10 @@ class Config:
|
|
|
937
1003
|
self.httpdb.clusterization.chief.url = chief_api_url
|
|
938
1004
|
return self.httpdb.clusterization.chief.url
|
|
939
1005
|
|
|
1006
|
+
@staticmethod
|
|
1007
|
+
def internal_labels():
|
|
1008
|
+
return mlrun.common.constants.MLRunInternalLabels.all()
|
|
1009
|
+
|
|
940
1010
|
@staticmethod
|
|
941
1011
|
def get_storage_auto_mount_params():
|
|
942
1012
|
auto_mount_params = {}
|
|
@@ -1004,6 +1074,14 @@ class Config:
|
|
|
1004
1074
|
resource_requirement.pop(gpu)
|
|
1005
1075
|
return resource_requirement
|
|
1006
1076
|
|
|
1077
|
+
def force_api_gateway_ssl_redirect(self):
|
|
1078
|
+
"""
|
|
1079
|
+
Get the default value for the ssl_redirect configuration.
|
|
1080
|
+
In Iguazio we always want to redirect to HTTPS, in other cases we don't.
|
|
1081
|
+
:return: True if we should redirect to HTTPS, False otherwise.
|
|
1082
|
+
"""
|
|
1083
|
+
return self.is_running_on_iguazio()
|
|
1084
|
+
|
|
1007
1085
|
def to_dict(self):
|
|
1008
1086
|
return copy.deepcopy(self._cfg)
|
|
1009
1087
|
|
|
@@ -1036,6 +1114,9 @@ class Config:
|
|
|
1036
1114
|
# importing here to avoid circular dependency
|
|
1037
1115
|
import mlrun.db
|
|
1038
1116
|
|
|
1117
|
+
# It ensures that SSL verification is set before establishing a connection
|
|
1118
|
+
_configure_ssl_verification(self.httpdb.http.verify)
|
|
1119
|
+
|
|
1039
1120
|
# when dbpath is set we want to connect to it which will sync configuration from it to the client
|
|
1040
1121
|
mlrun.db.get_run_db(value, force_reconnect=True)
|
|
1041
1122
|
|
|
@@ -1064,9 +1145,10 @@ class Config:
|
|
|
1064
1145
|
project: str = "",
|
|
1065
1146
|
kind: str = "",
|
|
1066
1147
|
target: str = "online",
|
|
1067
|
-
artifact_path: str = None,
|
|
1068
|
-
|
|
1069
|
-
|
|
1148
|
+
artifact_path: typing.Optional[str] = None,
|
|
1149
|
+
function_name: typing.Optional[str] = None,
|
|
1150
|
+
**kwargs,
|
|
1151
|
+
) -> str:
|
|
1070
1152
|
"""Get the full path from the configuration based on the provided project and kind.
|
|
1071
1153
|
|
|
1072
1154
|
:param project: Project name.
|
|
@@ -1080,10 +1162,9 @@ class Config:
|
|
|
1080
1162
|
artifact path instead.
|
|
1081
1163
|
:param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
|
|
1082
1164
|
relative artifact path will be taken from the global MLRun artifact path.
|
|
1083
|
-
:param
|
|
1165
|
+
:param function_name: Application name, None for model_monitoring_stream.
|
|
1084
1166
|
|
|
1085
|
-
:return: Full configured path for the provided kind.
|
|
1086
|
-
or a list of paths in the case of the online model monitoring stream path.
|
|
1167
|
+
:return: Full configured path for the provided kind.
|
|
1087
1168
|
"""
|
|
1088
1169
|
|
|
1089
1170
|
if target != "offline":
|
|
@@ -1092,29 +1173,23 @@ class Config:
|
|
|
1092
1173
|
)
|
|
1093
1174
|
if store_prefix_dict.get(kind):
|
|
1094
1175
|
# Target exist in store prefix and has a valid string value
|
|
1095
|
-
return store_prefix_dict[kind].format(project=project)
|
|
1096
|
-
|
|
1176
|
+
return store_prefix_dict[kind].format(project=project, **kwargs)
|
|
1097
1177
|
if (
|
|
1098
|
-
|
|
1178
|
+
function_name
|
|
1179
|
+
and function_name
|
|
1099
1180
|
!= mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
|
|
1100
1181
|
):
|
|
1101
1182
|
return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
|
|
1102
1183
|
project=project,
|
|
1103
1184
|
kind=kind
|
|
1104
|
-
if
|
|
1105
|
-
else f"{kind}-{
|
|
1185
|
+
if function_name is None
|
|
1186
|
+
else f"{kind}-{function_name.lower()}",
|
|
1187
|
+
)
|
|
1188
|
+
elif kind == "stream":
|
|
1189
|
+
return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
|
|
1190
|
+
project=project,
|
|
1191
|
+
kind=kind,
|
|
1106
1192
|
)
|
|
1107
|
-
elif kind == "stream": # return list for mlrun<1.6.3 BC
|
|
1108
|
-
return [
|
|
1109
|
-
mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1110
|
-
project=project,
|
|
1111
|
-
kind=kind,
|
|
1112
|
-
), # old stream uri (pipelines) for BC ML-6043
|
|
1113
|
-
mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
|
|
1114
|
-
project=project,
|
|
1115
|
-
kind=kind,
|
|
1116
|
-
), # new stream uri (projects)
|
|
1117
|
-
]
|
|
1118
1193
|
else:
|
|
1119
1194
|
return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
|
|
1120
1195
|
project=project,
|
|
@@ -1148,7 +1223,7 @@ class Config:
|
|
|
1148
1223
|
ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
|
|
1149
1224
|
)
|
|
1150
1225
|
|
|
1151
|
-
def get_s3_storage_options(self) ->
|
|
1226
|
+
def get_s3_storage_options(self) -> dict[str, typing.Any]:
|
|
1152
1227
|
"""
|
|
1153
1228
|
Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
|
|
1154
1229
|
graph uses this method for generating the storage options for S3 parquet target path.
|
|
@@ -1177,12 +1252,11 @@ class Config:
|
|
|
1177
1252
|
|
|
1178
1253
|
return storage_options
|
|
1179
1254
|
|
|
1180
|
-
def
|
|
1181
|
-
if not version:
|
|
1182
|
-
version = self.nuclio_version
|
|
1255
|
+
def is_explicit_ack_enabled(self) -> bool:
|
|
1183
1256
|
return self.httpdb.nuclio.explicit_ack == "enabled" and (
|
|
1184
|
-
not
|
|
1185
|
-
or semver.VersionInfo.parse(
|
|
1257
|
+
not self.nuclio_version
|
|
1258
|
+
or semver.VersionInfo.parse(self.nuclio_version)
|
|
1259
|
+
>= semver.VersionInfo.parse("1.12.10")
|
|
1186
1260
|
)
|
|
1187
1261
|
|
|
1188
1262
|
|
|
@@ -1232,6 +1306,7 @@ def _do_populate(env=None, skip_errors=False):
|
|
|
1232
1306
|
if data:
|
|
1233
1307
|
config.update(data, skip_errors=skip_errors)
|
|
1234
1308
|
|
|
1309
|
+
_configure_ssl_verification(config.httpdb.http.verify)
|
|
1235
1310
|
_validate_config(config)
|
|
1236
1311
|
|
|
1237
1312
|
|
|
@@ -1291,6 +1366,16 @@ def _convert_str(value, typ):
|
|
|
1291
1366
|
return typ(value)
|
|
1292
1367
|
|
|
1293
1368
|
|
|
1369
|
+
def _configure_ssl_verification(verify_ssl: bool) -> None:
|
|
1370
|
+
"""Configure SSL verification warnings based on the setting."""
|
|
1371
|
+
if not verify_ssl:
|
|
1372
|
+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
1373
|
+
else:
|
|
1374
|
+
# If the user changes the `verify` setting to `True` at runtime using `mlrun.set_env_from_file` after
|
|
1375
|
+
# importing `mlrun`, we need to reload the `mlrun` configuration and enable this warning.
|
|
1376
|
+
warnings.simplefilter("default", urllib3.exceptions.InsecureRequestWarning)
|
|
1377
|
+
|
|
1378
|
+
|
|
1294
1379
|
def read_env(env=None, prefix=env_prefix):
|
|
1295
1380
|
"""Read configuration from environment"""
|
|
1296
1381
|
env = os.environ if env is None else env
|
mlrun/data_types/data_types.py
CHANGED
|
@@ -70,6 +70,11 @@ def pa_type_to_value_type(type_):
|
|
|
70
70
|
if isinstance(type_, TimestampType):
|
|
71
71
|
return ValueType.DATETIME
|
|
72
72
|
|
|
73
|
+
# pandas category type translates to pyarrow DictionaryType
|
|
74
|
+
# we need to unpack the value type (ML-7868)
|
|
75
|
+
if isinstance(type_, pyarrow.DictionaryType):
|
|
76
|
+
type_ = type_.value_type
|
|
77
|
+
|
|
73
78
|
type_map = {
|
|
74
79
|
pyarrow.bool_(): ValueType.BOOL,
|
|
75
80
|
pyarrow.int64(): ValueType.INT64,
|
|
@@ -139,7 +144,7 @@ def gbq_to_pandas_dtype(gbq_type):
|
|
|
139
144
|
"BOOL": "bool",
|
|
140
145
|
"FLOAT": "float64",
|
|
141
146
|
"INTEGER": pd.Int64Dtype(),
|
|
142
|
-
"TIMESTAMP": "datetime64[ns]",
|
|
147
|
+
"TIMESTAMP": "datetime64[ns, UTC]",
|
|
143
148
|
}
|
|
144
149
|
return type_map.get(gbq_type, "object")
|
|
145
150
|
|
mlrun/data_types/spark.py
CHANGED
|
@@ -20,10 +20,10 @@ import pytz
|
|
|
20
20
|
from pyspark.sql.functions import to_utc_timestamp
|
|
21
21
|
from pyspark.sql.types import BooleanType, DoubleType, TimestampType
|
|
22
22
|
|
|
23
|
+
from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
|
|
23
24
|
from mlrun.utils import logger
|
|
24
25
|
|
|
25
26
|
from .data_types import InferOptions, spark_to_value_type
|
|
26
|
-
from .to_pandas import toPandas
|
|
27
27
|
|
|
28
28
|
try:
|
|
29
29
|
import pyspark.sql.functions as funcs
|
|
@@ -75,7 +75,7 @@ def get_df_preview_spark(df, preview_lines=20):
|
|
|
75
75
|
"""capture preview data from spark df"""
|
|
76
76
|
df = df.limit(preview_lines)
|
|
77
77
|
|
|
78
|
-
result_dict =
|
|
78
|
+
result_dict = spark_df_to_pandas(df).to_dict(orient="split")
|
|
79
79
|
return [result_dict["columns"], *result_dict["data"]]
|
|
80
80
|
|
|
81
81
|
|