mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as potentially problematic.

Files changed (235)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -1
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +31 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +196 -0
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +13 -2
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +233 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +387 -119
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +245 -20
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +909 -231
  77. mlrun/db/nopdb.py +279 -14
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1176 -406
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +208 -181
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +54 -24
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/__init__.py +1 -0
  178. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  179. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  180. mlrun/runtimes/nuclio/application/application.py +758 -0
  181. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  182. mlrun/runtimes/nuclio/function.py +188 -68
  183. mlrun/runtimes/nuclio/serving.py +57 -60
  184. mlrun/runtimes/pod.py +191 -58
  185. mlrun/runtimes/remotesparkjob.py +11 -8
  186. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  187. mlrun/runtimes/utils.py +40 -73
  188. mlrun/secrets.py +6 -2
  189. mlrun/serving/__init__.py +8 -1
  190. mlrun/serving/remote.py +2 -3
  191. mlrun/serving/routers.py +89 -64
  192. mlrun/serving/server.py +54 -26
  193. mlrun/serving/states.py +187 -56
  194. mlrun/serving/utils.py +19 -11
  195. mlrun/serving/v2_serving.py +136 -63
  196. mlrun/track/tracker.py +2 -1
  197. mlrun/track/trackers/mlflow_tracker.py +5 -0
  198. mlrun/utils/async_http.py +26 -6
  199. mlrun/utils/db.py +18 -0
  200. mlrun/utils/helpers.py +375 -105
  201. mlrun/utils/http.py +2 -2
  202. mlrun/utils/logger.py +75 -9
  203. mlrun/utils/notifications/notification/__init__.py +14 -10
  204. mlrun/utils/notifications/notification/base.py +48 -0
  205. mlrun/utils/notifications/notification/console.py +2 -0
  206. mlrun/utils/notifications/notification/git.py +24 -1
  207. mlrun/utils/notifications/notification/ipython.py +2 -0
  208. mlrun/utils/notifications/notification/slack.py +96 -21
  209. mlrun/utils/notifications/notification/webhook.py +63 -2
  210. mlrun/utils/notifications/notification_pusher.py +146 -16
  211. mlrun/utils/regex.py +9 -0
  212. mlrun/utils/retryer.py +3 -2
  213. mlrun/utils/v3io_clients.py +2 -3
  214. mlrun/utils/version/version.json +2 -2
  215. mlrun-1.7.2.dist-info/METADATA +390 -0
  216. mlrun-1.7.2.dist-info/RECORD +351 -0
  217. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  218. mlrun/feature_store/retrieval/conversion.py +0 -271
  219. mlrun/kfpops.py +0 -868
  220. mlrun/model_monitoring/application.py +0 -310
  221. mlrun/model_monitoring/batch.py +0 -974
  222. mlrun/model_monitoring/controller_handler.py +0 -37
  223. mlrun/model_monitoring/prometheus.py +0 -216
  224. mlrun/model_monitoring/stores/__init__.py +0 -111
  225. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  226. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  227. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  228. mlrun/model_monitoring/stores/models/base.py +0 -84
  229. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  230. mlrun/platforms/other.py +0 -305
  231. mlrun-1.7.0rc4.dist-info/METADATA +0 -269
  232. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  233. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  234. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  235. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/config.py CHANGED
@@ -27,6 +27,7 @@ import copy
 import json
 import os
 import typing
+import warnings
 from collections.abc import Mapping
 from datetime import timedelta
 from distutils.util import strtobool
@@ -35,8 +36,10 @@ from threading import Lock
 
 import dotenv
 import semver
+import urllib3.exceptions
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -46,11 +49,17 @@ _load_lock = Lock()
 _none_type = type(None)
 default_env_file = os.getenv("MLRUN_DEFAULT_ENV_FILE", "~/.mlrun.env")
 
+
 default_config = {
     "namespace": "",  # default kubernetes namespace
     "kubernetes": {
         "kubeconfig_path": "",  # local path to kubeconfig file (for development purposes),
         # empty by default as the API already running inside k8s cluster
+        "pagination": {
+            # pagination config for interacting with k8s API
+            "list_pods_limit": 200,
+            "list_crd_objects_limit": 200,
+        },
     },
     "dbpath": "",  # db/api url
     # url to nuclio dashboard api (can be with user & token, e.g. https://username:password@dashboard-url.com)
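Nested keys like the new kubernetes.pagination limits can be overridden per environment without touching code: MLRun maps MLRUN_-prefixed environment variables onto the config tree, with double underscores marking nesting. A minimal sketch of the convention (the variable name below is derived from the keys above, not taken from the diff):

    import os

    # double underscores map to nesting: kubernetes.pagination.list_pods_limit
    os.environ["MLRUN_KUBERNETES__PAGINATION__LIST_PODS_LIMIT"] = "500"

    import mlrun  # the config is populated from the environment on import

    print(mlrun.mlconf.kubernetes.pagination.list_pods_limit)  # 500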
@@ -63,11 +72,15 @@ default_config = {
     "api_base_version": "v1",
     "version": "",  # will be set to current version
     "images_tag": "",  # tag to use with mlrun images e.g. mlrun/mlrun (defaults to version)
-    "images_registry": "",  # registry to use with mlrun images e.g. quay.io/ (defaults to empty, for dockerhub)
+    # registry to use with mlrun images that start with "mlrun/" e.g. quay.io/ (defaults to empty, for dockerhub)
+    "images_registry": "",
+    # registry to use with non-mlrun images (don't start with "mlrun/") specified in 'images_to_enrich_registry'
+    # defaults to empty, for dockerhub
+    "vendor_images_registry": "",
     # comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
     # registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
     # mlrun/ml-base, etc...)
-    "images_to_enrich_registry": "^mlrun/*",
+    "images_to_enrich_registry": "^mlrun/*,python:3.9",
     "kfp_url": "",
     "kfp_ttl": "14400",  # KFP ttl in sec, after that completed PODs will be deleted
     "kfp_image": "mlrun/mlrun",  # image to use for KFP runner (defaults to mlrun/mlrun)
@@ -87,7 +100,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -103,7 +116,20 @@ default_config = {
             # max number of parallel abort run jobs in runs monitoring
             "concurrent_abort_stale_runs_workers": 10,
             "list_runs_time_period_in_days": 7,  # days
-        }
+        },
+        "projects": {
+            "summaries": {
+                "cache_interval": "30",
+                "feature_gates": {
+                    "artifacts": "enabled",
+                    "schedules": "enabled",
+                    "feature_sets": "enabled",
+                    "models": "enabled",
+                    "runs": "enabled",
+                    "pipelines": "enabled",
+                },
+            },
+        },
     },
     "crud": {
         "runs": {
@@ -137,6 +163,11 @@ default_config = {
         "datasets": {
            "max_preview_columns": 100,
         },
+        "limits": {
+            "max_chunk_size": 1024 * 1024 * 1,  # 1MB
+            "max_preview_size": 1024 * 1024 * 10,  # 10MB
+            "max_download_size": 1024 * 1024 * 100,  # 100MB
+        },
     },
     # FIXME: Adding these defaults here so we won't need to patch the "installing component" (provazio-controller) to
     # configure this values on field systems, for newer system this will be configured correctly
@@ -188,6 +219,7 @@ default_config = {
     "background_tasks": {
         # enabled / disabled
         "timeout_mode": "enabled",
+        "function_deletion_batch_size": 10,
         # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
         "default_timeouts": {
             "operations": {
@@ -196,6 +228,7 @@ default_config = {
                 "run_abortion": "600",
                 "abort_grace_period": "10",
                 "delete_project": "900",
+                "delete_function": "900",
             },
             "runtimes": {"dask": "600"},
         },
@@ -226,10 +259,17 @@ default_config = {
                 "executing": "24h",
             }
         },
+        # When the module is reloaded, the maximum depth recursion configuration for the recursive reload
+        # function is used to prevent infinite loop
+        "reload_max_recursion_depth": 100,
     },
     "databricks": {
         "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
     },
+    "application": {
+        "default_sidecar_internal_port": 8050,
+        "default_authentication_mode": mlrun.common.schemas.APIGatewayAuthenticationMode.none,
+    },
 },
 # TODO: function defaults should be moved to the function spec config above
 "function_defaults": {
@@ -240,6 +280,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
+        "application": "python:3.9",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -254,6 +295,16 @@ default_config = {
         "url": "",
         "service": "mlrun-api-chief",
         "port": 8080,
+        "feature_gates": {
+            "scheduler": "enabled",
+            "project_sync": "enabled",
+            "cleanup": "enabled",
+            "runs_monitoring": "enabled",
+            "pagination_cache": "enabled",
+            "project_summaries": "enabled",
+            "start_logs": "enabled",
+            "stop_logs": "enabled",
+        },
     },
     "worker": {
         "sync_with_chief": {
@@ -291,7 +342,7 @@ default_config = {
     "http": {
         # when True, the client will verify the server's TLS
         # set to False for backwards compatibility.
-        "verify": False,
+        "verify": True,
     },
     "db": {
         "commit_retry_timeout": 30,
@@ -324,7 +375,13 @@ default_config = {
         # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
         #
         # if set to "nil" or "none", nothing would be set
-        "modes": "STRICT_TRANS_TABLES",
+        "modes": (
+            "STRICT_TRANS_TABLES"
+            ",NO_ZERO_IN_DATE"
+            ",NO_ZERO_DATE"
+            ",ERROR_FOR_DIVISION_BY_ZERO"
+            ",NO_ENGINE_SUBSTITUTION"
+        ),
     },
 },
 "jobs": {
@@ -352,10 +409,12 @@ default_config = {
     # is set to ClusterIP
     # ---------------------------------------------------------------------
     # Note: adding a mode requires special handling on
-    # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+    # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
     # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
     "add_templated_ingress_host_mode": "never",
     "explicit_ack": "enabled",
+    # size of serving spec to move to config maps
+    "serving_spec_env_cutoff": 0,
 },
 "logs": {
     "decode": {
@@ -414,7 +473,6 @@ default_config = {
     "followers": "",
     # This is used as the interval for the sync loop both when mlrun is leader and follower
     "periodic_sync_interval": "1 minute",
-    "counters_cache_ttl": "2 minutes",
     "project_owners_cache_ttl": "30 seconds",
     # access key to be used when the leader is iguazio and polling is done from it
     "iguazio_access_key": "",
@@ -443,10 +501,10 @@ default_config = {
     # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
     # git+https://github.com/mlrun/mlrun@development. by default uses the version
     "mlrun_version_specifier": "",
-    "kaniko_image": "gcr.io/kaniko-project/executor:v1.8.0",  # kaniko builder image
+    "kaniko_image": "gcr.io/kaniko-project/executor:v1.23.2",  # kaniko builder image
     "kaniko_init_container_image": "alpine:3.18",
     # image for kaniko init container when docker registry is ECR
-    "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
+    "kaniko_aws_cli_image": "amazon/aws-cli:2.17.16",
     # kaniko sometimes fails to get filesystem from image, this is a workaround to retry the process
     # a known issue in Kaniko - https://github.com/GoogleContainerTools/kaniko/issues/1717
     "kaniko_image_fs_extraction_retries": "3",
@@ -473,17 +531,71 @@ default_config = {
         # if set to true, will log a warning for trying to use run db functionality while in nop db mode
         "verbose": True,
     },
+    "pagination": {
+        "default_page_size": 20,
+        "pagination_cache": {
+            "interval": 60,
+            "ttl": 3600,
+            "max_size": 10000,
+        },
+    },
 },
 "model_endpoint_monitoring": {
-    "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
-    "application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
-    "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
+    "serving_stream": {
+        "v3io": {
+            "shard_count": 2,
+            "retention_period_hours": 24,
+            "num_workers": 1,
+            "min_replicas": 2,
+            "max_replicas": 2,
+        },
+        "kafka": {
+            "partition_count": 8,
+            "replication_factor": 1,
+            "num_workers": 2,
+            "min_replicas": 1,
+            "max_replicas": 4,
+        },
+    },
+    "application_stream_args": {
+        "v3io": {
+            "shard_count": 1,
+            "retention_period_hours": 24,
+            "num_workers": 1,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+        "kafka": {
+            "partition_count": 1,
+            "replication_factor": 1,
+            "num_workers": 1,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+    },
+    "writer_stream_args": {
+        "v3io": {
+            "shard_count": 1,
+            "retention_period_hours": 24,
+            "num_workers": 1,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+        "kafka": {
+            "partition_count": 1,
+            # TODO: add retention period configuration
+            "replication_factor": 1,
+            "num_workers": 1,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+    },
     # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
     # stream, and endpoints.
     "store_prefixes": {
         "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
         "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
-        "stream": "",
+        "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
     },
     # Offline storage path can be either relative or a full path. This path is used for general offline data
     # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -492,12 +604,18 @@ default_config = {
     # when the user is working in CE environment and has not provided any stream path.
     "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
     "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-    "batch_processing_function_branch": "master",
     "parquet_batching_max_events": 10_000,
     "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-    # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
-    "store_type": "v3io-nosql",
+    # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
     "endpoint_store_connection": "",
+    # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+    "tsdb_connection": "",
+    # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
+    "stream_connection": "",
+    "tdengine": {
+        "timeout": 10,
+        "retries": 1,
+    },
 },
 "secret_stores": {
     # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
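At the project level, the new endpoint store, TSDB, and stream connections are typically supplied through the model monitoring credentials API rather than by editing the config directly. A hedged sketch; the parameter names follow the 1.7 set_model_monitoring_credentials signature as we understand it and should be checked against the release docs:

    import mlrun

    project = mlrun.get_or_create_project("my-project")
    # wire the connections introduced above; "v3io" is an illustrative value
    project.set_model_monitoring_credentials(
        endpoint_store_connection="v3io",
        tsdb_connection="v3io",
        stream_path="v3io",
    )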
@@ -533,9 +651,10 @@ default_config = {
 "feature_store": {
     "data_prefixes": {
         "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
-        "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+        "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
         # "authority" is optional and generalizes [userinfo "@"] host [":" port]
-        "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+        "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
+        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
     },
     "default_targets": "parquet,nosql",
     "default_job_image": "mlrun/mlrun",
@@ -610,8 +729,9 @@ default_config = {
 },
 "workflows": {
     "default_workflow_runner_name": "workflow-runner-{}",
-    # Default timeout seconds for retrieving workflow id after execution:
-    "timeouts": {"local": 120, "kfp": 30, "remote": 90},
+    # Default timeout seconds for retrieving workflow id after execution
+    # Remote workflow timeout is the maximum between remote and the inner engine timeout
+    "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
 },
 "log_collector": {
     "address": "localhost:8282",
@@ -628,7 +748,9 @@ default_config = {
     "failed_runs_grace_period": 3600,
     "verbose": True,
     # the number of workers which will be used to trigger the start log collection
-    "concurrent_start_logs_workers": 15,
+    "concurrent_start_logs_workers": 50,
+    # the number of runs for which to start logs on api startup
+    "start_logs_startup_run_limit": 150,
     # the time in hours in which to start log collection from.
     # after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
     # we want to collect their logs in the new log collection method (sidecar)
@@ -670,8 +792,20 @@ default_config = {
         "access_key": "",
     },
     "grafana_url": "",
+    "alerts": {
+        # supported modes: "enabled", "disabled".
+        "mode": "disabled",
+        # maximum number of alerts we allow to be configured.
+        # user will get an error when exceeding this
+        "max_allowed": 10000,
+        # maximum allowed value for count in criteria field inside AlertConfig
+        "max_criteria_count": 100,
+    },
+    "auth_with_client_id": {
+        "enabled": False,
+        "request_timeout": 5,
+    },
 }
-
 _is_running_as_api = None
 
 
@@ -721,7 +855,21 @@ class Config:
         for key, value in cfg.items():
             if hasattr(self, key):
                 if isinstance(value, dict):
-                    getattr(self, key).update(value)
+                    # ignore the `skip_errors` flag here
+                    # if the key does not align with what mlrun config expects it is a user
+                    # input error that can lead to unexpected behavior.
+                    # raise the exception to ensure configuration is loaded correctly and do not
+                    # ignore any errors.
+                    config_value = getattr(self, key)
+                    try:
+                        config_value.update(value)
+                    except AttributeError as exc:
+                        if not isinstance(config_value, (dict, Config)):
+                            raise ValueError(
+                                f"Can not update `{key}` config. "
+                                f"Expected a configuration but received {type(value)}"
+                            ) from exc
+                        raise exc
                 else:
                     try:
                         setattr(self, key, value)
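The practical effect of the change above: merging a dict into a nested config section still works, while pointing a dict at a scalar key now fails loudly instead of silently corrupting the config. A sketch against the public mlrun.mlconf singleton, using keys from the config tree above:

    import mlrun

    # merging into an existing nested section works as before
    mlrun.mlconf.update({"httpdb": {"clusterization": {"chief": {"port": 8081}}}})

    # a dict aimed at a scalar key such as log_level now raises ValueError
    try:
        mlrun.mlconf.update({"log_level": {"unexpected": "dict"}})
    except ValueError as err:
        print(err)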
@@ -769,6 +917,7 @@ class Config:
     ):
         """
         decodes and loads the config attribute to expected type
+
         :param attribute_path: the path in the default_config e.g. preemptible_nodes.node_selector
         :param expected_type: the object type valid values are : `dict`, `list` etc...
         :return: the expected type instance
@@ -792,7 +941,7 @@ class Config:
                 f"Unable to decode {attribute_path}"
             )
         parsed_attribute_value = json.loads(decoded_attribute_value)
-        if type(parsed_attribute_value) != expected_type:
+        if not isinstance(parsed_attribute_value, expected_type):
             raise mlrun.errors.MLRunInvalidArgumentTypeError(
                 f"Expected type {expected_type}, got {type(parsed_attribute_value)}"
             )
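The switch from an exact type() comparison to isinstance matters for subclasses, which the old check rejected. A minimal standalone illustration (AttrDict is a hypothetical subclass, e.g. one produced by a JSON decoder hook):

    class AttrDict(dict):
        """A hypothetical dict subclass."""

    print(type(AttrDict()) == dict)      # False: exact-type comparison rejects subclasses
    print(isinstance(AttrDict(), dict))  # True: isinstance accepts them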
@@ -894,24 +1043,6 @@ class Config:
                 f"is not allowed for iguazio version: {igz_version} < 3.5.1"
             )
 
-    def resolve_kfp_url(self, namespace=None):
-        if config.kfp_url:
-            return config.kfp_url
-        igz_version = self.get_parsed_igz_version()
-        # TODO: When Iguazio 3.4 will deprecate we can remove this line
-        if igz_version and igz_version <= semver.VersionInfo.parse("3.6.0-b1"):
-            if namespace is None:
-                if not config.namespace:
-                    raise mlrun.errors.MLRunNotFoundError(
-                        "For KubeFlow Pipelines to function, a namespace must be configured"
-                    )
-                namespace = config.namespace
-            # When instead of host we provided namespace we tackled this issue
-            # https://github.com/canonical/bundle-kubeflow/issues/412
-            # TODO: When we'll move to kfp 1.4.0 (server side) it should be resolved
-            return f"http://ml-pipeline.{namespace}.svc.cluster.local:8888"
-        return None
-
     def resolve_chief_api_url(self) -> str:
         if self.httpdb.clusterization.chief.url:
             return self.httpdb.clusterization.chief.url
@@ -931,6 +1062,10 @@ class Config:
             self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
@@ -998,6 +1133,14 @@ class Config:
                 resource_requirement.pop(gpu)
         return resource_requirement
 
+    def force_api_gateway_ssl_redirect(self):
+        """
+        Get the default value for the ssl_redirect configuration.
+        In Iguazio we always want to redirect to HTTPS, in other cases we don't.
+        :return: True if we should redirect to HTTPS, False otherwise.
+        """
+        return self.is_running_on_iguazio()
+
     def to_dict(self):
         return copy.deepcopy(self._cfg)
 
@@ -1030,6 +1173,9 @@ class Config:
         # importing here to avoid circular dependency
         import mlrun.db
 
+        # It ensures that SSL verification is set before establishing a connection
+        _configure_ssl_verification(self.httpdb.http.verify)
+
         # when dbpath is set we want to connect to it which will sync configuration from it to the client
         mlrun.db.get_run_db(value, force_reconnect=True)
 
@@ -1058,8 +1204,9 @@ class Config:
         project: str = "",
         kind: str = "",
         target: str = "online",
-        artifact_path: str = None,
-        function_name: str = None,
+        artifact_path: typing.Optional[str] = None,
+        function_name: typing.Optional[str] = None,
+        **kwargs,
     ) -> str:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1085,8 +1232,7 @@ class Config:
         )
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
-
+            return store_prefix_dict[kind].format(project=project, **kwargs)
         if (
             function_name
             and function_name
@@ -1098,10 +1244,16 @@ class Config:
                 if function_name is None
                 else f"{kind}-{function_name.lower()}",
             )
-        return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-            project=project,
-            kind=kind,
-        )
+        elif kind == "stream":
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                project=project,
+                kind=kind,
+            )
+        else:
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                project=project,
+                kind=kind,
+            )
 
         # Get the current offline path from the configuration
         file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
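Passing **kwargs through to every template relies on str.format ignoring keyword arguments the template never references, so prefix templates with different placeholder sets can share one call site. A standalone illustration using the dsnosql prefix from the feature store section above:

    template = "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}"

    # extra keyword arguments the template does not mention are simply ignored
    print(template.format(
        ds_profile_name="my-profile",
        project="demo",
        name="transactions",
        kind="nosql",
        application_name="unused",  # ignored by this template
    ))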
@@ -1159,12 +1311,11 @@ class Config:
 
         return storage_options
 
-    def is_explicit_ack(self, version=None) -> bool:
-        if not version:
-            version = self.nuclio_version
+    def is_explicit_ack_enabled(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
-            not version
-            or semver.VersionInfo.parse(version) >= semver.VersionInfo.parse("1.12.10")
+            not self.nuclio_version
+            or semver.VersionInfo.parse(self.nuclio_version)
+            >= semver.VersionInfo.parse("1.12.10")
        )
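A minimal standalone sketch of the gate logic above, using the semver package the module already imports (the version strings are illustrative): explicit ack is enabled only when the feature flag is on and the nuclio version is unknown or at least 1.12.10.

    import semver

    def explicit_ack_enabled(explicit_ack: str, nuclio_version: str) -> bool:
        # enabled only when the flag is on and the version is empty or >= 1.12.10
        return explicit_ack == "enabled" and (
            not nuclio_version
            or semver.VersionInfo.parse(nuclio_version)
            >= semver.VersionInfo.parse("1.12.10")
        )

    print(explicit_ack_enabled("enabled", "1.13.1"))  # True
    print(explicit_ack_enabled("enabled", "1.12.9"))  # False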
 
@@ -1214,6 +1365,7 @@ def _do_populate(env=None, skip_errors=False):
     if data:
         config.update(data, skip_errors=skip_errors)
 
+    _configure_ssl_verification(config.httpdb.http.verify)
     _validate_config(config)
 
 
@@ -1273,6 +1425,16 @@ def _convert_str(value, typ):
     return typ(value)
 
 
+def _configure_ssl_verification(verify_ssl: bool) -> None:
+    """Configure SSL verification warnings based on the setting."""
+    if not verify_ssl:
+        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+    else:
+        # If the user changes the `verify` setting to `True` at runtime using `mlrun.set_env_from_file` after
+        # importing `mlrun`, we need to reload the `mlrun` configuration and enable this warning.
+        warnings.simplefilter("default", urllib3.exceptions.InsecureRequestWarning)
+
+
 def read_env(env=None, prefix=env_prefix):
     """Read configuration from environment"""
     env = os.environ if env is None else env
@@ -1348,12 +1510,25 @@ def read_env(env=None, prefix=env_prefix):
     if igz_domain:
         config["ui_url"] = f"https://mlrun-ui.{igz_domain}"
 
-    if config.get("log_level"):
+    if log_level := config.get("log_level"):
         import mlrun.utils.logger
 
         # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
         # level manually
-        mlrun.utils.logger.set_logger_level(config["log_level"])
+        mlrun.utils.logger.set_logger_level(log_level)
+
+    if log_formatter_name := config.get("log_formatter"):
+        import mlrun.utils.logger
+
+        log_formatter = mlrun.utils.resolve_formatter_by_kind(
+            mlrun.utils.FormatterKinds(log_formatter_name)
+        )
+        current_handler = mlrun.utils.logger.get_handler("default")
+        current_formatter_name = current_handler.formatter.__class__.__name__
+        desired_formatter_name = log_formatter.__name__
+        if current_formatter_name != desired_formatter_name:
+            current_handler.setFormatter(log_formatter())
+
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
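With this change the formatter, like the level, can be selected per environment. A sketch using the MLRUN_ env-var convention; human_extended is one of the options listed in the config defaults above:

    import os

    os.environ["MLRUN_LOG_LEVEL"] = "DEBUG"
    os.environ["MLRUN_LOG_FORMATTER"] = "human_extended"

    import mlrun  # read_env picks both values up when the config is populated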
mlrun/data_types/data_types.py CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):
@@ -68,6 +70,11 @@ def pa_type_to_value_type(type_):
     if isinstance(type_, TimestampType):
         return ValueType.DATETIME
 
+    # pandas category type translates to pyarrow DictionaryType
+    # we need to unpack the value type (ML-7868)
+    if isinstance(type_, pyarrow.DictionaryType):
+        type_ = type_.value_type
+
     type_map = {
         pyarrow.bool_(): ValueType.BOOL,
         pyarrow.int64(): ValueType.INT64,
@@ -102,6 +109,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
@@ -115,6 +124,7 @@ def spark_to_value_type(data_type):
         "double": ValueType.DOUBLE,
         "boolean": ValueType.BOOL,
         "timestamp": ValueType.DATETIME,
+        "timestamp_ntz": ValueType.DATETIME,
         "string": ValueType.STRING,
         "array": "list",
         "map": "dict",
@@ -135,7 +145,7 @@ def gbq_to_pandas_dtype(gbq_type):
         "BOOL": "bool",
         "FLOAT": "float64",
         "INTEGER": pd.Int64Dtype(),
-        "TIMESTAMP": "datetime64[ns]",
+        "TIMESTAMP": "datetime64[ns, UTC]",
     }
     return type_map.get(gbq_type, "object")
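The DictionaryType unpacking above handles pandas categorical columns, which Arrow encodes as dictionaries whose value_type carries the underlying dtype. A standalone illustration:

    import pandas as pd
    import pyarrow as pa

    df = pd.DataFrame({"color": pd.Categorical(["red", "green", "red"])})
    field_type = pa.Table.from_pandas(df).schema.field("color").type

    print(isinstance(field_type, pa.DictionaryType))  # True
    print(field_type.value_type)  # string, which then maps to ValueType.STRING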
 
mlrun/data_types/spark.py CHANGED
@@ -18,12 +18,12 @@ from os import environ
 import numpy as np
 import pytz
 from pyspark.sql.functions import to_utc_timestamp
-from pyspark.sql.types import BooleanType, DoubleType, TimestampType
+from pyspark.sql.types import BooleanType, DoubleType
 
+from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger
 
 from .data_types import InferOptions, spark_to_value_type
-from .to_pandas import toPandas
 
 try:
     import pyspark.sql.functions as funcs
@@ -75,7 +75,7 @@ def get_df_preview_spark(df, preview_lines=20):
     """capture preview data from spark df"""
     df = df.limit(preview_lines)
 
-    result_dict = toPandas(df).to_dict(orient="split")
+    result_dict = spark_df_to_pandas(df).to_dict(orient="split")
     return [result_dict["columns"], *result_dict["data"]]
 
 
@@ -143,7 +145,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
     timestamp_columns = set()
     boolean_columns = set()
     for field in df_after_type_casts.schema.fields:
-        is_timestamp = isinstance(field.dataType, TimestampType)
+        # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+        is_timestamp = field.dataType.typeName().startswith("timestamp")
         is_boolean = isinstance(field.dataType, BooleanType)
         if is_timestamp:
             df_after_type_casts = df_after_type_casts.withColumn(
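The string check covers both timestamp flavors without importing TimestampNTZType, which would fail on PySpark versions older than 3.4. A standalone illustration (the NTZ import itself requires PySpark 3.4+):

    from pyspark.sql.types import TimestampNTZType, TimestampType

    print(TimestampType().typeName())     # 'timestamp'
    print(TimestampNTZType().typeName())  # 'timestamp_ntz'
    # both match the startswith("timestamp") check used above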