mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (291)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +26 -112
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +46 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +47 -48
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +69 -0
  13. mlrun/common/db/sql_session.py +2 -3
  14. mlrun/common/formatters/__init__.py +19 -0
  15. mlrun/common/formatters/artifact.py +21 -0
  16. mlrun/common/formatters/base.py +78 -0
  17. mlrun/common/formatters/function.py +41 -0
  18. mlrun/common/formatters/pipeline.py +53 -0
  19. mlrun/common/formatters/project.py +51 -0
  20. mlrun/common/helpers.py +1 -2
  21. mlrun/common/model_monitoring/helpers.py +9 -5
  22. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  23. mlrun/common/schemas/__init__.py +24 -4
  24. mlrun/common/schemas/alert.py +203 -0
  25. mlrun/common/schemas/api_gateway.py +148 -0
  26. mlrun/common/schemas/artifact.py +18 -8
  27. mlrun/common/schemas/auth.py +11 -5
  28. mlrun/common/schemas/background_task.py +1 -1
  29. mlrun/common/schemas/client_spec.py +4 -1
  30. mlrun/common/schemas/feature_store.py +16 -16
  31. mlrun/common/schemas/frontend_spec.py +8 -7
  32. mlrun/common/schemas/function.py +5 -1
  33. mlrun/common/schemas/hub.py +11 -18
  34. mlrun/common/schemas/memory_reports.py +2 -2
  35. mlrun/common/schemas/model_monitoring/__init__.py +18 -3
  36. mlrun/common/schemas/model_monitoring/constants.py +83 -26
  37. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  38. mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
  39. mlrun/common/schemas/notification.py +4 -4
  40. mlrun/common/schemas/object.py +2 -2
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +1 -10
  43. mlrun/common/schemas/project.py +24 -23
  44. mlrun/common/schemas/runtime_resource.py +8 -12
  45. mlrun/common/schemas/schedule.py +3 -3
  46. mlrun/common/schemas/tag.py +1 -2
  47. mlrun/common/schemas/workflow.py +2 -2
  48. mlrun/common/types.py +7 -1
  49. mlrun/config.py +54 -17
  50. mlrun/data_types/to_pandas.py +10 -12
  51. mlrun/datastore/__init__.py +5 -8
  52. mlrun/datastore/alibaba_oss.py +130 -0
  53. mlrun/datastore/azure_blob.py +17 -5
  54. mlrun/datastore/base.py +62 -39
  55. mlrun/datastore/datastore.py +28 -9
  56. mlrun/datastore/datastore_profile.py +146 -20
  57. mlrun/datastore/filestore.py +0 -1
  58. mlrun/datastore/google_cloud_storage.py +6 -2
  59. mlrun/datastore/hdfs.py +56 -0
  60. mlrun/datastore/inmem.py +2 -2
  61. mlrun/datastore/redis.py +6 -2
  62. mlrun/datastore/s3.py +9 -0
  63. mlrun/datastore/snowflake_utils.py +43 -0
  64. mlrun/datastore/sources.py +201 -96
  65. mlrun/datastore/spark_utils.py +1 -2
  66. mlrun/datastore/store_resources.py +7 -7
  67. mlrun/datastore/targets.py +358 -104
  68. mlrun/datastore/utils.py +72 -58
  69. mlrun/datastore/v3io.py +5 -1
  70. mlrun/db/base.py +185 -35
  71. mlrun/db/factory.py +1 -1
  72. mlrun/db/httpdb.py +614 -179
  73. mlrun/db/nopdb.py +210 -26
  74. mlrun/errors.py +12 -1
  75. mlrun/execution.py +41 -24
  76. mlrun/feature_store/__init__.py +0 -2
  77. mlrun/feature_store/api.py +40 -72
  78. mlrun/feature_store/common.py +1 -1
  79. mlrun/feature_store/feature_set.py +76 -55
  80. mlrun/feature_store/feature_vector.py +28 -30
  81. mlrun/feature_store/ingestion.py +7 -6
  82. mlrun/feature_store/retrieval/base.py +16 -11
  83. mlrun/feature_store/retrieval/conversion.py +11 -13
  84. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  85. mlrun/feature_store/retrieval/job.py +9 -3
  86. mlrun/feature_store/retrieval/local_merger.py +2 -0
  87. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  88. mlrun/feature_store/steps.py +37 -34
  89. mlrun/features.py +9 -20
  90. mlrun/frameworks/_common/artifacts_library.py +9 -9
  91. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  92. mlrun/frameworks/_common/model_handler.py +48 -48
  93. mlrun/frameworks/_common/plan.py +2 -3
  94. mlrun/frameworks/_common/producer.py +3 -4
  95. mlrun/frameworks/_common/utils.py +5 -5
  96. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  97. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  98. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  99. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  100. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  101. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  102. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  103. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  104. mlrun/frameworks/_ml_common/plan.py +1 -1
  105. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  109. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  110. mlrun/frameworks/_ml_common/utils.py +4 -4
  111. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  112. mlrun/frameworks/huggingface/model_server.py +4 -4
  113. mlrun/frameworks/lgbm/__init__.py +33 -33
  114. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  117. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  119. mlrun/frameworks/lgbm/model_handler.py +10 -10
  120. mlrun/frameworks/lgbm/model_server.py +6 -6
  121. mlrun/frameworks/lgbm/utils.py +5 -5
  122. mlrun/frameworks/onnx/dataset.py +8 -8
  123. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  124. mlrun/frameworks/onnx/model_handler.py +6 -6
  125. mlrun/frameworks/onnx/model_server.py +7 -7
  126. mlrun/frameworks/parallel_coordinates.py +4 -3
  127. mlrun/frameworks/pytorch/__init__.py +18 -18
  128. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  129. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  130. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  131. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  132. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  133. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  134. mlrun/frameworks/pytorch/model_handler.py +17 -17
  135. mlrun/frameworks/pytorch/model_server.py +7 -7
  136. mlrun/frameworks/sklearn/__init__.py +13 -13
  137. mlrun/frameworks/sklearn/estimator.py +4 -4
  138. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  139. mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
  140. mlrun/frameworks/sklearn/model_handler.py +2 -2
  141. mlrun/frameworks/tf_keras/__init__.py +10 -7
  142. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  143. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  144. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  145. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  146. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  147. mlrun/frameworks/tf_keras/model_server.py +6 -6
  148. mlrun/frameworks/xgboost/__init__.py +13 -13
  149. mlrun/frameworks/xgboost/model_handler.py +6 -6
  150. mlrun/k8s_utils.py +14 -16
  151. mlrun/launcher/__init__.py +1 -1
  152. mlrun/launcher/base.py +16 -15
  153. mlrun/launcher/client.py +8 -6
  154. mlrun/launcher/factory.py +1 -1
  155. mlrun/launcher/local.py +17 -11
  156. mlrun/launcher/remote.py +16 -10
  157. mlrun/lists.py +7 -6
  158. mlrun/model.py +238 -73
  159. mlrun/model_monitoring/__init__.py +1 -1
  160. mlrun/model_monitoring/api.py +138 -315
  161. mlrun/model_monitoring/application.py +5 -296
  162. mlrun/model_monitoring/applications/__init__.py +24 -0
  163. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  164. mlrun/model_monitoring/applications/base.py +282 -0
  165. mlrun/model_monitoring/applications/context.py +214 -0
  166. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  167. mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
  168. mlrun/model_monitoring/applications/results.py +99 -0
  169. mlrun/model_monitoring/controller.py +104 -84
  170. mlrun/model_monitoring/controller_handler.py +13 -5
  171. mlrun/model_monitoring/db/__init__.py +18 -0
  172. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  173. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  174. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
  175. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  176. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  177. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  178. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  179. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  180. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  181. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  182. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
  183. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  184. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  185. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  187. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  188. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  189. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  190. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  191. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  192. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  193. mlrun/model_monitoring/evidently_application.py +6 -118
  194. mlrun/model_monitoring/features_drift_table.py +134 -106
  195. mlrun/model_monitoring/helpers.py +127 -28
  196. mlrun/model_monitoring/metrics/__init__.py +13 -0
  197. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  198. mlrun/model_monitoring/model_endpoint.py +3 -2
  199. mlrun/model_monitoring/prometheus.py +1 -4
  200. mlrun/model_monitoring/stream_processing.py +62 -231
  201. mlrun/model_monitoring/tracking_policy.py +9 -2
  202. mlrun/model_monitoring/writer.py +152 -124
  203. mlrun/package/__init__.py +6 -6
  204. mlrun/package/context_handler.py +5 -5
  205. mlrun/package/packager.py +7 -7
  206. mlrun/package/packagers/default_packager.py +6 -6
  207. mlrun/package/packagers/numpy_packagers.py +15 -15
  208. mlrun/package/packagers/pandas_packagers.py +5 -5
  209. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  210. mlrun/package/packagers_manager.py +19 -23
  211. mlrun/package/utils/_formatter.py +6 -6
  212. mlrun/package/utils/_pickler.py +2 -2
  213. mlrun/package/utils/_supported_format.py +4 -4
  214. mlrun/package/utils/log_hint_utils.py +2 -2
  215. mlrun/package/utils/type_hint_utils.py +4 -9
  216. mlrun/platforms/__init__.py +11 -10
  217. mlrun/platforms/iguazio.py +24 -203
  218. mlrun/projects/operations.py +35 -21
  219. mlrun/projects/pipelines.py +68 -99
  220. mlrun/projects/project.py +830 -266
  221. mlrun/render.py +3 -11
  222. mlrun/run.py +162 -166
  223. mlrun/runtimes/__init__.py +62 -7
  224. mlrun/runtimes/base.py +39 -32
  225. mlrun/runtimes/daskjob.py +8 -8
  226. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  227. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
  228. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  229. mlrun/runtimes/funcdoc.py +0 -28
  230. mlrun/runtimes/function_reference.py +1 -1
  231. mlrun/runtimes/kubejob.py +28 -122
  232. mlrun/runtimes/local.py +6 -3
  233. mlrun/runtimes/mpijob/__init__.py +0 -20
  234. mlrun/runtimes/mpijob/abstract.py +9 -10
  235. mlrun/runtimes/mpijob/v1.py +1 -1
  236. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  237. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  238. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  239. mlrun/runtimes/nuclio/application/application.py +523 -0
  240. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  241. mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
  242. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  243. mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
  244. mlrun/runtimes/pod.py +286 -88
  245. mlrun/runtimes/remotesparkjob.py +2 -2
  246. mlrun/runtimes/sparkjob/spark3job.py +51 -34
  247. mlrun/runtimes/utils.py +7 -75
  248. mlrun/secrets.py +9 -5
  249. mlrun/serving/remote.py +2 -7
  250. mlrun/serving/routers.py +13 -10
  251. mlrun/serving/server.py +22 -26
  252. mlrun/serving/states.py +99 -25
  253. mlrun/serving/utils.py +3 -3
  254. mlrun/serving/v1_serving.py +6 -7
  255. mlrun/serving/v2_serving.py +59 -20
  256. mlrun/track/tracker.py +2 -1
  257. mlrun/track/tracker_manager.py +3 -3
  258. mlrun/track/trackers/mlflow_tracker.py +1 -2
  259. mlrun/utils/async_http.py +5 -7
  260. mlrun/utils/azure_vault.py +1 -1
  261. mlrun/utils/clones.py +1 -2
  262. mlrun/utils/condition_evaluator.py +3 -3
  263. mlrun/utils/db.py +3 -3
  264. mlrun/utils/helpers.py +183 -197
  265. mlrun/utils/http.py +2 -5
  266. mlrun/utils/logger.py +76 -14
  267. mlrun/utils/notifications/notification/__init__.py +17 -12
  268. mlrun/utils/notifications/notification/base.py +14 -2
  269. mlrun/utils/notifications/notification/console.py +2 -0
  270. mlrun/utils/notifications/notification/git.py +3 -1
  271. mlrun/utils/notifications/notification/ipython.py +3 -1
  272. mlrun/utils/notifications/notification/slack.py +101 -21
  273. mlrun/utils/notifications/notification/webhook.py +11 -1
  274. mlrun/utils/notifications/notification_pusher.py +155 -30
  275. mlrun/utils/retryer.py +208 -0
  276. mlrun/utils/singleton.py +1 -1
  277. mlrun/utils/v3io_clients.py +2 -4
  278. mlrun/utils/version/version.json +2 -2
  279. mlrun/utils/version/version.py +2 -6
  280. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
  281. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  282. mlrun/kfpops.py +0 -868
  283. mlrun/model_monitoring/batch.py +0 -1095
  284. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  285. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  286. mlrun/platforms/other.py +0 -306
  287. mlrun-1.6.4rc2.dist-info/RECORD +0 -314
  288. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  289. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
  290. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  291. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/tag.py CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import typing
 
 import pydantic
 
@@ -29,4 +28,4 @@ class TagObjects(pydantic.BaseModel):
 
     kind: str
     # TODO: Add more types to the list for new supported tagged objects
-    identifiers: typing.List[ArtifactIdentifier]
+    identifiers: list[ArtifactIdentifier]
mlrun/common/schemas/workflow.py CHANGED
@@ -36,12 +36,12 @@ class WorkflowSpec(pydantic.BaseModel):
 
 class WorkflowRequest(pydantic.BaseModel):
     spec: typing.Optional[WorkflowSpec] = None
-    arguments: typing.Optional[typing.Dict] = None
+    arguments: typing.Optional[dict] = None
     artifact_path: typing.Optional[str] = None
     source: typing.Optional[str] = None
     run_name: typing.Optional[str] = None
     namespace: typing.Optional[str] = None
-    notifications: typing.Optional[typing.List[Notification]] = None
+    notifications: typing.Optional[list[Notification]] = None
 
 
 class WorkflowResponse(pydantic.BaseModel):
mlrun/common/types.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
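Because StrEnum mixes in str, the new HTTPMethod members compare equal to plain strings and drop straight into string-based APIs. A minimal self-contained sketch of the same pattern (mirroring mlrun.common.types rather than importing it):

    import enum


    class StrEnum(str, enum.Enum):
        # members are real strings; mlrun.common.types uses the same mixin
        def __repr__(self):
            return self.value


    class HTTPMethod(StrEnum):
        GET = "GET"
        POST = "POST"


    # members interoperate with plain strings and string methods
    assert HTTPMethod.GET == "GET"
    assert HTTPMethod.POST.lower() == "post"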
mlrun/config.py CHANGED
@@ -17,7 +17,7 @@ Configuration system.
 Configuration can be in either a configuration file specified by
 MLRUN_CONFIG_FILE environment variable or by environment variables.
 
-Environment variables are in the format "MLRUN_httpdb__port=8080". This will be
+Environment variables are in the format "MLRUN_HTTPDB__PORT=8080". This will be
 mapped to config.httpdb.port. Values should be in JSON format.
 """
 
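The corrected docstring shows the conventional uppercase form; the loader strips the MLRUN_ prefix, lowercases the rest, and splits on the double underscore to address nested keys. A rough standalone sketch of that mapping (a hypothetical re-implementation for illustration, not mlrun's actual loader):

    import json
    import os

    os.environ["MLRUN_HTTPDB__PORT"] = "8080"  # values are parsed as JSON

    def read_mlrun_env_vars() -> dict:
        # hypothetical sketch: "MLRUN_HTTPDB__PORT" -> ["httpdb", "port"]
        config: dict = {}
        for name, raw in os.environ.items():
            if not name.startswith("MLRUN_"):
                continue
            keys = name[len("MLRUN_") :].lower().split("__")
            node = config
            for key in keys[:-1]:
                node = node.setdefault(key, {})
            try:
                node[keys[-1]] = json.loads(raw)  # "8080" -> 8080
            except json.JSONDecodeError:
                node[keys[-1]] = raw  # keep non-JSON values as plain strings
        return config

    assert read_mlrun_env_vars()["httpdb"]["port"] == 8080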
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -149,7 +150,6 @@ default_config = {
         "url": "",
     },
     "v3io_framesd": "http://framesd:8080",
-    "datastore": {"async_source_mode": "disabled"},
     # default node selector to be applied to all functions - json string base64 encoded format
     "default_function_node_selector": "e30=",
     # default priority class to be applied to functions running on k8s cluster
@@ -189,6 +189,7 @@ default_config = {
     "background_tasks": {
         # enabled / disabled
         "timeout_mode": "enabled",
+        "function_deletion_batch_size": 10,
         # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
         "default_timeouts": {
             "operations": {
@@ -197,6 +198,7 @@ default_config = {
                 "run_abortion": "600",
                 "abort_grace_period": "10",
                 "delete_project": "900",
+                "delete_function": "900",
             },
             "runtimes": {"dask": "600"},
         },
@@ -231,6 +233,10 @@ default_config = {
         "databricks": {
             "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
         },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
     },
     # TODO: function defaults should be moved to the function spec config above
     "function_defaults": {
@@ -241,6 +247,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
+        "application": "python:3.9-slim",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -325,7 +332,13 @@ default_config = {
             # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
             #
             # if set to "nil" or "none", nothing would be set
-            "modes": "STRICT_TRANS_TABLES",
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION"
+            ),
         },
     },
     "jobs": {
@@ -353,10 +366,12 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
-        # - mlrun.runtimes.function.enrich_function_with_ingress
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -474,6 +489,14 @@ default_config = {
             # if set to true, will log a warning for trying to use run db functionality while in nop db mode
             "verbose": True,
         },
+        "pagination": {
+            "default_page_size": 20,
+            "pagination_cache": {
+                "interval": 60,
+                "ttl": 3600,
+                "max_size": 10000,
+            },
+        },
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
  "model_endpoint_monitoring": {
479
502
  "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -485,6 +508,7 @@ default_config = {
485
508
  "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
486
509
  "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
487
510
  "stream": "",
511
+ "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
488
512
  },
489
513
  # Offline storage path can be either relative or a full path. This path is used for general offline data
490
514
  # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -493,12 +517,14 @@ default_config = {
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
         "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-        "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
+        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+        "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -534,9 +560,10 @@ default_config = {
     "feature_store": {
         "data_prefixes": {
             "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
-            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+            "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
             # "authority" is optional and generalizes [userinfo "@"] host [":" port]
-            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+            "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
+            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
@@ -672,6 +699,10 @@ default_config = {
         "access_key": "",
     },
     "grafana_url": "",
+    "alerts": {
+        # supported modes: "enabled", "disabled".
+        "mode": "enabled"
+    },
     "auth_with_client_id": {
         "enabled": False,
         "request_timeout": 5,
@@ -937,6 +968,10 @@ class Config:
             self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
@@ -1065,7 +1100,8 @@ class Config:
         kind: str = "",
         target: str = "online",
         artifact_path: str = None,
-        application_name: str = None,
+        function_name: str = None,
+        **kwargs,
     ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1080,7 +1116,7 @@ class Config:
                               artifact path instead.
         :param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
                               relative artifact path will be taken from the global MLRun artifact path.
-        :param application_name: Application name, None for model_monitoring_stream.
+        :param function_name: Application name, None for model_monitoring_stream.
 
         :return: Full configured path for the provided kind. Can be either a single path
                  or a list of paths in the case of the online model monitoring stream path.
@@ -1092,17 +1128,18 @@ class Config:
         )
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
         if (
-            application_name
+            function_name
+            and function_name
             != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
         ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
                 kind=kind
-                if application_name is None
-                else f"{kind}-{application_name.lower()}",
+                if function_name is None
+                else f"{kind}-{function_name.lower()}",
             )
         elif kind == "stream":  # return list for mlrun<1.6.3 BC
             return [
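With the rename, a monitoring function's user-space path is derived by suffixing the kind with the lower-cased function name. Illustrating with the user_space template from the defaults above (project and function names are made up):

    user_space = "v3io:///projects/{project}/model-endpoints/{kind}"

    project, kind, function_name = "fraud-demo", "stream", "MyApp"
    path = user_space.format(project=project, kind=f"{kind}-{function_name.lower()}")
    assert path == "v3io:///projects/fraud-demo/model-endpoints/stream-myapp"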
@@ -1148,7 +1185,7 @@ class Config:
             ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
         )
 
-    def get_s3_storage_options(self) -> typing.Dict[str, typing.Any]:
+    def get_s3_storage_options(self) -> dict[str, typing.Any]:
         """
         Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
         graph uses this method for generating the storage options for S3 parquet target path.
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
                 msg = (
                     "toPandas attempted Arrow optimization because "
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n %s\n"
+                    f"failed by the reason below:\n {e}\n"
                     "Attempting non-optimization as "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true." % str(e)
+                    "true."
                 )
                 warnings.warn(msg)
                 use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                     "reached the error below and will not continue because automatic fallback "
                     "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                    "false.\n %s" % str(e)
+                    f"false.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -94,9 +94,7 @@ def toPandas(spark_df):
             )
 
             # Rename columns to avoid duplicated column names.
-            tmp_column_names = [
-                "col_{}".format(i) for i in range(len(spark_df.columns))
-            ]
+            tmp_column_names = [f"col_{i}" for i in range(len(spark_df.columns))]
             self_destruct = spark_df.sql_ctx._conf.arrowPySparkSelfDestructEnabled()
             batches = spark_df.toDF(*tmp_column_names)._collect_as_arrow(
                 split_batches=self_destruct
@@ -146,7 +144,7 @@ def toPandas(spark_df):
                     "reached the error below and can not continue. Note that "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                     "effect on failures in the middle of "
-                    "computation.\n %s" % str(e)
+                    f"computation.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -156,10 +154,10 @@ def toPandas(spark_df):
         column_counter = Counter(spark_df.columns)
 
         dtype = [None] * len(spark_df.schema)
-        for fieldIdx, field in enumerate(spark_df.schema):
+        for field_idx, field in enumerate(spark_df.schema):
             # For duplicate column name, we use `iloc` to access it.
             if column_counter[field.name] > 1:
-                pandas_col = pdf.iloc[:, fieldIdx]
+                pandas_col = pdf.iloc[:, field_idx]
             else:
                 pandas_col = pdf[field.name]
 
@@ -173,12 +171,12 @@ def toPandas(spark_df):
                 and field.nullable
                 and pandas_col.isnull().any()
             ):
-                dtype[fieldIdx] = pandas_type
+                dtype[field_idx] = pandas_type
             # Ensure we fall back to nullable numpy types, even when whole column is null:
             if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = np.float64
+                dtype[field_idx] = np.float64
             if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = object
+                dtype[field_idx] = object
 
         df = pd.DataFrame()
         for index, t in enumerate(dtype):
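The np.float64 and object fallbacks mirror how pandas represents missing data: NaN is a float, so an integer column containing nulls cannot stay integer-typed, and a boolean column with nulls degrades to object. For example:

    import numpy as np
    import pandas as pd

    # an integer column containing a null is promoted to float64 (NaN is a float)
    assert pd.Series([1, 2, None]).dtype == np.float64

    # a boolean column containing a null falls back to object
    assert pd.Series([True, False, None]).dtype == object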
mlrun/datastore/__init__.py CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
     parse_store_uri,
 )
 from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
-from .utils import parse_kafka_url
+from .utils import get_kafka_brokers_from_dict, parse_kafka_url
 
 store_manager = StoreManager()
 
@@ -107,13 +107,10 @@ def get_stream_pusher(stream_path: str, **kwargs):
     :param stream_path: path/url of stream
     """
 
-    if stream_path.startswith("kafka://") or "kafka_bootstrap_servers" in kwargs:
-        topic, bootstrap_servers = parse_kafka_url(
-            stream_path, kwargs.get("kafka_bootstrap_servers")
-        )
-        return KafkaOutputStream(
-            topic, bootstrap_servers, kwargs.get("kafka_producer_options")
-        )
+    kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+    if stream_path.startswith("kafka://") or kafka_brokers:
+        topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+        return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
     elif stream_path.startswith("http://") or stream_path.startswith("https://"):
         return HTTPOutputStream(stream_path=stream_path)
     elif "://" not in stream_path:
mlrun/datastore/alibaba_oss.py ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
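With the three ALIBABA_* variables set, and assuming the store is registered under an oss:// scheme (the registration is not shown in this hunk), the new store is reachable through the generic data-item API. A sketch with placeholder credentials and paths:

    import os

    import mlrun

    # placeholder values; real ones come from your Alibaba Cloud account
    os.environ["ALIBABA_ACCESS_KEY_ID"] = "my-key-id"
    os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "my-secret"
    os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

    item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
    df = item.as_df()  # read the object as a pandas DataFrame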
mlrun/datastore/azure_blob.py CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value
 
         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-        else:
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
@@ -199,3 +198,16 @@ class AzureBlobStore(DataStore):
             )
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
+
+    @property
+    def spark_url(self):
+        spark_options = self.get_spark_options()
+        url = f"wasbs://{self.endpoint}"
+        prefix = "spark.hadoop.fs.azure.account.key."
+        if spark_options:
+            for key in spark_options:
+                if key.startswith(prefix):
+                    account_key = key[len(prefix) :]
+                    url += f"@{account_key}"
+                    break
+        return url
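The new spark_url property appends the storage-account host parsed out of the first account-key Spark option, yielding a wasbs://<container>@<account-host> URL for Spark. A standalone sketch of the same string manipulation (container and account names are made up):

    endpoint = "my-container"  # the store endpoint is the blob container
    spark_options = {
        "spark.hadoop.fs.azure.account.key.myaccount.blob.core.windows.net": "<key>",
    }

    url = f"wasbs://{endpoint}"
    prefix = "spark.hadoop.fs.azure.account.key."
    for key in spark_options:
        if key.startswith(prefix):
            url += f"@{key[len(prefix) :]}"  # the host follows the option prefix
            break

    assert url == "wasbs://my-container@myaccount.blob.core.windows.net"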