mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (259) hide show
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +2 -3
  5. mlrun/artifacts/base.py +55 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +378 -0
  8. mlrun/artifacts/manager.py +26 -17
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +8 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +67 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +68 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/data_types.py +1 -0
  56. mlrun/data_types/infer.py +3 -1
  57. mlrun/data_types/spark.py +5 -3
  58. mlrun/data_types/to_pandas.py +11 -2
  59. mlrun/datastore/__init__.py +2 -2
  60. mlrun/datastore/alibaba_oss.py +4 -1
  61. mlrun/datastore/azure_blob.py +4 -1
  62. mlrun/datastore/base.py +12 -4
  63. mlrun/datastore/datastore.py +9 -3
  64. mlrun/datastore/datastore_profile.py +79 -20
  65. mlrun/datastore/dbfs_store.py +4 -1
  66. mlrun/datastore/filestore.py +4 -1
  67. mlrun/datastore/google_cloud_storage.py +4 -1
  68. mlrun/datastore/hdfs.py +4 -1
  69. mlrun/datastore/inmem.py +4 -1
  70. mlrun/datastore/redis.py +4 -1
  71. mlrun/datastore/s3.py +4 -1
  72. mlrun/datastore/sources.py +52 -51
  73. mlrun/datastore/store_resources.py +7 -4
  74. mlrun/datastore/targets.py +23 -22
  75. mlrun/datastore/utils.py +2 -2
  76. mlrun/datastore/v3io.py +4 -1
  77. mlrun/datastore/vectorstore.py +229 -0
  78. mlrun/datastore/wasbfs/fs.py +13 -12
  79. mlrun/db/base.py +213 -83
  80. mlrun/db/factory.py +0 -3
  81. mlrun/db/httpdb.py +1265 -387
  82. mlrun/db/nopdb.py +205 -74
  83. mlrun/errors.py +2 -2
  84. mlrun/execution.py +136 -50
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +41 -40
  87. mlrun/feature_store/common.py +9 -9
  88. mlrun/feature_store/feature_set.py +20 -18
  89. mlrun/feature_store/feature_vector.py +27 -24
  90. mlrun/feature_store/retrieval/base.py +14 -9
  91. mlrun/feature_store/retrieval/job.py +2 -1
  92. mlrun/feature_store/steps.py +2 -2
  93. mlrun/features.py +30 -13
  94. mlrun/frameworks/__init__.py +1 -2
  95. mlrun/frameworks/_common/__init__.py +1 -2
  96. mlrun/frameworks/_common/artifacts_library.py +2 -2
  97. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  98. mlrun/frameworks/_common/model_handler.py +29 -27
  99. mlrun/frameworks/_common/producer.py +3 -1
  100. mlrun/frameworks/_dl_common/__init__.py +1 -2
  101. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  102. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  103. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  104. mlrun/frameworks/_ml_common/__init__.py +1 -2
  105. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  107. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  108. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  109. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  110. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  111. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  112. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  113. mlrun/frameworks/huggingface/__init__.py +1 -2
  114. mlrun/frameworks/huggingface/model_server.py +9 -9
  115. mlrun/frameworks/lgbm/__init__.py +47 -44
  116. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  117. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  118. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  119. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  120. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  121. mlrun/frameworks/lgbm/model_handler.py +15 -11
  122. mlrun/frameworks/lgbm/model_server.py +11 -7
  123. mlrun/frameworks/lgbm/utils.py +2 -2
  124. mlrun/frameworks/onnx/__init__.py +1 -2
  125. mlrun/frameworks/onnx/dataset.py +3 -3
  126. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  127. mlrun/frameworks/onnx/model_handler.py +7 -5
  128. mlrun/frameworks/onnx/model_server.py +8 -6
  129. mlrun/frameworks/parallel_coordinates.py +11 -11
  130. mlrun/frameworks/pytorch/__init__.py +22 -23
  131. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  132. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  133. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  134. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  135. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  136. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  137. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  138. mlrun/frameworks/pytorch/model_handler.py +21 -17
  139. mlrun/frameworks/pytorch/model_server.py +13 -9
  140. mlrun/frameworks/sklearn/__init__.py +19 -18
  141. mlrun/frameworks/sklearn/estimator.py +2 -2
  142. mlrun/frameworks/sklearn/metric.py +3 -3
  143. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  144. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  145. mlrun/frameworks/sklearn/model_handler.py +4 -3
  146. mlrun/frameworks/tf_keras/__init__.py +11 -12
  147. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  148. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  149. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  150. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  151. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  152. mlrun/frameworks/tf_keras/model_server.py +12 -8
  153. mlrun/frameworks/xgboost/__init__.py +19 -18
  154. mlrun/frameworks/xgboost/model_handler.py +13 -9
  155. mlrun/launcher/base.py +3 -4
  156. mlrun/launcher/local.py +1 -1
  157. mlrun/launcher/remote.py +1 -1
  158. mlrun/lists.py +4 -3
  159. mlrun/model.py +117 -46
  160. mlrun/model_monitoring/__init__.py +4 -4
  161. mlrun/model_monitoring/api.py +72 -59
  162. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  163. mlrun/model_monitoring/applications/base.py +165 -6
  164. mlrun/model_monitoring/applications/context.py +88 -37
  165. mlrun/model_monitoring/applications/evidently_base.py +0 -1
  166. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  167. mlrun/model_monitoring/applications/results.py +55 -3
  168. mlrun/model_monitoring/controller.py +207 -239
  169. mlrun/model_monitoring/db/__init__.py +0 -2
  170. mlrun/model_monitoring/db/_schedules.py +156 -0
  171. mlrun/model_monitoring/db/_stats.py +189 -0
  172. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  173. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  174. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  175. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
  176. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  177. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  178. mlrun/model_monitoring/helpers.py +151 -49
  179. mlrun/model_monitoring/stream_processing.py +99 -283
  180. mlrun/model_monitoring/tracking_policy.py +10 -3
  181. mlrun/model_monitoring/writer.py +48 -36
  182. mlrun/package/__init__.py +3 -6
  183. mlrun/package/context_handler.py +1 -1
  184. mlrun/package/packager.py +12 -9
  185. mlrun/package/packagers/__init__.py +0 -2
  186. mlrun/package/packagers/default_packager.py +14 -11
  187. mlrun/package/packagers/numpy_packagers.py +16 -7
  188. mlrun/package/packagers/pandas_packagers.py +18 -18
  189. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  190. mlrun/package/packagers_manager.py +31 -14
  191. mlrun/package/utils/__init__.py +0 -3
  192. mlrun/package/utils/_pickler.py +6 -6
  193. mlrun/platforms/__init__.py +47 -16
  194. mlrun/platforms/iguazio.py +4 -1
  195. mlrun/projects/operations.py +27 -27
  196. mlrun/projects/pipelines.py +71 -36
  197. mlrun/projects/project.py +890 -220
  198. mlrun/run.py +53 -10
  199. mlrun/runtimes/__init__.py +1 -3
  200. mlrun/runtimes/base.py +15 -11
  201. mlrun/runtimes/daskjob.py +9 -9
  202. mlrun/runtimes/generators.py +2 -1
  203. mlrun/runtimes/kubejob.py +4 -5
  204. mlrun/runtimes/mounts.py +572 -0
  205. mlrun/runtimes/mpijob/__init__.py +0 -2
  206. mlrun/runtimes/mpijob/abstract.py +7 -6
  207. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  208. mlrun/runtimes/nuclio/application/application.py +11 -11
  209. mlrun/runtimes/nuclio/function.py +19 -17
  210. mlrun/runtimes/nuclio/serving.py +18 -13
  211. mlrun/runtimes/pod.py +154 -45
  212. mlrun/runtimes/remotesparkjob.py +3 -2
  213. mlrun/runtimes/sparkjob/__init__.py +0 -2
  214. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  215. mlrun/runtimes/utils.py +6 -5
  216. mlrun/serving/merger.py +6 -4
  217. mlrun/serving/remote.py +18 -17
  218. mlrun/serving/routers.py +185 -172
  219. mlrun/serving/server.py +7 -1
  220. mlrun/serving/states.py +97 -78
  221. mlrun/serving/utils.py +13 -2
  222. mlrun/serving/v1_serving.py +3 -2
  223. mlrun/serving/v2_serving.py +105 -72
  224. mlrun/track/__init__.py +1 -1
  225. mlrun/track/tracker.py +2 -2
  226. mlrun/track/trackers/mlflow_tracker.py +6 -5
  227. mlrun/utils/async_http.py +1 -1
  228. mlrun/utils/clones.py +1 -1
  229. mlrun/utils/helpers.py +63 -19
  230. mlrun/utils/logger.py +106 -4
  231. mlrun/utils/notifications/notification/__init__.py +22 -19
  232. mlrun/utils/notifications/notification/base.py +33 -14
  233. mlrun/utils/notifications/notification/console.py +6 -6
  234. mlrun/utils/notifications/notification/git.py +11 -11
  235. mlrun/utils/notifications/notification/ipython.py +10 -9
  236. mlrun/utils/notifications/notification/mail.py +176 -0
  237. mlrun/utils/notifications/notification/slack.py +6 -6
  238. mlrun/utils/notifications/notification/webhook.py +6 -6
  239. mlrun/utils/notifications/notification_pusher.py +86 -44
  240. mlrun/utils/regex.py +11 -2
  241. mlrun/utils/version/version.json +2 -2
  242. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
  243. mlrun-1.8.0rc11.dist-info/RECORD +347 -0
  244. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  245. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  246. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  247. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  248. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  249. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  250. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  251. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  252. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  253. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  254. mlrun/model_monitoring/model_endpoint.py +0 -118
  255. mlrun-1.7.1rc10.dist-info/RECORD +0 -351
  256. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
  257. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
  258. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
  259. {mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0
@@ -14,13 +14,13 @@
14
14
 
15
15
  import typing
16
16
 
17
- import pydantic
17
+ import pydantic.v1
18
18
  from deprecated import deprecated
19
19
 
20
20
  import mlrun.common.types
21
21
 
22
22
 
23
- class RunIdentifier(pydantic.BaseModel):
23
+ class RunIdentifier(pydantic.v1.BaseModel):
24
24
  kind: typing.Literal["run"] = "run"
25
25
  uid: typing.Optional[str]
26
26
  iter: typing.Optional[int]
@@ -14,7 +14,7 @@
14
14
  #
15
15
  import typing
16
16
 
17
- import pydantic
17
+ import pydantic.v1
18
18
 
19
19
  import mlrun.common.types
20
20
 
@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
24
24
  project = "project"
25
25
 
26
26
 
27
- class RuntimeResource(pydantic.BaseModel):
27
+ class RuntimeResource(pydantic.v1.BaseModel):
28
28
  name: str
29
29
  labels: dict[str, str] = {}
30
30
  status: typing.Optional[dict]
31
31
 
32
32
 
33
- class RuntimeResources(pydantic.BaseModel):
33
+ class RuntimeResources(pydantic.v1.BaseModel):
34
34
  crd_resources: list[RuntimeResource] = []
35
35
  pod_resources: list[RuntimeResource] = []
36
36
  # only for dask runtime
37
37
  service_resources: typing.Optional[list[RuntimeResource]] = None
38
38
 
39
39
  class Config:
40
- extra = pydantic.Extra.allow
40
+ extra = pydantic.v1.Extra.allow
41
41
 
42
42
 
43
- class KindRuntimeResources(pydantic.BaseModel):
43
+ class KindRuntimeResources(pydantic.v1.BaseModel):
44
44
  kind: str
45
45
  resources: RuntimeResources
46
46
 
@@ -15,7 +15,7 @@
15
15
  from datetime import datetime
16
16
  from typing import Any, Literal, Optional, Union
17
17
 
18
- from pydantic import BaseModel
18
+ from pydantic.v1 import BaseModel
19
19
 
20
20
  import mlrun.common.types
21
21
  from mlrun.common.schemas.auth import Credentials
@@ -14,7 +14,7 @@
14
14
  #
15
15
  from typing import Optional
16
16
 
17
- from pydantic import BaseModel, Field
17
+ from pydantic.v1 import BaseModel, Field
18
18
 
19
19
  import mlrun.common.types
20
20
 
@@ -13,17 +13,17 @@
13
13
  # limitations under the License.
14
14
  #
15
15
 
16
- import pydantic
16
+ import pydantic.v1
17
17
 
18
18
  from .artifact import ArtifactIdentifier
19
19
 
20
20
 
21
- class Tag(pydantic.BaseModel):
21
+ class Tag(pydantic.v1.BaseModel):
22
22
  name: str
23
23
  project: str
24
24
 
25
25
 
26
- class TagObjects(pydantic.BaseModel):
26
+ class TagObjects(pydantic.v1.BaseModel):
27
27
  """Tag object"""
28
28
 
29
29
  kind: str
@@ -14,14 +14,14 @@
14
14
  #
15
15
  import typing
16
16
 
17
- import pydantic
17
+ import pydantic.v1
18
18
 
19
19
  from mlrun.common.schemas.notification import Notification
20
20
  from mlrun.common.schemas.schedule import ScheduleCronTrigger
21
21
  from mlrun.common.types import StrEnum
22
22
 
23
23
 
24
- class WorkflowSpec(pydantic.BaseModel):
24
+ class WorkflowSpec(pydantic.v1.BaseModel):
25
25
  name: str
26
26
  engine: typing.Optional[str] = None
27
27
  code: typing.Optional[str] = None
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
36
36
  workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
37
37
 
38
38
 
39
- class WorkflowRequest(pydantic.BaseModel):
39
+ class WorkflowRequest(pydantic.v1.BaseModel):
40
40
  spec: typing.Optional[WorkflowSpec] = None
41
41
  arguments: typing.Optional[dict] = None
42
42
  artifact_path: typing.Optional[str] = None
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
46
46
  notifications: typing.Optional[list[Notification]] = None
47
47
 
48
48
 
49
- class WorkflowResponse(pydantic.BaseModel):
49
+ class WorkflowResponse(pydantic.v1.BaseModel):
50
50
  project: str = None
51
51
  name: str = None
52
52
  status: str = None
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
54
54
  schedule: typing.Union[str, ScheduleCronTrigger] = None
55
55
 
56
56
 
57
- class GetWorkflowResponse(pydantic.BaseModel):
57
+ class GetWorkflowResponse(pydantic.v1.BaseModel):
58
58
  workflow_id: str = None
59
59
 
60
60
 
mlrun/config.py CHANGED
@@ -102,6 +102,9 @@ default_config = {
102
102
  "log_level": "INFO",
103
103
  # log formatter (options: human | human_extended | json)
104
104
  "log_formatter": "human",
105
+ # custom logger format, works only with log_formatter: custom
106
+ # Note that your custom format must include those 4 fields - timestamp, level, message and more
107
+ "log_format_override": None,
105
108
  "submit_timeout": "180", # timeout when submitting a new k8s resource
106
109
  # runtimes cleanup interval in seconds
107
110
  "runtimes_cleanup_interval": "300",
@@ -132,6 +135,12 @@ default_config = {
132
135
  "delete_crd_resources_timeout": "5 minutes",
133
136
  },
134
137
  },
138
+ "object_retentions": {
139
+ "alert_activations": 14 * 7, # days
140
+ },
141
+ # A safety margin to account for delays
142
+ # This ensures that extra partitions are available beyond the specified retention period
143
+ "partitions_buffer_multiplier": 3,
135
144
  # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
136
145
  # before deleting them (4 hours)
137
146
  "runtime_resources_deletion_grace_period": "14400",
@@ -151,6 +160,7 @@ default_config = {
151
160
  # migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
152
161
  # migration progress.
153
162
  "artifact_migration_batch_size": 200,
163
+ "artifact_migration_v9_batch_size": 30000,
154
164
  "artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
155
165
  "datasets": {
156
166
  "max_preview_columns": 100,
@@ -223,6 +233,7 @@ default_config = {
223
233
  "delete_function": "900",
224
234
  },
225
235
  "runtimes": {"dask": "600"},
236
+ "push_notifications": "60",
226
237
  },
227
238
  },
228
239
  "function": {
@@ -306,7 +317,7 @@ default_config = {
306
317
  },
307
318
  "request_timeout": 45, # seconds
308
319
  },
309
- # see server.api.utils.helpers.ensure_running_on_chief
320
+ # see server.py.services.api.utils.helpers.ensure_running_on_chief
310
321
  "ensure_function_running_on_chief_mode": "enabled",
311
322
  },
312
323
  "port": 8080,
@@ -524,7 +535,7 @@ default_config = {
524
535
  "verbose": True,
525
536
  },
526
537
  "pagination": {
527
- "default_page_size": 20,
538
+ "default_page_size": 200,
528
539
  "pagination_cache": {
529
540
  "interval": 60,
530
541
  "ttl": 3600,
@@ -598,8 +609,6 @@ default_config = {
598
609
  "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
599
610
  "parquet_batching_max_events": 10_000,
600
611
  "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
601
- # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
602
- "endpoint_store_connection": "",
603
612
  # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
604
613
  "tsdb_connection": "",
605
614
  # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
@@ -786,17 +795,36 @@ default_config = {
786
795
  "grafana_url": "",
787
796
  "alerts": {
788
797
  # supported modes: "enabled", "disabled".
789
- "mode": "disabled",
798
+ "mode": "enabled",
790
799
  # maximum number of alerts we allow to be configured.
791
800
  # user will get an error when exceeding this
792
801
  "max_allowed": 10000,
793
802
  # maximum allowed value for count in criteria field inside AlertConfig
794
803
  "max_criteria_count": 100,
804
+ # interval for periodic events generation job
805
+ "events_generation_interval": "30",
795
806
  },
796
807
  "auth_with_client_id": {
797
808
  "enabled": False,
798
809
  "request_timeout": 5,
799
810
  },
811
+ "services": {
812
+ # The running service name. One of: "api", "alerts"
813
+ "service_name": "api",
814
+ "hydra": {
815
+ # Comma separated list of services to run on the instance.
816
+ # Currently, this is only considered when the service_name is "api".
817
+ # "*" starts all services on the same instance,
818
+ # other options are considered as running only the api service.
819
+ "services": "*",
820
+ },
821
+ },
822
+ "notifications": {
823
+ "smtp": {
824
+ "config_secret_name": "mlrun-smtp-config",
825
+ "refresh_interval": "30",
826
+ }
827
+ },
800
828
  }
801
829
  _is_running_as_api = None
802
830
 
@@ -843,6 +871,22 @@ class Config:
843
871
  name = self.__class__.__name__
844
872
  return f"{name}({self._cfg!r})"
845
873
 
874
+ def __iter__(self):
875
+ if isinstance(self._cfg, Mapping):
876
+ return self._cfg.__iter__()
877
+
878
+ def items(self):
879
+ if isinstance(self._cfg, Mapping):
880
+ return iter(self._cfg.items())
881
+
882
+ def keys(self):
883
+ if isinstance(self._cfg, Mapping):
884
+ return iter(self.data.keys())
885
+
886
+ def values(self):
887
+ if isinstance(self._cfg, Mapping):
888
+ return iter(self.data.values())
889
+
846
890
  def update(self, cfg, skip_errors=False):
847
891
  for key, value in cfg.items():
848
892
  if hasattr(self, key):
@@ -1035,6 +1079,17 @@ class Config:
1035
1079
  f"is not allowed for iguazio version: {igz_version} < 3.5.1"
1036
1080
  )
1037
1081
 
1082
+ def validate_object_retentions(self):
1083
+ for table_name, retention_days in self.object_retentions.items():
1084
+ if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
1085
+ raise mlrun.errors.MLRunInvalidArgumentError(
1086
+ f"{table_name} partition interval must be greater than a week"
1087
+ )
1088
+ elif retention_days > 53 * 7:
1089
+ raise mlrun.errors.MLRunInvalidArgumentError(
1090
+ f"{table_name} partition interval must be less than a year"
1091
+ )
1092
+
1038
1093
  def resolve_chief_api_url(self) -> str:
1039
1094
  if self.httpdb.clusterization.chief.url:
1040
1095
  return self.httpdb.clusterization.chief.url
@@ -1193,9 +1248,9 @@ class Config:
1193
1248
 
1194
1249
  def get_model_monitoring_file_target_path(
1195
1250
  self,
1196
- project: str = "",
1197
- kind: str = "",
1198
- target: str = "online",
1251
+ project: str,
1252
+ kind: str,
1253
+ target: typing.Literal["online", "offline"] = "online",
1199
1254
  artifact_path: typing.Optional[str] = None,
1200
1255
  function_name: typing.Optional[str] = None,
1201
1256
  **kwargs,
@@ -1373,9 +1428,12 @@ def _validate_config(config):
1373
1428
  pass
1374
1429
 
1375
1430
  config.verify_security_context_enrichment_mode_is_allowed()
1431
+ config.validate_object_retentions()
1376
1432
 
1377
1433
 
1378
- def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
1434
+ def _verify_gpu_requests_and_limits(
1435
+ requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
1436
+ ):
1379
1437
  # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
1380
1438
  if requests_gpu and not limits_gpu:
1381
1439
  raise mlrun.errors.MLRunConflictError(
@@ -1388,7 +1446,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
1388
1446
  )
1389
1447
 
1390
1448
 
1391
- def _convert_resources_to_str(config: dict = None):
1449
+ def _convert_resources_to_str(config: typing.Optional[dict] = None):
1392
1450
  resources_types = ["cpu", "memory", "gpu"]
1393
1451
  resource_requirements = ["requests", "limits"]
1394
1452
  if not config.get("default_function_pod_resources"):
@@ -11,8 +11,6 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- #
15
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
14
 
17
15
  from .data_types import (
18
16
  InferOptions,
@@ -124,6 +124,7 @@ def spark_to_value_type(data_type):
124
124
  "double": ValueType.DOUBLE,
125
125
  "boolean": ValueType.BOOL,
126
126
  "timestamp": ValueType.DATETIME,
127
+ "timestamp_ntz": ValueType.DATETIME,
127
128
  "string": ValueType.STRING,
128
129
  "array": "list",
129
130
  "map": "dict",
mlrun/data_types/infer.py CHANGED
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
+ from typing import Optional
16
+
15
17
  import numpy as np
16
18
  import packaging.version
17
19
  import pandas as pd
@@ -29,7 +31,7 @@ def infer_schema_from_df(
29
31
  df: pd.DataFrame,
30
32
  features,
31
33
  entities,
32
- timestamp_key: str = None,
34
+ timestamp_key: Optional[str] = None,
33
35
  entity_columns=None,
34
36
  options: InferOptions = InferOptions.Null,
35
37
  ):
mlrun/data_types/spark.py CHANGED
@@ -14,11 +14,12 @@
14
14
  #
15
15
  from datetime import datetime
16
16
  from os import environ
17
+ from typing import Optional
17
18
 
18
19
  import numpy as np
19
20
  import pytz
20
21
  from pyspark.sql.functions import to_utc_timestamp
21
- from pyspark.sql.types import BooleanType, DoubleType, TimestampType
22
+ from pyspark.sql.types import BooleanType, DoubleType
22
23
 
23
24
  from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
24
25
  from mlrun.utils import logger
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
35
36
  df,
36
37
  features,
37
38
  entities,
38
- timestamp_key: str = None,
39
+ timestamp_key: Optional[str] = None,
39
40
  entity_columns=None,
40
41
  options: InferOptions = InferOptions.Null,
41
42
  ):
@@ -143,7 +144,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
143
144
  timestamp_columns = set()
144
145
  boolean_columns = set()
145
146
  for field in df_after_type_casts.schema.fields:
146
- is_timestamp = isinstance(field.dataType, TimestampType)
147
+ # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
148
+ is_timestamp = field.dataType.typeName().startswith("timestamp")
147
149
  is_boolean = isinstance(field.dataType, BooleanType)
148
150
  if is_timestamp:
149
151
  df_after_type_casts = df_after_type_casts.withColumn(
@@ -244,6 +244,15 @@ def _to_corrected_pandas_type(dt):
244
244
 
245
245
 
246
246
  def spark_df_to_pandas(spark_df):
247
+ import pyspark
248
+
249
+ if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
250
+
251
+ def to_pandas(spark_df_inner):
252
+ return spark_df_inner.toPandas()
253
+ else:
254
+ to_pandas = _to_pandas
255
+
247
256
  # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
248
257
  # when we upgrade pyspark, we should check whether this workaround is still necessary
249
258
  # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
@@ -262,9 +271,9 @@ def spark_df_to_pandas(spark_df):
262
271
  )
263
272
  type_conversion_dict[field.name] = "datetime64[ns]"
264
273
 
265
- df = _to_pandas(spark_df)
274
+ df = to_pandas(spark_df)
266
275
  if type_conversion_dict:
267
276
  df = df.astype(type_conversion_dict)
268
277
  return df
269
278
  else:
270
- return _to_pandas(spark_df)
279
+ return to_pandas(spark_df)
@@ -12,8 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
16
-
17
15
  __all__ = [
18
16
  "DataItem",
19
17
  "get_store_resource",
@@ -32,6 +30,8 @@ __all__ = [
32
30
  "DatabricksFileSystemDisableCache",
33
31
  "DatabricksFileBugFixed",
34
32
  "get_stream_pusher",
33
+ "ConfigProfile",
34
+ "VectorStoreCollection",
35
35
  ]
36
36
 
37
37
  import fsspec
@@ -15,6 +15,7 @@
15
15
  import time
16
16
  from datetime import datetime
17
17
  from pathlib import Path
18
+ from typing import Optional
18
19
  from urllib.parse import urlparse
19
20
 
20
21
  import oss2
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
28
29
  class OSSStore(DataStore):
29
30
  using_bucket = True
30
31
 
31
- def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
32
+ def __init__(
33
+ self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
34
+ ):
32
35
  super().__init__(parent, name, schema, endpoint, secrets)
33
36
  # will be used in case user asks to assume a role and work through fsspec
34
37
 
@@ -14,6 +14,7 @@
14
14
 
15
15
  import time
16
16
  from pathlib import Path
17
+ from typing import Optional
17
18
  from urllib.parse import urlparse
18
19
 
19
20
  from azure.storage.blob import BlobServiceClient
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
36
37
  1024 * 1024 * 8
37
38
  ) # for service_client property only, does not affect filesystem
38
39
 
39
- def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
40
+ def __init__(
41
+ self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
42
+ ):
40
43
  super().__init__(parent, name, schema, endpoint, secrets=secrets)
41
44
  self._service_client = None
42
45
  self._storage_options = None
mlrun/datastore/base.py CHANGED
@@ -48,7 +48,7 @@ class FileStats:
48
48
  class DataStore:
49
49
  using_bucket = False
50
50
 
51
- def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
51
+ def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
52
52
  self._parent = parent
53
53
  self.kind = kind
54
54
  self.name = name
@@ -500,12 +500,18 @@ class DataItem:
500
500
  """DataItem url e.g. /dir/path, s3://bucket/path"""
501
501
  return self._url
502
502
 
503
- def get(self, size=None, offset=0, encoding=None):
503
+ def get(
504
+ self,
505
+ size: Optional[int] = None,
506
+ offset: int = 0,
507
+ encoding: Optional[str] = None,
508
+ ) -> Union[bytes, str]:
504
509
  """read all or a byte range and return the content
505
510
 
506
511
  :param size: number of bytes to get
507
512
  :param offset: fetch from offset (in bytes)
508
513
  :param encoding: encoding (e.g. "utf-8") for converting bytes to str
514
+ :return: the bytes/str content
509
515
  """
510
516
  body = self._store.get(self._path, size=size, offset=offset)
511
517
  if encoding and isinstance(body, bytes):
@@ -519,7 +525,7 @@ class DataItem:
519
525
  """
520
526
  self._store.download(self._path, target_path)
521
527
 
522
- def put(self, data, append=False):
528
+ def put(self, data: Union[bytes, str], append: bool = False) -> None:
523
529
  """write/upload the data, append is only supported by some datastores
524
530
 
525
531
  :param data: data (bytes/str) to write
@@ -687,7 +693,9 @@ def basic_auth_header(user, password):
687
693
 
688
694
 
689
695
  class HttpStore(DataStore):
690
- def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
696
+ def __init__(
697
+ self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
698
+ ):
691
699
  super().__init__(parent, name, schema, endpoint, secrets)
692
700
  self._https_auth_token = None
693
701
  self._schema = schema
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ from typing import Optional
14
15
  from urllib.parse import urlparse
15
16
 
16
17
  from mergedeep import merge
@@ -178,12 +179,17 @@ class StoreManager:
178
179
  # which accepts a feature vector uri and generates the offline vector (parquet) for it if it doesn't exist
179
180
  if not target and not allow_empty_resources:
180
181
  raise mlrun.errors.MLRunInvalidArgumentError(
181
- f"resource {url} does not have a valid/persistent offline target"
182
+ f"Resource {url} does not have a valid/persistent offline target"
182
183
  )
183
184
  return resource, target or ""
184
185
 
185
186
  def object(
186
- self, url, key="", project="", allow_empty_resources=None, secrets: dict = None
187
+ self,
188
+ url,
189
+ key="",
190
+ project="",
191
+ allow_empty_resources=None,
192
+ secrets: Optional[dict] = None,
187
193
  ) -> DataItem:
188
194
  meta = artifact_url = None
189
195
  if is_store_uri(url):
@@ -205,7 +211,7 @@ class StoreManager:
205
211
  )
206
212
 
207
213
  def get_or_create_store(
208
- self, url, secrets: dict = None, project_name=""
214
+ self, url, secrets: Optional[dict] = None, project_name=""
209
215
  ) -> (DataStore, str, str):
210
216
  schema, endpoint, parsed_url = parse_url(url)
211
217
  subpath = parsed_url.path