mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc2__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Warning: this version of mlrun has been flagged as potentially problematic.

Files changed (250)
  1. mlrun/__init__.py +18 -18
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/formatters/project.py +9 -0
  16. mlrun/common/model_monitoring/__init__.py +0 -3
  17. mlrun/common/model_monitoring/helpers.py +1 -1
  18. mlrun/common/runtimes/constants.py +1 -2
  19. mlrun/common/schemas/__init__.py +7 -2
  20. mlrun/common/schemas/alert.py +31 -18
  21. mlrun/common/schemas/api_gateway.py +3 -3
  22. mlrun/common/schemas/artifact.py +7 -13
  23. mlrun/common/schemas/auth.py +6 -4
  24. mlrun/common/schemas/background_task.py +7 -7
  25. mlrun/common/schemas/client_spec.py +2 -2
  26. mlrun/common/schemas/clusterization_spec.py +2 -2
  27. mlrun/common/schemas/common.py +53 -3
  28. mlrun/common/schemas/datastore_profile.py +1 -1
  29. mlrun/common/schemas/feature_store.py +9 -9
  30. mlrun/common/schemas/frontend_spec.py +4 -4
  31. mlrun/common/schemas/function.py +10 -10
  32. mlrun/common/schemas/hub.py +1 -1
  33. mlrun/common/schemas/k8s.py +3 -3
  34. mlrun/common/schemas/memory_reports.py +3 -3
  35. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  36. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  37. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  38. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -6
  40. mlrun/common/schemas/notification.py +18 -3
  41. mlrun/common/schemas/object.py +1 -1
  42. mlrun/common/schemas/pagination.py +4 -4
  43. mlrun/common/schemas/partition.py +137 -0
  44. mlrun/common/schemas/pipeline.py +2 -2
  45. mlrun/common/schemas/project.py +22 -17
  46. mlrun/common/schemas/runs.py +2 -2
  47. mlrun/common/schemas/runtime_resource.py +5 -5
  48. mlrun/common/schemas/schedule.py +1 -1
  49. mlrun/common/schemas/secret.py +1 -1
  50. mlrun/common/schemas/tag.py +3 -3
  51. mlrun/common/schemas/workflow.py +5 -5
  52. mlrun/config.py +65 -15
  53. mlrun/data_types/__init__.py +0 -2
  54. mlrun/data_types/data_types.py +0 -1
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +4 -4
  57. mlrun/data_types/to_pandas.py +2 -11
  58. mlrun/datastore/__init__.py +0 -2
  59. mlrun/datastore/alibaba_oss.py +4 -1
  60. mlrun/datastore/azure_blob.py +4 -1
  61. mlrun/datastore/base.py +12 -4
  62. mlrun/datastore/datastore.py +9 -3
  63. mlrun/datastore/datastore_profile.py +20 -20
  64. mlrun/datastore/dbfs_store.py +4 -1
  65. mlrun/datastore/filestore.py +4 -1
  66. mlrun/datastore/google_cloud_storage.py +4 -1
  67. mlrun/datastore/hdfs.py +4 -1
  68. mlrun/datastore/inmem.py +4 -1
  69. mlrun/datastore/redis.py +4 -1
  70. mlrun/datastore/s3.py +4 -1
  71. mlrun/datastore/sources.py +51 -49
  72. mlrun/datastore/store_resources.py +0 -2
  73. mlrun/datastore/targets.py +22 -23
  74. mlrun/datastore/utils.py +2 -2
  75. mlrun/datastore/v3io.py +4 -1
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +170 -64
  78. mlrun/db/factory.py +3 -0
  79. mlrun/db/httpdb.py +986 -238
  80. mlrun/db/nopdb.py +155 -57
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +55 -29
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +40 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +110 -46
  158. mlrun/model_monitoring/__init__.py +1 -2
  159. mlrun/model_monitoring/api.py +6 -6
  160. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  161. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  162. mlrun/model_monitoring/applications/results.py +55 -3
  163. mlrun/model_monitoring/controller.py +185 -223
  164. mlrun/model_monitoring/db/_schedules.py +156 -0
  165. mlrun/model_monitoring/db/_stats.py +189 -0
  166. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  167. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  168. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  169. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  170. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  171. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  172. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  173. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  174. mlrun/model_monitoring/db/tsdb/base.py +76 -24
  175. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
  176. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  177. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +253 -28
  178. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  179. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  180. mlrun/model_monitoring/helpers.py +91 -1
  181. mlrun/model_monitoring/model_endpoint.py +4 -2
  182. mlrun/model_monitoring/stream_processing.py +16 -13
  183. mlrun/model_monitoring/tracking_policy.py +10 -3
  184. mlrun/model_monitoring/writer.py +47 -26
  185. mlrun/package/__init__.py +3 -6
  186. mlrun/package/context_handler.py +1 -1
  187. mlrun/package/packager.py +12 -9
  188. mlrun/package/packagers/__init__.py +0 -2
  189. mlrun/package/packagers/default_packager.py +14 -11
  190. mlrun/package/packagers/numpy_packagers.py +16 -7
  191. mlrun/package/packagers/pandas_packagers.py +18 -18
  192. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  193. mlrun/package/packagers_manager.py +31 -14
  194. mlrun/package/utils/__init__.py +0 -3
  195. mlrun/package/utils/_pickler.py +6 -6
  196. mlrun/platforms/__init__.py +3 -16
  197. mlrun/platforms/iguazio.py +4 -1
  198. mlrun/projects/operations.py +27 -27
  199. mlrun/projects/pipelines.py +34 -35
  200. mlrun/projects/project.py +535 -182
  201. mlrun/run.py +13 -10
  202. mlrun/runtimes/__init__.py +1 -3
  203. mlrun/runtimes/base.py +15 -11
  204. mlrun/runtimes/daskjob.py +9 -9
  205. mlrun/runtimes/generators.py +2 -1
  206. mlrun/runtimes/kubejob.py +4 -5
  207. mlrun/runtimes/mounts.py +572 -0
  208. mlrun/runtimes/mpijob/__init__.py +0 -2
  209. mlrun/runtimes/mpijob/abstract.py +7 -6
  210. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  211. mlrun/runtimes/nuclio/application/application.py +11 -11
  212. mlrun/runtimes/nuclio/function.py +13 -13
  213. mlrun/runtimes/nuclio/serving.py +9 -9
  214. mlrun/runtimes/pod.py +154 -45
  215. mlrun/runtimes/remotesparkjob.py +3 -2
  216. mlrun/runtimes/sparkjob/__init__.py +0 -2
  217. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  218. mlrun/runtimes/utils.py +6 -5
  219. mlrun/serving/merger.py +6 -4
  220. mlrun/serving/remote.py +18 -17
  221. mlrun/serving/routers.py +27 -27
  222. mlrun/serving/server.py +1 -1
  223. mlrun/serving/states.py +76 -71
  224. mlrun/serving/utils.py +13 -2
  225. mlrun/serving/v1_serving.py +3 -2
  226. mlrun/serving/v2_serving.py +4 -4
  227. mlrun/track/__init__.py +1 -1
  228. mlrun/track/tracker.py +2 -2
  229. mlrun/track/trackers/mlflow_tracker.py +6 -5
  230. mlrun/utils/async_http.py +1 -1
  231. mlrun/utils/helpers.py +70 -16
  232. mlrun/utils/logger.py +106 -4
  233. mlrun/utils/notifications/notification/__init__.py +22 -19
  234. mlrun/utils/notifications/notification/base.py +33 -14
  235. mlrun/utils/notifications/notification/console.py +6 -6
  236. mlrun/utils/notifications/notification/git.py +11 -11
  237. mlrun/utils/notifications/notification/ipython.py +10 -9
  238. mlrun/utils/notifications/notification/mail.py +149 -0
  239. mlrun/utils/notifications/notification/slack.py +6 -6
  240. mlrun/utils/notifications/notification/webhook.py +18 -22
  241. mlrun/utils/notifications/notification_pusher.py +43 -31
  242. mlrun/utils/regex.py +3 -1
  243. mlrun/utils/version/version.json +2 -2
  244. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/METADATA +18 -14
  245. mlrun-1.8.0rc2.dist-info/RECORD +358 -0
  246. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/WHEEL +1 -1
  247. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  248. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/LICENSE +0 -0
  249. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/entry_points.txt +0 -0
  250. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/partition.py ADDED
@@ -0,0 +1,137 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ from datetime import datetime, timedelta
+
+ from mlrun.common.types import StrEnum
+
+
+ class PartitionInterval(StrEnum):
+     DAY = "DAY"
+     MONTH = "MONTH"
+     YEARWEEK = "YEARWEEK"
+
+     @classmethod
+     def is_valid(cls, value: str) -> bool:
+         return value in cls._value2member_map_
+
+     @classmethod
+     def valid_intervals(cls) -> list:
+         return list(cls._value2member_map_.keys())
+
+     def as_duration(self) -> timedelta:
+         """
+         Convert the partition interval to a duration-like timedelta.
+
+         Returns:
+             timedelta: A duration representing the partition interval.
+         """
+         if self == PartitionInterval.DAY:
+             return timedelta(days=1)
+         elif self == PartitionInterval.MONTH:
+             # Approximate a month as 30 days
+             return timedelta(days=30)
+         elif self == PartitionInterval.YEARWEEK:
+             return timedelta(weeks=1)
+
+     @classmethod
+     def from_function(cls, partition_function: str):
+         """
+         Returns the corresponding PartitionInterval for a given partition function.
+
+         :param partition_function: The partition function to map to an interval.
+         :return: PartitionInterval corresponding to the function.
+         :raises KeyError: If the partition function is not mapped.
+         """
+         partition_function_to_partitions_interval = {
+             "DAY": "DAY",
+             "DAYOFMONTH": "DAY",
+             "MONTH": "MONTH",
+             "YEARWEEK": "YEARWEEK",
+         }
+         interval = partition_function_to_partitions_interval.get(partition_function)
+         if interval and cls.is_valid(interval):
+             return cls[interval]
+         raise KeyError(f"Partition function: {partition_function} isn't supported")
+
+     def get_partition_info(
+         self,
+         start_datetime: datetime,
+         partition_number: int = 1,
+     ) -> list[tuple[str, str]]:
+         """
+         Generates partition details for a specified number of partitions starting from a given datetime.
+
+         :param start_datetime: The starting datetime used for generating partition details.
+         :param partition_number: The number of partitions to generate details for.
+
+         :return: A list of tuples:
+             - partition_name: The name for the partition.
+             - partition_value: The "LESS THAN" value for the next partition boundary.
+         """
+         partitioning_information_list = []
+         current_datetime = start_datetime
+
+         for _ in range(partition_number):
+             partition_name = self.get_partition_name(current_datetime)
+             partition_boundary_date = self.get_next_partition_time(current_datetime)
+             partition_value = self.get_partition_name(partition_boundary_date)
+             partitioning_information_list.append((partition_name, partition_value))
+
+             # Move to the next interval
+             current_datetime = partition_boundary_date
+
+         return partitioning_information_list
+
+     def get_next_partition_time(self, current_datetime: datetime) -> datetime:
+         """
+         Calculates the next partition boundary time based on the specified partition interval.
+
+         :param current_datetime: The current datetime from which the next interval is calculated.
+
+         :return: A datetime object representing the start of the next partition interval.
+             - If the interval is DAY, it advances by one day.
+             - If the interval is MONTH, it advances to the first day of the next month.
+             - If the interval is YEARWEEK, it advances by one week.
+         """
+         if self == PartitionInterval.DAY:
+             return current_datetime + timedelta(days=1)
+         elif self == PartitionInterval.MONTH:
+             return (current_datetime.replace(day=1) + timedelta(days=32)).replace(day=1)
+         elif self == PartitionInterval.YEARWEEK:
+             return current_datetime + timedelta(weeks=1)
+
+     def get_partition_name(self, current_datetime: datetime) -> str:
+         if self == PartitionInterval.DAY:
+             return current_datetime.strftime("%Y%m%d")
+         elif self == PartitionInterval.MONTH:
+             return current_datetime.strftime("%Y%m")
+         elif self == PartitionInterval.YEARWEEK:
+             year, week, _ = current_datetime.isocalendar()
+             return f"{year}{week:02d}"
+
+     def get_partition_expression(self):
+         if self == PartitionInterval.YEARWEEK:
+             return "YEARWEEK(activation_time, 1)"
+         else:
+             return f"{self}(activation_time)"
+
+     def get_number_of_partitions(self, days: int) -> int:
+         # Calculate the number of partitions based on the given number of days
+         if self == PartitionInterval.DAY:
+             return days
+         elif self == PartitionInterval.MONTH:
+             # Average number of days in a month is 30.44
+             return int(days / 30.44)
+         elif self == PartitionInterval.YEARWEEK:
+             return int(days / 7)
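
The new PartitionInterval enum (file 43 above, mlrun/common/schemas/partition.py) drives time-based table partitioning. A minimal sketch of how its helpers compose, based only on the code shown in this hunk and assuming mlrun 1.8.0rc2 is installed:

    from datetime import datetime

    from mlrun.common.schemas.partition import PartitionInterval

    # "DAYOFMONTH" folds into the DAY interval via from_function().
    interval = PartitionInterval.from_function("DAYOFMONTH")
    assert interval is PartitionInterval.DAY

    # Generate (partition_name, "LESS THAN" boundary) pairs for two monthly partitions.
    print(PartitionInterval.MONTH.get_partition_info(datetime(2024, 11, 15), partition_number=2))
    # [('202411', '202412'), ('202412', '202501')]

    # 98 retention days at weekly granularity map to 14 partitions.
    print(PartitionInterval.YEARWEEK.get_number_of_partitions(98))  # 14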
mlrun/common/schemas/pipeline.py CHANGED
@@ -14,7 +14,7 @@
  #
  import typing

- import pydantic
+ import pydantic.v1
  from deprecated import deprecated

  import mlrun.common.types
@@ -39,7 +39,7 @@ class PipelinesPagination(str):
      max_page_size = 200


- class PipelinesOutput(pydantic.BaseModel):
+ class PipelinesOutput(pydantic.v1.BaseModel):
      # use the format query param to control what is returned
      runs: list[typing.Union[dict, str]]
      total_size: int
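
The dominant pattern in this and the following schema hunks is the switch from import pydantic to import pydantic.v1: Pydantic 2.x bundles its legacy API as the pydantic.v1 subpackage, so these models keep their v1 semantics (class-based Config, Extra.allow, Field(..., const=True)) while the environment moves to Pydantic 2. A minimal sketch of the pattern with a hypothetical model, assuming pydantic>=2 is installed:

    import pydantic.v1  # the v1 compatibility layer bundled with pydantic 2.x


    class PipelineRef(pydantic.v1.BaseModel):  # illustrative model, not from mlrun
        name: str
        total_size: int = 0

        class Config:
            extra = pydantic.v1.Extra.allow  # v1-style config still works here


    ref = PipelineRef(name="train", unexpected="kept")  # extra field is allowed
    print(ref.dict())  # v1 API: .dict() rather than pydantic 2's .model_dump()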
mlrun/common/schemas/project.py CHANGED
@@ -15,7 +15,7 @@
  import datetime
  import typing

- import pydantic
+ import pydantic.v1
  from deprecated import deprecated

  import mlrun.common.types
@@ -40,14 +40,14 @@ class ProjectsFormat(mlrun.common.types.StrEnum):
      leader = "leader"


- class ProjectMetadata(pydantic.BaseModel):
+ class ProjectMetadata(pydantic.v1.BaseModel):
      name: str
      created: typing.Optional[datetime.datetime] = None
      labels: typing.Optional[dict] = {}
      annotations: typing.Optional[dict] = {}

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


  class ProjectDesiredState(mlrun.common.types.StrEnum):
@@ -77,7 +77,7 @@ class ProjectStatus(ObjectStatus):
      state: typing.Optional[ProjectState]


- class ProjectSpec(pydantic.BaseModel):
+ class ProjectSpec(pydantic.v1.BaseModel):
      description: typing.Optional[str] = None
      owner: typing.Optional[str] = None
      goals: typing.Optional[str] = None
@@ -97,10 +97,10 @@ class ProjectSpec(pydantic.BaseModel):
      default_function_node_selector: typing.Optional[dict] = {}

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


- class ProjectSpecOut(pydantic.BaseModel):
+ class ProjectSpecOut(pydantic.v1.BaseModel):
      description: typing.Optional[str] = None
      owner: typing.Optional[str] = None
      goals: typing.Optional[str] = None
@@ -120,11 +120,11 @@ class ProjectSpecOut(pydantic.BaseModel):
      default_function_node_selector: typing.Optional[dict] = {}

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


- class Project(pydantic.BaseModel):
-     kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
+ class Project(pydantic.v1.BaseModel):
+     kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
      metadata: ProjectMetadata
      spec: ProjectSpec = ProjectSpec()
      status: ObjectStatus = ObjectStatus()
@@ -132,19 +132,19 @@ class Project(pydantic.BaseModel):

  # The reason we have a different schema for the response model is that we don't want to validate project.spec.build in
  # the response as the validation was added late and there may be corrupted values in the DB.
- class ProjectOut(pydantic.BaseModel):
-     kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
+ class ProjectOut(pydantic.v1.BaseModel):
+     kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
      metadata: ProjectMetadata
      spec: ProjectSpecOut = ProjectSpecOut()
      status: ObjectStatus = ObjectStatus()


- class ProjectOwner(pydantic.BaseModel):
+ class ProjectOwner(pydantic.v1.BaseModel):
      username: str
      access_key: str


- class ProjectSummary(pydantic.BaseModel):
+ class ProjectSummary(pydantic.v1.BaseModel):
      name: str
      files_count: int = 0
      feature_sets_count: int = 0
@@ -161,7 +161,7 @@ class ProjectSummary(pydantic.BaseModel):
      updated: typing.Optional[datetime.datetime] = None


- class IguazioProject(pydantic.BaseModel):
+ class IguazioProject(pydantic.v1.BaseModel):
      data: dict


@@ -175,13 +175,18 @@ class IguazioProject(pydantic.BaseModel):
  # to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
  # https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
  ProjectOutput = typing.TypeVar(
-     "ProjectOutput", ProjectOut, str, ProjectSummary, IguazioProject
+     "ProjectOutput",
+     ProjectOut,
+     str,
+     ProjectSummary,
+     IguazioProject,
+     tuple[str, datetime.datetime],
  )


- class ProjectsOutput(pydantic.BaseModel):
+ class ProjectsOutput(pydantic.v1.BaseModel):
      projects: list[ProjectOutput]


- class ProjectSummariesOutput(pydantic.BaseModel):
+ class ProjectSummariesOutput(pydantic.v1.BaseModel):
      project_summaries: list[ProjectSummary]
mlrun/common/schemas/runs.py CHANGED
@@ -14,13 +14,13 @@

  import typing

- import pydantic
+ import pydantic.v1
  from deprecated import deprecated

  import mlrun.common.types


- class RunIdentifier(pydantic.BaseModel):
+ class RunIdentifier(pydantic.v1.BaseModel):
      kind: typing.Literal["run"] = "run"
      uid: typing.Optional[str]
      iter: typing.Optional[int]
mlrun/common/schemas/runtime_resource.py CHANGED
@@ -14,7 +14,7 @@
  #
  import typing

- import pydantic
+ import pydantic.v1

  import mlrun.common.types

@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
      project = "project"


- class RuntimeResource(pydantic.BaseModel):
+ class RuntimeResource(pydantic.v1.BaseModel):
      name: str
      labels: dict[str, str] = {}
      status: typing.Optional[dict]


- class RuntimeResources(pydantic.BaseModel):
+ class RuntimeResources(pydantic.v1.BaseModel):
      crd_resources: list[RuntimeResource] = []
      pod_resources: list[RuntimeResource] = []
      # only for dask runtime
      service_resources: typing.Optional[list[RuntimeResource]] = None

      class Config:
-         extra = pydantic.Extra.allow
+         extra = pydantic.v1.Extra.allow


- class KindRuntimeResources(pydantic.BaseModel):
+ class KindRuntimeResources(pydantic.v1.BaseModel):
      kind: str
      resources: RuntimeResources

mlrun/common/schemas/schedule.py CHANGED
@@ -15,7 +15,7 @@
  from datetime import datetime
  from typing import Any, Literal, Optional, Union

- from pydantic import BaseModel
+ from pydantic.v1 import BaseModel

  import mlrun.common.types
  from mlrun.common.schemas.auth import Credentials
mlrun/common/schemas/secret.py CHANGED
@@ -14,7 +14,7 @@
  #
  from typing import Optional

- from pydantic import BaseModel, Field
+ from pydantic.v1 import BaseModel, Field

  import mlrun.common.types

mlrun/common/schemas/tag.py CHANGED
@@ -13,17 +13,17 @@
  # limitations under the License.
  #

- import pydantic
+ import pydantic.v1

  from .artifact import ArtifactIdentifier


- class Tag(pydantic.BaseModel):
+ class Tag(pydantic.v1.BaseModel):
      name: str
      project: str


- class TagObjects(pydantic.BaseModel):
+ class TagObjects(pydantic.v1.BaseModel):
      """Tag object"""

      kind: str
mlrun/common/schemas/workflow.py CHANGED
@@ -14,14 +14,14 @@
  #
  import typing

- import pydantic
+ import pydantic.v1

  from mlrun.common.schemas.notification import Notification
  from mlrun.common.schemas.schedule import ScheduleCronTrigger
  from mlrun.common.types import StrEnum


- class WorkflowSpec(pydantic.BaseModel):
+ class WorkflowSpec(pydantic.v1.BaseModel):
      name: str
      engine: typing.Optional[str] = None
      code: typing.Optional[str] = None
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
      workflow_runner_node_selector: typing.Optional[dict[str, str]] = None


- class WorkflowRequest(pydantic.BaseModel):
+ class WorkflowRequest(pydantic.v1.BaseModel):
      spec: typing.Optional[WorkflowSpec] = None
      arguments: typing.Optional[dict] = None
      artifact_path: typing.Optional[str] = None
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
      notifications: typing.Optional[list[Notification]] = None


- class WorkflowResponse(pydantic.BaseModel):
+ class WorkflowResponse(pydantic.v1.BaseModel):
      project: str = None
      name: str = None
      status: str = None
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
      schedule: typing.Union[str, ScheduleCronTrigger] = None


- class GetWorkflowResponse(pydantic.BaseModel):
+ class GetWorkflowResponse(pydantic.v1.BaseModel):
      workflow_id: str = None

mlrun/config.py CHANGED
@@ -102,6 +102,9 @@ default_config = {
      "log_level": "INFO",
      # log formatter (options: human | human_extended | json)
      "log_formatter": "human",
+     # custom logger format, works only with log_formatter: custom
+     # Note that your custom format must include these 4 fields: timestamp, level, message and more
+     "log_format_override": None,
      "submit_timeout": "180",  # timeout when submitting a new k8s resource
      # runtimes cleanup interval in seconds
      "runtimes_cleanup_interval": "300",
@@ -120,14 +123,6 @@ default_config = {
      "projects": {
          "summaries": {
              "cache_interval": "30",
-             "feature_gates": {
-                 "artifacts": "enabled",
-                 "schedules": "enabled",
-                 "feature_sets": "enabled",
-                 "models": "enabled",
-                 "runs": "enabled",
-                 "pipelines": "enabled",
-             },
          },
      },
  },
@@ -140,6 +135,12 @@ default_config = {
          "delete_crd_resources_timeout": "5 minutes",
      },
  },
+ "object_retentions": {
+     "alert_activation": 14 * 7,  # days
+ },
+ # A safety margin to account for delays
+ # This ensures that extra partitions are available beyond the specified retention period
+ "partitions_buffer_multiplier": 3,
  # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
  # before deleting them (4 hours)
  "runtime_resources_deletion_grace_period": "14400",
@@ -314,7 +315,7 @@ default_config = {
          },
          "request_timeout": 45,  # seconds
      },
-     # see server.api.utils.helpers.ensure_running_on_chief
+     # see server.py.services.api.utils.helpers.ensure_running_on_chief
      "ensure_function_running_on_chief_mode": "enabled",
  },
  "port": 8080,
@@ -794,17 +795,36 @@ default_config = {
      "grafana_url": "",
      "alerts": {
          # supported modes: "enabled", "disabled".
-         "mode": "disabled",
+         "mode": "enabled",
          # maximum number of alerts we allow to be configured.
          # user will get an error when exceeding this
          "max_allowed": 10000,
          # maximum allowed value for count in criteria field inside AlertConfig
          "max_criteria_count": 100,
+         # interval for periodic events generation job
+         "events_generation_interval": "30",
      },
      "auth_with_client_id": {
          "enabled": False,
          "request_timeout": 5,
      },
+     "services": {
+         # The running service name. One of: "api", "alerts"
+         "service_name": "api",
+         "hydra": {
+             # Comma separated list of services to run on the instance.
+             # Currently, this is only considered when the service_name is "api".
+             # "*" starts all services on the same instance,
+             # other options are considered as running only the api service.
+             "services": "*",
+         },
+     },
+     "notifications": {
+         "smtp": {
+             "config_secret_name": "mlrun-smtp-config",
+             "refresh_interval": "30",
+         }
+     },
  }
  _is_running_as_api = None

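Like other MLRun settings, the new services block should be reachable through MLRUN_-prefixed environment variables with __ as the nesting separator (the convention used for, e.g., MLRUN_HTTPDB__PORT); the values below are illustrative:

    import os

    # Assumption: env overrides are read when mlrun is imported, so set them first.
    os.environ["MLRUN_SERVICES__SERVICE_NAME"] = "api"
    os.environ["MLRUN_SERVICES__HYDRA__SERVICES"] = "*"  # run all services together

    import mlrun  # noqa: E402

    print(mlrun.mlconf.services.hydra.services)  # "*"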
@@ -851,6 +871,22 @@ class Config:
          name = self.__class__.__name__
          return f"{name}({self._cfg!r})"

+     def __iter__(self):
+         if isinstance(self._cfg, Mapping):
+             return self._cfg.__iter__()
+
+     def items(self):
+         if isinstance(self._cfg, Mapping):
+             return iter(self._cfg.items())
+
+     def keys(self):
+         if isinstance(self._cfg, Mapping):
+             return iter(self.data.keys())
+
+     def values(self):
+         if isinstance(self._cfg, Mapping):
+             return iter(self.data.values())
+
      def update(self, cfg, skip_errors=False):
          for key, value in cfg.items():
              if hasattr(self, key):
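
These Mapping-style helpers let Config nodes that wrap a plain dict be iterated like read-only dicts, which is exactly what the new validate_object_retentions below relies on (self.object_retentions.items()). A small sketch:

    from mlrun.config import config

    # object_retentions wraps a plain dict, so it can now be iterated directly:
    for table_name, retention_days in config.object_retentions.items():
        print(table_name, retention_days)  # alert_activation 98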
@@ -1043,6 +1079,17 @@ class Config:
                  f"is not allowed for iguazio version: {igz_version} < 3.5.1"
              )

+     def validate_object_retentions(self):
+         for table_name, retention_days in self.object_retentions.items():
+             if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
+                 raise mlrun.errors.MLRunInvalidArgumentError(
+                     f"{table_name} partition interval must be greater than a week"
+                 )
+             elif retention_days > 53 * 7:
+                 raise mlrun.errors.MLRunInvalidArgumentError(
+                     f"{table_name} partition interval must be less than a year"
+                 )
+
      def resolve_chief_api_url(self) -> str:
          if self.httpdb.clusterization.chief.url:
              return self.httpdb.clusterization.chief.url
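
Tying the new defaults together: the alert_activation retention of 98 days (14 * 7) sits inside the validated window (at least 7 days, at most 53 * 7 = 371), and at weekly granularity maps to 14 partitions via PartitionInterval. How partitions_buffer_multiplier is applied server-side is not shown in this diff, so the last line below is an assumption:

    from mlrun.common.schemas.partition import PartitionInterval
    from mlrun.config import config

    retention_days = config.object_retentions.alert_activation  # 98 by default
    partitions = PartitionInterval.YEARWEEK.get_number_of_partitions(retention_days)
    print(partitions)  # 14

    # Assumption: the buffer multiplier pre-creates extra partitions beyond the
    # retention window (3 * 14 = 42).
    print(partitions * config.partitions_buffer_multiplier)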
@@ -1201,9 +1248,9 @@ class Config:

      def get_model_monitoring_file_target_path(
          self,
-         project: str = "",
-         kind: str = "",
-         target: str = "online",
+         project: str,
+         kind: str,
+         target: typing.Literal["online", "offline"] = "online",
          artifact_path: typing.Optional[str] = None,
          function_name: typing.Optional[str] = None,
          **kwargs,
@@ -1381,9 +1428,12 @@ def _validate_config(config):
          pass

      config.verify_security_context_enrichment_mode_is_allowed()
+     config.validate_object_retentions()


- def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
+ def _verify_gpu_requests_and_limits(
+     requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
+ ):
      # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
      if requests_gpu and not limits_gpu:
          raise mlrun.errors.MLRunConflictError(
@@ -1396,7 +1446,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
      )


- def _convert_resources_to_str(config: dict = None):
+ def _convert_resources_to_str(config: typing.Optional[dict] = None):
      resources_types = ["cpu", "memory", "gpu"]
      resource_requirements = ["requests", "limits"]
      if not config.get("default_function_pod_resources"):
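
The recurring str = None to typing.Optional[str] = None signature changes across this release follow PEP 484, which deprecated implicit Optional; mypy rejects the old form by default since 0.990 (no_implicit_optional). Runtime behavior is unchanged:

    from typing import Optional


    def resolve_old(timestamp_key: str = None): ...  # implicit Optional, flagged by checkers


    def resolve_new(timestamp_key: Optional[str] = None): ...  # explicit, same behavior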
mlrun/data_types/__init__.py CHANGED
@@ -11,8 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx

  from .data_types import (
      InferOptions,
mlrun/data_types/data_types.py CHANGED
@@ -124,7 +124,6 @@ def spark_to_value_type(data_type):
      "double": ValueType.DOUBLE,
      "boolean": ValueType.BOOL,
      "timestamp": ValueType.DATETIME,
-     "timestamp_ntz": ValueType.DATETIME,
      "string": ValueType.STRING,
      "array": "list",
      "map": "dict",
mlrun/data_types/infer.py CHANGED
@@ -12,6 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  #
+ from typing import Optional
+
  import numpy as np
  import packaging.version
  import pandas as pd
@@ -29,7 +31,7 @@ def infer_schema_from_df(
      df: pd.DataFrame,
      features,
      entities,
-     timestamp_key: str = None,
+     timestamp_key: Optional[str] = None,
      entity_columns=None,
      options: InferOptions = InferOptions.Null,
  ):
mlrun/data_types/spark.py CHANGED
@@ -14,11 +14,12 @@
  #
  from datetime import datetime
  from os import environ
+ from typing import Optional

  import numpy as np
  import pytz
  from pyspark.sql.functions import to_utc_timestamp
- from pyspark.sql.types import BooleanType, DoubleType
+ from pyspark.sql.types import BooleanType, DoubleType, TimestampType

  from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
  from mlrun.utils import logger
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
      df,
      features,
      entities,
-     timestamp_key: str = None,
+     timestamp_key: Optional[str] = None,
      entity_columns=None,
      options: InferOptions = InferOptions.Null,
  ):
@@ -143,8 +144,7 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
      timestamp_columns = set()
      boolean_columns = set()
      for field in df_after_type_casts.schema.fields:
-         # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
-         is_timestamp = field.dataType.typeName().startswith("timestamp")
+         is_timestamp = isinstance(field.dataType, TimestampType)
          is_boolean = isinstance(field.dataType, BooleanType)
          if is_timestamp:
              df_after_type_casts = df_after_type_casts.withColumn(
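
This narrows timestamp detection from a string-prefix check, which also matched TimestampNTZType (added in PySpark 3.4), to an exact TimestampType isinstance check; it pairs with the removal of the timestamp_ntz mapping in data_types.py above. A small sketch of the difference, assuming PySpark 3.4+:

    from pyspark.sql.types import TimestampNTZType, TimestampType

    ntz = TimestampNTZType()

    print(ntz.typeName().startswith("timestamp"))  # True  (old check matched NTZ)
    print(isinstance(ntz, TimestampType))          # False (new check excludes NTZ)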
mlrun/data_types/to_pandas.py CHANGED
@@ -244,15 +244,6 @@

  def spark_df_to_pandas(spark_df):
-     import pyspark
-
-     if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
-
-         def to_pandas(spark_df_inner):
-             return spark_df_inner.toPandas()
-     else:
-         to_pandas = _to_pandas
-
      # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
      # when we upgrade pyspark, we should check whether this workaround is still necessary
      # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
@@ -271,9 +262,9 @@ def spark_df_to_pandas(spark_df):
          )
          type_conversion_dict[field.name] = "datetime64[ns]"

-     df = to_pandas(spark_df)
+     df = _to_pandas(spark_df)
      if type_conversion_dict:
          df = df.astype(type_conversion_dict)
      return df
  else:
-     return to_pandas(spark_df)
+     return _to_pandas(spark_df)
mlrun/datastore/__init__.py CHANGED
@@ -12,8 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
  __all__ = [
      "DataItem",
      "get_store_resource",