mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (291)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +26 -112
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +46 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +47 -48
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +69 -0
  13. mlrun/common/db/sql_session.py +2 -3
  14. mlrun/common/formatters/__init__.py +19 -0
  15. mlrun/common/formatters/artifact.py +21 -0
  16. mlrun/common/formatters/base.py +78 -0
  17. mlrun/common/formatters/function.py +41 -0
  18. mlrun/common/formatters/pipeline.py +53 -0
  19. mlrun/common/formatters/project.py +51 -0
  20. mlrun/common/helpers.py +1 -2
  21. mlrun/common/model_monitoring/helpers.py +9 -5
  22. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  23. mlrun/common/schemas/__init__.py +24 -4
  24. mlrun/common/schemas/alert.py +203 -0
  25. mlrun/common/schemas/api_gateway.py +148 -0
  26. mlrun/common/schemas/artifact.py +18 -8
  27. mlrun/common/schemas/auth.py +11 -5
  28. mlrun/common/schemas/background_task.py +1 -1
  29. mlrun/common/schemas/client_spec.py +4 -1
  30. mlrun/common/schemas/feature_store.py +16 -16
  31. mlrun/common/schemas/frontend_spec.py +8 -7
  32. mlrun/common/schemas/function.py +5 -1
  33. mlrun/common/schemas/hub.py +11 -18
  34. mlrun/common/schemas/memory_reports.py +2 -2
  35. mlrun/common/schemas/model_monitoring/__init__.py +18 -3
  36. mlrun/common/schemas/model_monitoring/constants.py +83 -26
  37. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  38. mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
  39. mlrun/common/schemas/notification.py +4 -4
  40. mlrun/common/schemas/object.py +2 -2
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +1 -10
  43. mlrun/common/schemas/project.py +24 -23
  44. mlrun/common/schemas/runtime_resource.py +8 -12
  45. mlrun/common/schemas/schedule.py +3 -3
  46. mlrun/common/schemas/tag.py +1 -2
  47. mlrun/common/schemas/workflow.py +2 -2
  48. mlrun/common/types.py +7 -1
  49. mlrun/config.py +54 -17
  50. mlrun/data_types/to_pandas.py +10 -12
  51. mlrun/datastore/__init__.py +5 -8
  52. mlrun/datastore/alibaba_oss.py +130 -0
  53. mlrun/datastore/azure_blob.py +17 -5
  54. mlrun/datastore/base.py +62 -39
  55. mlrun/datastore/datastore.py +28 -9
  56. mlrun/datastore/datastore_profile.py +146 -20
  57. mlrun/datastore/filestore.py +0 -1
  58. mlrun/datastore/google_cloud_storage.py +6 -2
  59. mlrun/datastore/hdfs.py +56 -0
  60. mlrun/datastore/inmem.py +2 -2
  61. mlrun/datastore/redis.py +6 -2
  62. mlrun/datastore/s3.py +9 -0
  63. mlrun/datastore/snowflake_utils.py +43 -0
  64. mlrun/datastore/sources.py +201 -96
  65. mlrun/datastore/spark_utils.py +1 -2
  66. mlrun/datastore/store_resources.py +7 -7
  67. mlrun/datastore/targets.py +358 -104
  68. mlrun/datastore/utils.py +72 -58
  69. mlrun/datastore/v3io.py +5 -1
  70. mlrun/db/base.py +185 -35
  71. mlrun/db/factory.py +1 -1
  72. mlrun/db/httpdb.py +614 -179
  73. mlrun/db/nopdb.py +210 -26
  74. mlrun/errors.py +12 -1
  75. mlrun/execution.py +41 -24
  76. mlrun/feature_store/__init__.py +0 -2
  77. mlrun/feature_store/api.py +40 -72
  78. mlrun/feature_store/common.py +1 -1
  79. mlrun/feature_store/feature_set.py +76 -55
  80. mlrun/feature_store/feature_vector.py +28 -30
  81. mlrun/feature_store/ingestion.py +7 -6
  82. mlrun/feature_store/retrieval/base.py +16 -11
  83. mlrun/feature_store/retrieval/conversion.py +11 -13
  84. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  85. mlrun/feature_store/retrieval/job.py +9 -3
  86. mlrun/feature_store/retrieval/local_merger.py +2 -0
  87. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  88. mlrun/feature_store/steps.py +37 -34
  89. mlrun/features.py +9 -20
  90. mlrun/frameworks/_common/artifacts_library.py +9 -9
  91. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  92. mlrun/frameworks/_common/model_handler.py +48 -48
  93. mlrun/frameworks/_common/plan.py +2 -3
  94. mlrun/frameworks/_common/producer.py +3 -4
  95. mlrun/frameworks/_common/utils.py +5 -5
  96. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  97. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  98. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  99. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  100. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  101. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  102. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  103. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  104. mlrun/frameworks/_ml_common/plan.py +1 -1
  105. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  109. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  110. mlrun/frameworks/_ml_common/utils.py +4 -4
  111. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  112. mlrun/frameworks/huggingface/model_server.py +4 -4
  113. mlrun/frameworks/lgbm/__init__.py +33 -33
  114. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  117. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  119. mlrun/frameworks/lgbm/model_handler.py +10 -10
  120. mlrun/frameworks/lgbm/model_server.py +6 -6
  121. mlrun/frameworks/lgbm/utils.py +5 -5
  122. mlrun/frameworks/onnx/dataset.py +8 -8
  123. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  124. mlrun/frameworks/onnx/model_handler.py +6 -6
  125. mlrun/frameworks/onnx/model_server.py +7 -7
  126. mlrun/frameworks/parallel_coordinates.py +4 -3
  127. mlrun/frameworks/pytorch/__init__.py +18 -18
  128. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  129. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  130. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  131. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  132. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  133. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  134. mlrun/frameworks/pytorch/model_handler.py +17 -17
  135. mlrun/frameworks/pytorch/model_server.py +7 -7
  136. mlrun/frameworks/sklearn/__init__.py +13 -13
  137. mlrun/frameworks/sklearn/estimator.py +4 -4
  138. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  139. mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
  140. mlrun/frameworks/sklearn/model_handler.py +2 -2
  141. mlrun/frameworks/tf_keras/__init__.py +10 -7
  142. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  143. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  144. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  145. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  146. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  147. mlrun/frameworks/tf_keras/model_server.py +6 -6
  148. mlrun/frameworks/xgboost/__init__.py +13 -13
  149. mlrun/frameworks/xgboost/model_handler.py +6 -6
  150. mlrun/k8s_utils.py +14 -16
  151. mlrun/launcher/__init__.py +1 -1
  152. mlrun/launcher/base.py +16 -15
  153. mlrun/launcher/client.py +8 -6
  154. mlrun/launcher/factory.py +1 -1
  155. mlrun/launcher/local.py +17 -11
  156. mlrun/launcher/remote.py +16 -10
  157. mlrun/lists.py +7 -6
  158. mlrun/model.py +238 -73
  159. mlrun/model_monitoring/__init__.py +1 -1
  160. mlrun/model_monitoring/api.py +138 -315
  161. mlrun/model_monitoring/application.py +5 -296
  162. mlrun/model_monitoring/applications/__init__.py +24 -0
  163. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  164. mlrun/model_monitoring/applications/base.py +282 -0
  165. mlrun/model_monitoring/applications/context.py +214 -0
  166. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  167. mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
  168. mlrun/model_monitoring/applications/results.py +99 -0
  169. mlrun/model_monitoring/controller.py +104 -84
  170. mlrun/model_monitoring/controller_handler.py +13 -5
  171. mlrun/model_monitoring/db/__init__.py +18 -0
  172. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  173. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  174. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
  175. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  176. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  177. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  178. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  179. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  180. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  181. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  182. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
  183. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  184. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  185. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  187. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  188. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  189. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  190. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  191. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  192. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  193. mlrun/model_monitoring/evidently_application.py +6 -118
  194. mlrun/model_monitoring/features_drift_table.py +134 -106
  195. mlrun/model_monitoring/helpers.py +127 -28
  196. mlrun/model_monitoring/metrics/__init__.py +13 -0
  197. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  198. mlrun/model_monitoring/model_endpoint.py +3 -2
  199. mlrun/model_monitoring/prometheus.py +1 -4
  200. mlrun/model_monitoring/stream_processing.py +62 -231
  201. mlrun/model_monitoring/tracking_policy.py +9 -2
  202. mlrun/model_monitoring/writer.py +152 -124
  203. mlrun/package/__init__.py +6 -6
  204. mlrun/package/context_handler.py +5 -5
  205. mlrun/package/packager.py +7 -7
  206. mlrun/package/packagers/default_packager.py +6 -6
  207. mlrun/package/packagers/numpy_packagers.py +15 -15
  208. mlrun/package/packagers/pandas_packagers.py +5 -5
  209. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  210. mlrun/package/packagers_manager.py +19 -23
  211. mlrun/package/utils/_formatter.py +6 -6
  212. mlrun/package/utils/_pickler.py +2 -2
  213. mlrun/package/utils/_supported_format.py +4 -4
  214. mlrun/package/utils/log_hint_utils.py +2 -2
  215. mlrun/package/utils/type_hint_utils.py +4 -9
  216. mlrun/platforms/__init__.py +11 -10
  217. mlrun/platforms/iguazio.py +24 -203
  218. mlrun/projects/operations.py +35 -21
  219. mlrun/projects/pipelines.py +68 -99
  220. mlrun/projects/project.py +830 -266
  221. mlrun/render.py +3 -11
  222. mlrun/run.py +162 -166
  223. mlrun/runtimes/__init__.py +62 -7
  224. mlrun/runtimes/base.py +39 -32
  225. mlrun/runtimes/daskjob.py +8 -8
  226. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  227. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
  228. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  229. mlrun/runtimes/funcdoc.py +0 -28
  230. mlrun/runtimes/function_reference.py +1 -1
  231. mlrun/runtimes/kubejob.py +28 -122
  232. mlrun/runtimes/local.py +6 -3
  233. mlrun/runtimes/mpijob/__init__.py +0 -20
  234. mlrun/runtimes/mpijob/abstract.py +9 -10
  235. mlrun/runtimes/mpijob/v1.py +1 -1
  236. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  237. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  238. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  239. mlrun/runtimes/nuclio/application/application.py +523 -0
  240. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  241. mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
  242. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  243. mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
  244. mlrun/runtimes/pod.py +286 -88
  245. mlrun/runtimes/remotesparkjob.py +2 -2
  246. mlrun/runtimes/sparkjob/spark3job.py +51 -34
  247. mlrun/runtimes/utils.py +7 -75
  248. mlrun/secrets.py +9 -5
  249. mlrun/serving/remote.py +2 -7
  250. mlrun/serving/routers.py +13 -10
  251. mlrun/serving/server.py +22 -26
  252. mlrun/serving/states.py +99 -25
  253. mlrun/serving/utils.py +3 -3
  254. mlrun/serving/v1_serving.py +6 -7
  255. mlrun/serving/v2_serving.py +59 -20
  256. mlrun/track/tracker.py +2 -1
  257. mlrun/track/tracker_manager.py +3 -3
  258. mlrun/track/trackers/mlflow_tracker.py +1 -2
  259. mlrun/utils/async_http.py +5 -7
  260. mlrun/utils/azure_vault.py +1 -1
  261. mlrun/utils/clones.py +1 -2
  262. mlrun/utils/condition_evaluator.py +3 -3
  263. mlrun/utils/db.py +3 -3
  264. mlrun/utils/helpers.py +183 -197
  265. mlrun/utils/http.py +2 -5
  266. mlrun/utils/logger.py +76 -14
  267. mlrun/utils/notifications/notification/__init__.py +17 -12
  268. mlrun/utils/notifications/notification/base.py +14 -2
  269. mlrun/utils/notifications/notification/console.py +2 -0
  270. mlrun/utils/notifications/notification/git.py +3 -1
  271. mlrun/utils/notifications/notification/ipython.py +3 -1
  272. mlrun/utils/notifications/notification/slack.py +101 -21
  273. mlrun/utils/notifications/notification/webhook.py +11 -1
  274. mlrun/utils/notifications/notification_pusher.py +155 -30
  275. mlrun/utils/retryer.py +208 -0
  276. mlrun/utils/singleton.py +1 -1
  277. mlrun/utils/v3io_clients.py +2 -4
  278. mlrun/utils/version/version.json +2 -2
  279. mlrun/utils/version/version.py +2 -6
  280. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
  281. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  282. mlrun/kfpops.py +0 -868
  283. mlrun/model_monitoring/batch.py +0 -1095
  284. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  285. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  286. mlrun/platforms/other.py +0 -306
  287. mlrun-1.6.4rc2.dist-info/RECORD +0 -314
  288. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  289. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
  290. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  291. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/feature_store/api.py

@@ -15,9 +15,8 @@ import copy
 import importlib.util
 import pathlib
 import sys
-import typing
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 
 import pandas as pd
 from deprecated import deprecated
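
A pattern that recurs throughout this diff: type annotations move from the typing module's capitalized generics (List, Dict, Tuple) to the builtin generics standardized by PEP 585, which require Python 3.9 or later. A minimal before/after sketch of the pattern; the function below is illustrative, not from mlrun:

# Before: typing-module generics
from typing import Dict, List


def column_lengths_old(columns: List[str]) -> Dict[str, int]:
    return {column: len(column) for column in columns}


# After (PEP 585, Python 3.9+): builtin generics, no typing import needed
def column_lengths(columns: list[str]) -> dict[str, int]:
    return {column: len(column) for column in columns}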
@@ -103,7 +102,7 @@ def get_offline_features(
     entity_timestamp_column: str = None,
     target: DataTargetBase = None,
     run_config: RunConfig = None,
-    drop_columns: List[str] = None,
+    drop_columns: list[str] = None,
     start_time: Union[str, datetime] = None,
     end_time: Union[str, datetime] = None,
     with_indexes: bool = False,
@@ -111,9 +110,10 @@ def get_offline_features(
     engine: str = None,
     engine_args: dict = None,
     query: str = None,
-    order_by: Union[str, List[str]] = None,
+    order_by: Union[str, list[str]] = None,
     spark_service: str = None,
-    timestamp_for_filtering: Union[str, Dict[str, str]] = None,
+    timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results
 
@@ -137,7 +137,10 @@ def get_offline_features(
         ]
         vector = FeatureVector(features=features)
         resp = get_offline_features(
-            vector, entity_rows=trades, entity_timestamp_column="time", query="ticker in ['GOOG'] and bid>100"
+            vector,
+            entity_rows=trades,
+            entity_timestamp_column="time",
+            query="ticker in ['GOOG'] and bid>100",
         )
         print(resp.to_dataframe())
         print(vector.get_stats_table())
@@ -173,6 +176,13 @@ def get_offline_features(
                                        By default, the filter executes on the timestamp_key of each feature set.
                                        Note: the time filtering is performed on each feature set before the
                                        merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+
 
     """
     return _get_offline_features(
@@ -192,6 +202,7 @@ def get_offline_features(
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )
 
 
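The new additional_filters argument is threaded from get_offline_features down to the underlying parquet reader. A hedged usage sketch, assuming a parquet-backed feature vector; the vector URI and column names here are illustrative:

import mlrun.feature_store as fstore

# Filter rows at read time using pyarrow-style (column, operator, value) tuples.
resp = fstore.get_offline_features(
    "store://feature-vectors/my-project/sales-vector",
    additional_filters=[("Product", "=", "Computer"), ("bid", ">", 100)],
)
df = resp.to_dataframe()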
@@ -201,7 +212,7 @@ def _get_offline_features(
     entity_timestamp_column: str = None,
     target: DataTargetBase = None,
     run_config: RunConfig = None,
-    drop_columns: List[str] = None,
+    drop_columns: list[str] = None,
     start_time: Union[str, datetime] = None,
     end_time: Union[str, datetime] = None,
     with_indexes: bool = False,
@@ -209,9 +220,10 @@ def _get_offline_features(
     engine: str = None,
     engine_args: dict = None,
     query: str = None,
-    order_by: Union[str, List[str]] = None,
+    order_by: Union[str, list[str]] = None,
     spark_service: str = None,
-    timestamp_for_filtering: Union[str, Dict[str, str]] = None,
+    timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -250,6 +262,7 @@ def _get_offline_features(
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )
 
     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -265,6 +278,7 @@ def _get_offline_features(
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )
 
 
@@ -280,7 +294,7 @@ def get_online_feature_service(
     fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
     impute_policy: dict = None,
     update_stats: bool = False,
-    entity_keys: List[str] = None,
+    entity_keys: list[str] = None,
 ):
     """initialize and return online feature vector service api,
     returns :py:class:`~mlrun.feature_store.OnlineVectorService`
@@ -308,7 +322,7 @@ def get_online_feature_service(
 
     Example::
 
-        svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
+        svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
         try:
             resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
             print(resp)
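
Completing the docstring example above, a hedged sketch of the full acquire/use/release pattern; vector_uri is assumed to reference an existing feature vector:

import mlrun.feature_store as fstore

svc = fstore.get_online_feature_service(vector_uri, entity_keys=["ticker"])
try:
    resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
    print(resp)
finally:
    svc.close()  # release the service's underlying connections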
@@ -361,7 +375,7 @@ def _get_online_feature_service(
     fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
     impute_policy: dict = None,
     update_stats: bool = False,
-    entity_keys: List[str] = None,
+    entity_keys: list[str] = None,
 ) -> OnlineVectorService:
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
@@ -413,7 +427,7 @@ def _rename_source_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame:
     return df
 
 
-def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
+def _get_namespace(run_config: RunConfig) -> dict[str, Any]:
     # if running locally, we need to import the file dynamically to get its namespace
     if run_config and run_config.local and run_config.function:
         filename = run_config.function.spec.filename
@@ -431,7 +445,7 @@ def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
 def ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: List[DataTargetBase] = None,
+    targets: list[DataTargetBase] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
@@ -457,7 +471,7 @@ def ingest(
         df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
 
         # for running as remote job
-        config = RunConfig(image='mlrun/mlrun')
+        config = RunConfig(image="mlrun/mlrun")
         df = ingest(stocks_set, stocks, run_config=config)
 
         # specify source and targets
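
The docstring's "specify source and targets" case is truncated in this hunk; a hedged sketch of what such a call can look like, with illustrative paths and names:

import mlrun.feature_store as fstore
from mlrun.datastore.sources import CSVSource
from mlrun.datastore.targets import ParquetTarget

# Ingest from an explicit source into an explicit list of targets.
stocks_set = fstore.FeatureSet("stocks", entities=["ticker"])
source = CSVSource("stocks-csv", path="v3io:///projects/demo/stocks.csv")
df = stocks_set.ingest(source, targets=[ParquetTarget()])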
@@ -511,7 +525,7 @@ def ingest(
 def _ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: List[DataTargetBase] = None,
+    targets: list[DataTargetBase] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
@@ -876,7 +890,7 @@ def _preview(
 def _run_ingestion_job(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: List[DataTargetBase] = None,
+    targets: list[DataTargetBase] = None,
     name: str = None,
     infer_options: InferOptions = InferOptions.default(),
     run_config: RunConfig = None,
@@ -901,11 +915,11 @@ def _run_ingestion_job(
 def deploy_ingestion_service_v2(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: List[DataTargetBase] = None,
+    targets: list[DataTargetBase] = None,
     name: str = None,
     run_config: RunConfig = None,
     verbose=False,
-) -> typing.Tuple[str, BaseRuntime]:
+) -> tuple[str, BaseRuntime]:
     """Start real-time ingestion service using nuclio function
 
     Deploy a real-time function implementing feature ingestion pipeline
@@ -944,11 +958,11 @@ def _deploy_ingestion_service_v2(
 def _deploy_ingestion_service_v2(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: List[DataTargetBase] = None,
+    targets: list[DataTargetBase] = None,
     name: str = None,
     run_config: RunConfig = None,
     verbose=False,
-) -> typing.Tuple[str, BaseRuntime]:
+) -> tuple[str, BaseRuntime]:
     if isinstance(featureset, str):
         featureset = get_feature_set_by_uri(featureset)
 
@@ -1003,58 +1017,11 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function
 
 
-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: List[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
     source: BaseSourceDriver = None,
-    targets: List[BaseStoreTarget] = None,
+    targets: list[BaseStoreTarget] = None,
     infer_options: InferOptions = InferOptions.default(),
     mlrun_context=None,
     namespace=None,
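
The module-level deploy_ingestion_service wrapper, deprecated since 1.5.0, is removed on schedule in 1.7.0. A hedged migration sketch (my_set, source, and config are assumed to already exist); note the replacement returns a (url, function) tuple rather than only the URL:

# Before (removed in 1.7.0): returned only the endpoint URL.
# endpoint = deploy_ingestion_service(my_set, source=source, run_config=config)

# After: call the method on the feature set; it returns both the endpoint URL
# and the deployed runtime object.
endpoint, function = my_set.deploy_ingestion_service(source=source, run_config=config)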
@@ -1122,9 +1089,10 @@ def _ingest_with_spark(
         df_to_write = target.prepare_spark_df(
             df_to_write, key_columns, timestamp_key, spark_options
         )
+        write_format = spark_options.pop("format", None)
         if overwrite:
             write_spark_dataframe_with_options(
-                spark_options, df_to_write, "overwrite"
+                spark_options, df_to_write, "overwrite", write_format=write_format
             )
         else:
             # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1132,7 +1100,7 @@ def _ingest_with_spark(
             df_to_write.persist()
             if df_to_write.count() > 0:
                 write_spark_dataframe_with_options(
-                    spark_options, df_to_write, "append"
+                    spark_options, df_to_write, "append", write_format=write_format
                 )
             target.update_resource_status("ready")
 
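The Spark ingestion path now pops the "format" key out of spark_options and passes it to write_spark_dataframe_with_options explicitly, so both the overwrite and append branches write with the same format. A hedged sketch of the underlying idea; the helper below is hypothetical, not mlrun's implementation:

# Hypothetical helper illustrating the pattern: the writer format is applied
# separately from the remaining Spark options (which include the target path).
def write_with_options(spark_options: dict, df, mode: str, write_format=None):
    writer = df.write.mode(mode).options(**spark_options)
    if write_format:
        writer = writer.format(write_format)  # e.g. "parquet"
    writer.save()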
@@ -1207,7 +1175,7 @@ def _infer_from_static_df(
 def set_task_params(
     featureset: FeatureSet,
     source: DataSource = None,
-    targets: List[DataTargetBase] = None,
+    targets: list[DataTargetBase] = None,
     parameters: dict = None,
     infer_options: InferOptions = InferOptions.Null,
     overwrite=None,

mlrun/feature_store/common.py

@@ -192,7 +192,7 @@ class RunConfig:
         owner=None,
         credentials: typing.Optional[mlrun.model.Credentials] = None,
         code: str = None,
-        requirements: typing.Union[str, typing.List[str]] = None,
+        requirements: typing.Union[str, list[str]] = None,
         extra_spec: dict = None,
         auth_info=None,
     ):

mlrun/feature_store/feature_set.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 import warnings
 from datetime import datetime
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Optional, Union
 
 import pandas as pd
 from storey import EmitEveryEvent, EmitPolicy
@@ -119,9 +119,9 @@ class FeatureSetSpec(ModelObj):
 
         self.owner = owner
         self.description = description
-        self.entities: List[Union[Entity, str]] = entities or []
-        self.relations: Dict[str, Union[Entity, str]] = relations or {}
-        self.features: List[Feature] = features or []
+        self.entities: list[Union[Entity, str]] = entities or []
+        self.relations: dict[str, Union[Entity, str]] = relations or {}
+        self.features: list[Feature] = features or []
         self.partition_keys = partition_keys or []
         self.timestamp_key = timestamp_key
         self.source = source
@@ -136,12 +136,12 @@ class FeatureSetSpec(ModelObj):
         self.with_default_targets = True
 
     @property
-    def entities(self) -> List[Entity]:
+    def entities(self) -> list[Entity]:
         """feature set entities (indexes)"""
         return self._entities
 
     @entities.setter
-    def entities(self, entities: List[Union[Entity, str]]):
+    def entities(self, entities: list[Union[Entity, str]]):
         if entities:
             # if the entity is a string, convert it to Entity class
             for i, entity in enumerate(entities):
@@ -163,21 +163,21 @@ class FeatureSetSpec(ModelObj):
         self._entities = ObjectList.from_list(Entity, entities)
 
     @property
-    def features(self) -> List[Feature]:
+    def features(self) -> list[Feature]:
         """feature set features list"""
         return self._features
 
     @features.setter
-    def features(self, features: List[Feature]):
+    def features(self, features: list[Feature]):
         self._features = ObjectList.from_list(Feature, features)
 
     @property
-    def targets(self) -> List[DataTargetBase]:
+    def targets(self) -> list[DataTargetBase]:
         """list of desired targets (material storage)"""
         return self._targets
 
     @targets.setter
-    def targets(self, targets: List[DataTargetBase]):
+    def targets(self, targets: list[DataTargetBase]):
         self._targets = ObjectList.from_list(DataTargetBase, targets)
 
     @property
@@ -230,12 +230,12 @@ class FeatureSetSpec(ModelObj):
         self._source = source
 
     @property
-    def relations(self) -> Dict[str, Entity]:
+    def relations(self) -> dict[str, Entity]:
         """feature set relations dict"""
         return self._relations
 
     @relations.setter
-    def relations(self, relations: Dict[str, Entity]):
+    def relations(self, relations: dict[str, Entity]):
         for col, ent in relations.items():
             if isinstance(ent, str):
                 relations[col] = Entity(ent)
@@ -284,12 +284,12 @@ class FeatureSetStatus(ModelObj):
         self.run_uri = run_uri
 
     @property
-    def targets(self) -> List[DataTarget]:
+    def targets(self) -> list[DataTarget]:
         """list of material storage targets + their status/path"""
         return self._targets
 
     @targets.setter
-    def targets(self, targets: List[DataTarget]):
+    def targets(self, targets: list[DataTarget]):
         self._targets = ObjectList.from_list(DataTarget, targets)
 
     def update_target(self, target: DataTarget):
@@ -318,8 +318,6 @@ def emit_policy_to_dict(policy: EmitPolicy):
 
 
 class FeatureSet(ModelObj):
-    """Feature set object, defines a set of features and their data pipeline"""
-
     kind = mlrun.common.schemas.ObjectKind.feature_set.value
     _dict_fields = ["kind", "metadata", "spec", "status"]
 
@@ -327,11 +325,11 @@ class FeatureSet(ModelObj):
         self,
         name: str = None,
         description: str = None,
-        entities: List[Union[Entity, str]] = None,
+        entities: list[Union[Entity, str]] = None,
         timestamp_key: str = None,
         engine: str = None,
         label_column: str = None,
-        relations: Dict[str, Union[Entity, str]] = None,
+        relations: dict[str, Union[Entity, str]] = None,
         passthrough: bool = None,
     ):
         """Feature set object, defines a set of features and their data pipeline
@@ -339,7 +337,10 @@ class FeatureSet(ModelObj):
         example::
 
             import mlrun.feature_store as fstore
-            ticks = fstore.FeatureSet("ticks", entities=["stock"], timestamp_key="timestamp")
+
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
             ticks.ingest(df)
 
         :param name: name of the feature set
@@ -532,7 +533,7 @@ class FeatureSet(ModelObj):
             self, **(class_args if class_args is not None else {})
         )
 
-    def purge_targets(self, target_names: List[str] = None, silent: bool = False):
+    def purge_targets(self, target_names: list[str] = None, silent: bool = False):
         """Delete data of specific targets
         :param target_names: List of names of targets to delete (default: delete all ingested targets)
        :param silent: Fail silently if target doesn't exist in featureset status"""
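
For context, a hedged sketch of calling purge_targets; the feature set URI and target name are illustrative:

import mlrun.feature_store as fstore

# Delete the ingested data of one named target; silent=True ignores targets
# that are missing from the feature set status.
ticks = fstore.get_feature_set("store://feature-sets/my-project/ticks")
ticks.purge_targets(target_names=["parquet"], silent=True)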
@@ -560,7 +561,7 @@ class FeatureSet(ModelObj):
 
     def update_targets_for_ingest(
         self,
-        targets: List[DataTargetBase],
+        targets: list[DataTargetBase],
         overwrite: bool = None,
     ):
         if not targets:
@@ -581,7 +582,7 @@ class FeatureSet(ModelObj):
         update_targets_run_id_for_ingest(overwrite, targets, status_targets)
 
     def _reload_and_get_status_targets(
-        self, target_names: List[str] = None, silent: bool = False
+        self, target_names: list[str] = None, silent: bool = False
     ):
         try:
             self.reload(update_spec=False)
@@ -602,9 +603,7 @@ class FeatureSet(ModelObj):
                     pass
                 else:
                     raise mlrun.errors.MLRunNotFoundError(
-                        "Target not found in status (fset={0}, target={1})".format(
-                            self.metadata.name, target_name
-                        )
+                        f"Target not found in status (fset={self.metadata.name}, target={target_name})"
                     )
         else:
             targets = self.status.targets
@@ -621,7 +620,7 @@ class FeatureSet(ModelObj):
         name: str,
         value_type: mlrun.data_types.ValueType = None,
         description: str = None,
-        labels: Optional[Dict[str, str]] = None,
+        labels: Optional[dict[str, str]] = None,
     ):
         """add/set an entity (dataset index)
 
@@ -629,12 +628,12 @@ def add_entity(
 
             import mlrun.feature_store as fstore
 
-            ticks = fstore.FeatureSet("ticks",
-                                      entities=["stock"],
-                                      timestamp_key="timestamp")
-            ticks.add_entity("country",
-                             mlrun.data_types.ValueType.STRING,
-                             description="stock country")
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
+            ticks.add_entity(
+                "country", mlrun.data_types.ValueType.STRING, description="stock country"
+            )
             ticks.add_entity("year", mlrun.data_types.ValueType.INT16)
             ticks.save()
 
@@ -654,13 +653,23 @@ def add_feature(
             import mlrun.feature_store as fstore
             from mlrun.features import Feature
 
-            ticks = fstore.FeatureSet("ticks",
-                                      entities=["stock"],
-                                      timestamp_key="timestamp")
-            ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.STRING,
-                                      description="client consistency"),"ABC01")
-            ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.FLOAT,
-                                      description="client volatility"),"SAB")
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
+            ticks.add_feature(
+                Feature(
+                    value_type=mlrun.data_types.ValueType.STRING,
+                    description="client consistency",
+                ),
+                "ABC01",
+            )
+            ticks.add_feature(
+                Feature(
+                    value_type=mlrun.data_types.ValueType.FLOAT,
+                    description="client volatility",
+                ),
+                "SAB",
+            )
             ticks.save()
 
         :param feature: setting of Feature
@@ -864,15 +873,18 @@ class FeatureSet(ModelObj):
         example::
 
            import mlrun.feature_store as fstore
+
            ...
-            ticks = fstore.FeatureSet("ticks",
-                                      entities=["stock"],
-                                      timestamp_key="timestamp")
-            ticks.add_aggregation(name='priceN',
-                                  column='price',
-                                  operations=['avg'],
-                                  windows=['1d'],
-                                  period='1h')
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
+            ticks.add_aggregation(
+                name="priceN",
+                column="price",
+                operations=["avg"],
+                windows=["1d"],
+                period="1h",
+            )
             ticks.plot(rankdir="LR", with_targets=True)
 
         :param filename: target filepath for the graph image (None for the notebook)
@@ -905,6 +917,7 @@ def to_dataframe(
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -916,6 +929,12 @@ def to_dataframe(
         :param end_time: filter by end time
         :param time_column: specify the time column name in the file
         :param kwargs: additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                   Each tuple should be in the format (column_name, operator, value).
+                                   Supported operators: "=", ">=", "<=", ">", "<".
+                                   Example: [("Product", "=", "Computer")]
+                                   For all supported filters, please see:
+                                   https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
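
FeatureSet.to_dataframe gains the same additional_filters pass-through as get_offline_features. A hedged sketch reusing the ticks feature set from the docstrings above; the column names are illustrative:

# Read only the matching rows from the feature set's offline target.
df = ticks.to_dataframe(
    columns=["price"],
    additional_filters=[("stock", "=", "GOOG")],
)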
@@ -934,6 +953,7 @@ def to_dataframe(
             start_time=start_time,
             end_time=end_time,
             time_field=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -953,6 +973,7 @@ def to_dataframe(
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
@@ -983,7 +1004,7 @@ class FeatureSet(ModelObj):
     def ingest(
         self,
         source=None,
-        targets: List[DataTargetBase] = None,
+        targets: list[DataTargetBase] = None,
         namespace=None,
         return_df: bool = True,
         infer_options: InferOptions = InferOptions.default(),
@@ -1009,7 +1030,7 @@ def ingest(
             df = stocks_set.ingest(stocks, infer_options=fstore.InferOptions.default())
 
             # for running as remote job
-            config = RunConfig(image='mlrun/mlrun')
+            config = RunConfig(image="mlrun/mlrun")
             df = ingest(stocks_set, stocks, run_config=config)
 
             # specify source and targets
@@ -1085,11 +1106,11 @@ class FeatureSet(ModelObj):
     def deploy_ingestion_service(
         self,
         source: DataSource = None,
-        targets: List[DataTargetBase] = None,
+        targets: list[DataTargetBase] = None,
         name: str = None,
         run_config: RunConfig = None,
         verbose=False,
-    ) -> Tuple[str, BaseRuntime]:
+    ) -> tuple[str, BaseRuntime]:
         """Start real-time ingestion service using nuclio function
 
         Deploy a real-time function implementing feature ingestion pipeline
@@ -1122,7 +1143,7 @@ class FeatureSet(ModelObj):
     def extract_relation_keys(
         self,
         other_feature_set,
-        relations: Dict[str, Union[str, Entity]] = None,
+        relations: dict[str, Union[str, Entity]] = None,
     ) -> list[str]:
         """
         Checks whether a feature set can be merged to the right of this feature set.
@@ -1189,10 +1210,10 @@ class SparkAggregateByKey(StepToDict):
 
     def __init__(
         self,
-        key_columns: List[str],
+        key_columns: list[str],
         time_column: str,
-        aggregates: List[Dict],
-        emit_policy: Union[EmitPolicy, Dict] = None,
+        aggregates: list[dict],
+        emit_policy: Union[EmitPolicy, dict] = None,
     ):
         self.key_columns = key_columns
         self.time_column = time_column
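
A hedged construction sketch for SparkAggregateByKey, inferred only from the signature above. The aggregate dict schema is not shown in this diff; the keys below mirror add_aggregation's parameters and are an assumption to verify against the mlrun source:

# Assumed aggregate dict keys (name, column, operations, windows).
step = SparkAggregateByKey(
    key_columns=["stock"],
    time_column="timestamp",
    aggregates=[
        {"name": "priceN", "column": "price", "operations": ["avg"], "windows": ["1d"]}
    ],
)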