mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (305):
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +134 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +133 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc8.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc8.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
@@ -15,9 +15,8 @@ import copy
15
15
  import importlib.util
16
16
  import pathlib
17
17
  import sys
18
- import typing
19
18
  from datetime import datetime
20
- from typing import Any, Dict, List, Optional, Union
19
+ from typing import Any, Optional, Union
21
20
 
22
21
  import pandas as pd
23
22
  from deprecated import deprecated
@@ -103,7 +102,7 @@ def get_offline_features(
103
102
  entity_timestamp_column: str = None,
104
103
  target: DataTargetBase = None,
105
104
  run_config: RunConfig = None,
106
- drop_columns: List[str] = None,
105
+ drop_columns: list[str] = None,
107
106
  start_time: Union[str, datetime] = None,
108
107
  end_time: Union[str, datetime] = None,
109
108
  with_indexes: bool = False,
@@ -111,9 +110,10 @@ def get_offline_features(
111
110
  engine: str = None,
112
111
  engine_args: dict = None,
113
112
  query: str = None,
114
- order_by: Union[str, List[str]] = None,
113
+ order_by: Union[str, list[str]] = None,
115
114
  spark_service: str = None,
116
- timestamp_for_filtering: Union[str, Dict[str, str]] = None,
115
+ timestamp_for_filtering: Union[str, dict[str, str]] = None,
116
+ additional_filters: list = None,
117
117
  ):
118
118
  """retrieve offline feature vector results
119
119
 
@@ -137,7 +137,10 @@ def get_offline_features(
137
137
  ]
138
138
  vector = FeatureVector(features=features)
139
139
  resp = get_offline_features(
140
- vector, entity_rows=trades, entity_timestamp_column="time", query="ticker in ['GOOG'] and bid>100"
140
+ vector,
141
+ entity_rows=trades,
142
+ entity_timestamp_column="time",
143
+ query="ticker in ['GOOG'] and bid>100",
141
144
  )
142
145
  print(resp.to_dataframe())
143
146
  print(vector.get_stats_table())
@@ -173,6 +176,13 @@ def get_offline_features(
173
176
  By default, the filter executes on the timestamp_key of each feature set.
174
177
  Note: the time filtering is performed on each feature set before the
175
178
  merge process using start_time and end_time params.
179
+ :param additional_filters: List of additional_filter conditions as tuples.
180
+ Each tuple should be in the format (column_name, operator, value).
181
+ Supported operators: "=", ">=", "<=", ">", "<".
182
+ Example: [("Product", "=", "Computer")]
183
+ For all supported filters, please see:
184
+ https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
185
+
176
186
 
177
187
  """
178
188
  return _get_offline_features(
@@ -192,6 +202,7 @@ def get_offline_features(
192
202
  order_by,
193
203
  spark_service,
194
204
  timestamp_for_filtering,
205
+ additional_filters,
195
206
  )
196
207
 
197
208
 
@@ -201,7 +212,7 @@ def _get_offline_features(
201
212
  entity_timestamp_column: str = None,
202
213
  target: DataTargetBase = None,
203
214
  run_config: RunConfig = None,
204
- drop_columns: List[str] = None,
215
+ drop_columns: list[str] = None,
205
216
  start_time: Union[str, datetime] = None,
206
217
  end_time: Union[str, datetime] = None,
207
218
  with_indexes: bool = False,
@@ -209,15 +220,21 @@ def _get_offline_features(
209
220
  engine: str = None,
210
221
  engine_args: dict = None,
211
222
  query: str = None,
212
- order_by: Union[str, List[str]] = None,
223
+ order_by: Union[str, list[str]] = None,
213
224
  spark_service: str = None,
214
- timestamp_for_filtering: Union[str, Dict[str, str]] = None,
225
+ timestamp_for_filtering: Union[str, dict[str, str]] = None,
226
+ additional_filters=None,
215
227
  ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
216
228
  if entity_rows is None and entity_timestamp_column is not None:
217
229
  raise mlrun.errors.MLRunInvalidArgumentError(
218
230
  "entity_timestamp_column param "
219
231
  "can not be specified without entity_rows param"
220
232
  )
233
+ if isinstance(target, BaseStoreTarget) and not target.support_pandas:
234
+ raise mlrun.errors.MLRunInvalidArgumentError(
235
+ f"get_offline_features does not support targets that do not support pandas engine."
236
+ f" Target kind: {target.kind}"
237
+ )
221
238
 
222
239
  if isinstance(feature_vector, FeatureVector):
223
240
  update_stats = True
@@ -250,6 +267,7 @@ def _get_offline_features(
250
267
  start_time=start_time,
251
268
  end_time=end_time,
252
269
  timestamp_for_filtering=timestamp_for_filtering,
270
+ additional_filters=additional_filters,
253
271
  )
254
272
 
255
273
  merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -265,6 +283,7 @@ def _get_offline_features(
265
283
  update_stats=update_stats,
266
284
  query=query,
267
285
  order_by=order_by,
286
+ additional_filters=additional_filters,
268
287
  )
269
288
 
270
289
 
@@ -280,7 +299,7 @@ def get_online_feature_service(
280
299
  fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
281
300
  impute_policy: dict = None,
282
301
  update_stats: bool = False,
283
- entity_keys: List[str] = None,
302
+ entity_keys: list[str] = None,
284
303
  ):
285
304
  """initialize and return online feature vector service api,
286
305
  returns :py:class:`~mlrun.feature_store.OnlineVectorService`
@@ -308,7 +327,7 @@ def get_online_feature_service(
308
327
 
309
328
  Example::
310
329
 
311
- svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
330
+ svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
312
331
  try:
313
332
  resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
314
333
  print(resp)
@@ -361,7 +380,7 @@ def _get_online_feature_service(
361
380
  fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
362
381
  impute_policy: dict = None,
363
382
  update_stats: bool = False,
364
- entity_keys: List[str] = None,
383
+ entity_keys: list[str] = None,
365
384
  ) -> OnlineVectorService:
366
385
  if isinstance(feature_vector, FeatureVector):
367
386
  update_stats = True
@@ -413,7 +432,7 @@ def _rename_source_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame:
413
432
  return df
414
433
 
415
434
 
416
- def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
435
+ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:
417
436
  # if running locally, we need to import the file dynamically to get its namespace
418
437
  if run_config and run_config.local and run_config.function:
419
438
  filename = run_config.function.spec.filename
@@ -431,7 +450,7 @@ def _get_namespace(run_config: RunConfig) -> Dict[str, Any]:
431
450
  def ingest(
432
451
  featureset: Union[FeatureSet, str] = None,
433
452
  source=None,
434
- targets: List[DataTargetBase] = None,
453
+ targets: list[DataTargetBase] = None,
435
454
  namespace=None,
436
455
  return_df: bool = True,
437
456
  infer_options: InferOptions = InferOptions.default(),
@@ -457,7 +476,7 @@ def ingest(
457
476
  df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
458
477
 
459
478
  # for running as remote job
460
- config = RunConfig(image='mlrun/mlrun')
479
+ config = RunConfig(image="mlrun/mlrun")
461
480
  df = ingest(stocks_set, stocks, run_config=config)
462
481
 
463
482
  # specify source and targets
@@ -511,7 +530,7 @@ def ingest(
511
530
  def _ingest(
512
531
  featureset: Union[FeatureSet, str] = None,
513
532
  source=None,
514
- targets: List[DataTargetBase] = None,
533
+ targets: list[DataTargetBase] = None,
515
534
  namespace=None,
516
535
  return_df: bool = True,
517
536
  infer_options: InferOptions = InferOptions.default(),
@@ -876,7 +895,7 @@ def _preview(
876
895
  def _run_ingestion_job(
877
896
  featureset: Union[FeatureSet, str],
878
897
  source: DataSource = None,
879
- targets: List[DataTargetBase] = None,
898
+ targets: list[DataTargetBase] = None,
880
899
  name: str = None,
881
900
  infer_options: InferOptions = InferOptions.default(),
882
901
  run_config: RunConfig = None,
@@ -901,11 +920,11 @@ def _run_ingestion_job(
901
920
  def deploy_ingestion_service_v2(
902
921
  featureset: Union[FeatureSet, str],
903
922
  source: DataSource = None,
904
- targets: List[DataTargetBase] = None,
923
+ targets: list[DataTargetBase] = None,
905
924
  name: str = None,
906
925
  run_config: RunConfig = None,
907
926
  verbose=False,
908
- ) -> typing.Tuple[str, BaseRuntime]:
927
+ ) -> tuple[str, BaseRuntime]:
909
928
  """Start real-time ingestion service using nuclio function
910
929
 
911
930
  Deploy a real-time function implementing feature ingestion pipeline
@@ -944,11 +963,11 @@ def deploy_ingestion_service_v2(
944
963
  def _deploy_ingestion_service_v2(
945
964
  featureset: Union[FeatureSet, str],
946
965
  source: DataSource = None,
947
- targets: List[DataTargetBase] = None,
966
+ targets: list[DataTargetBase] = None,
948
967
  name: str = None,
949
968
  run_config: RunConfig = None,
950
969
  verbose=False,
951
- ) -> typing.Tuple[str, BaseRuntime]:
970
+ ) -> tuple[str, BaseRuntime]:
952
971
  if isinstance(featureset, str):
953
972
  featureset = get_feature_set_by_uri(featureset)
954
973
 
@@ -1003,58 +1022,11 @@ def _deploy_ingestion_service_v2(
1003
1022
  return function.deploy(), function
1004
1023
 
1005
1024
 
1006
- @deprecated(
1007
- version="1.5.0",
1008
- reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
1009
- category=FutureWarning,
1010
- )
1011
- def deploy_ingestion_service(
1012
- featureset: Union[FeatureSet, str],
1013
- source: DataSource = None,
1014
- targets: List[DataTargetBase] = None,
1015
- name: str = None,
1016
- run_config: RunConfig = None,
1017
- verbose=False,
1018
- ) -> str:
1019
- """Start real-time ingestion service using nuclio function
1020
-
1021
- Deploy a real-time function implementing feature ingestion pipeline
1022
- the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
1023
-
1024
- the `run_config` parameter allow specifying the function and job configuration,
1025
- see: :py:class:`~mlrun.feature_store.RunConfig`
1026
-
1027
- example::
1028
-
1029
- source = HTTPSource()
1030
- func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
1031
- config = RunConfig(function=func)
1032
- my_set.deploy_ingestion_service(source, run_config=config)
1033
-
1034
- :param featureset: feature set object or uri
1035
- :param source: data source object describing the online or offline source
1036
- :param targets: list of data target objects
1037
- :param name: name for the job/function
1038
- :param run_config: service runtime configuration (function object/uri, resources, etc..)
1039
- :param verbose: verbose log
1040
-
1041
- :return: URL to access the deployed ingestion service
1042
- """
1043
- endpoint, _ = featureset.deploy_ingestion_service(
1044
- source=source,
1045
- targets=targets,
1046
- name=name,
1047
- run_config=run_config,
1048
- verbose=verbose,
1049
- )
1050
- return endpoint
1051
-
1052
-
1053
1025
  def _ingest_with_spark(
1054
1026
  spark=None,
1055
1027
  featureset: Union[FeatureSet, str] = None,
1056
1028
  source: BaseSourceDriver = None,
1057
- targets: List[BaseStoreTarget] = None,
1029
+ targets: list[BaseStoreTarget] = None,
1058
1030
  infer_options: InferOptions = InferOptions.default(),
1059
1031
  mlrun_context=None,
1060
1032
  namespace=None,
@@ -1065,6 +1037,8 @@ def _ingest_with_spark(
1065
1037
  try:
1066
1038
  import pyspark.sql
1067
1039
 
1040
+ from mlrun.datastore.spark_utils import check_special_columns_exists
1041
+
1068
1042
  if spark is None or spark is True:
1069
1043
  # create spark context
1070
1044
 
@@ -1077,13 +1051,13 @@ def _ingest_with_spark(
1077
1051
 
1078
1052
  spark = (
1079
1053
  pyspark.sql.SparkSession.builder.appName(session_name)
1054
+ .config("spark.driver.memory", "2g")
1080
1055
  .config("spark.sql.session.timeZone", "UTC")
1081
1056
  .getOrCreate()
1082
1057
  )
1083
1058
  created_spark_context = True
1084
1059
 
1085
1060
  timestamp_key = featureset.spec.timestamp_key
1086
-
1087
1061
  if isinstance(source, pd.DataFrame):
1088
1062
  df = spark.createDataFrame(source)
1089
1063
  elif isinstance(source, pyspark.sql.DataFrame):
@@ -1113,6 +1087,12 @@ def _ingest_with_spark(
1113
1087
  target = get_target_driver(target, featureset)
1114
1088
  target.set_resource(featureset)
1115
1089
  if featureset.spec.passthrough and target.is_offline:
1090
+ check_special_columns_exists(
1091
+ spark_df=df,
1092
+ entities=featureset.spec.entities,
1093
+ timestamp_key=timestamp_key,
1094
+ label_column=featureset.spec.label_column,
1095
+ )
1116
1096
  continue
1117
1097
  spark_options = target.get_spark_options(
1118
1098
  key_columns, timestamp_key, overwrite
@@ -1122,9 +1102,21 @@ def _ingest_with_spark(
1122
1102
  df_to_write = target.prepare_spark_df(
1123
1103
  df_to_write, key_columns, timestamp_key, spark_options
1124
1104
  )
1105
+ write_format = spark_options.pop("format", None)
1106
+ # We can get to this point if the column exists in different letter cases,
1107
+ # so PySpark will be able to read it, but we still have to raise an exception for it.
1108
+
1109
+ # This check is here and not in to_spark_df because in spark_merger we can have a target
1110
+ # that has different letter cases than the source, like in SnowflakeTarget.
1111
+ check_special_columns_exists(
1112
+ spark_df=df_to_write,
1113
+ entities=featureset.spec.entities,
1114
+ timestamp_key=timestamp_key,
1115
+ label_column=featureset.spec.label_column,
1116
+ )
1125
1117
  if overwrite:
1126
1118
  write_spark_dataframe_with_options(
1127
- spark_options, df_to_write, "overwrite"
1119
+ spark_options, df_to_write, "overwrite", write_format=write_format
1128
1120
  )
1129
1121
  else:
1130
1122
  # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1132,7 +1124,7 @@ def _ingest_with_spark(
1132
1124
  df_to_write.persist()
1133
1125
  if df_to_write.count() > 0:
1134
1126
  write_spark_dataframe_with_options(
1135
- spark_options, df_to_write, "append"
1127
+ spark_options, df_to_write, "append", write_format=write_format
1136
1128
  )
1137
1129
  target.update_resource_status("ready")
1138
1130
 
@@ -1207,7 +1199,7 @@ def _infer_from_static_df(
1207
1199
  def set_task_params(
1208
1200
  featureset: FeatureSet,
1209
1201
  source: DataSource = None,
1210
- targets: List[DataTargetBase] = None,
1202
+ targets: list[DataTargetBase] = None,
1211
1203
  parameters: dict = None,
1212
1204
  infer_options: InferOptions = InferOptions.Null,
1213
1205
  overwrite=None,
@@ -37,17 +37,12 @@ def parse_feature_string(feature):
37
37
  raise mlrun.errors.MLRunInvalidArgumentError(
38
38
  f"feature {feature} must be {expected_message}"
39
39
  )
40
- splitted = feature.split(feature_separator)
41
- if len(splitted) > 2:
42
- raise mlrun.errors.MLRunInvalidArgumentError(
43
- f"feature {feature} must be {expected_message}, cannot have more than one '.'"
44
- )
45
- feature_set = splitted[0]
46
- feature_name = splitted[1]
47
- splitted = feature_name.split(" as ")
48
- if len(splitted) > 1:
49
- return feature_set.strip(), splitted[0].strip(), splitted[1].strip()
50
- return feature_set.strip(), feature_name.strip(), None
40
+ feature_set, feature_name = feature.rsplit(feature_separator, 1)
41
+ feature_set = feature_set.strip()
42
+ split_result = feature_name.split(" as ", 1)
43
+ feature_name = split_result[0].strip()
44
+ alias = split_result[1].strip() if len(split_result) > 1 else None
45
+ return feature_set, feature_name, alias
51
46
 
52
47
 
53
48
  def parse_project_name_from_feature_string(feature):
@@ -192,7 +187,7 @@ class RunConfig:
192
187
  owner=None,
193
188
  credentials: typing.Optional[mlrun.model.Credentials] = None,
194
189
  code: str = None,
195
- requirements: typing.Union[str, typing.List[str]] = None,
190
+ requirements: typing.Union[str, list[str]] = None,
196
191
  extra_spec: dict = None,
197
192
  auth_info=None,
198
193
  ):