mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (275) hide show
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
@@ -19,18 +19,18 @@ from datetime import datetime
19
19
  from enum import Enum
20
20
  from typing import Union
21
21
 
22
- import numpy as np
23
22
  import pandas as pd
24
23
 
25
24
  import mlrun
26
25
 
27
26
  from ..config import config as mlconf
28
27
  from ..datastore import get_store_uri
29
- from ..datastore.targets import get_offline_target
28
+ from ..datastore.targets import BaseStoreTarget, get_offline_target
30
29
  from ..feature_store.common import (
31
30
  get_feature_set_by_uri,
32
31
  parse_feature_string,
33
32
  parse_project_name_from_feature_string,
33
+ verify_feature_vector_permissions,
34
34
  )
35
35
  from ..feature_store.feature_set import FeatureSet
36
36
  from ..features import Entity, Feature
@@ -47,6 +47,22 @@ from ..runtimes.function_reference import FunctionReference
47
47
  from ..serving.states import RootFlowStep
48
48
  from ..utils import StorePrefix
49
49
  from .common import RunConfig
50
+ from .feature_vector_utils import JoinGraph, OnlineVectorService
51
+ from .retrieval import get_merger, run_merge_job
52
+
53
+
54
+ def _features_to_vector_and_check_permissions(features: "FeatureVector", update_stats):
55
+ vector = features
56
+ if not vector.metadata.name:
57
+ raise mlrun.errors.MLRunInvalidArgumentError(
58
+ "feature vector name must be specified"
59
+ )
60
+ verify_feature_vector_permissions(
61
+ vector, mlrun.common.schemas.AuthorizationAction.update
62
+ )
63
+
64
+ vector.save()
65
+ return vector
50
66
 
51
67
 
52
68
  class FeatureVectorSpec(ModelObj):
@@ -201,251 +217,6 @@ class FeatureVectorStatus(ModelObj):
201
217
  self._features = ObjectList.from_list(Feature, features)
202
218
 
203
219
 
204
- class JoinGraph(ModelObj):
205
- """
206
- explain here about the class
207
- """
208
-
209
- default_graph_name = "$__join_graph_fv__$"
210
- first_join_type = "first"
211
- _dict_fields = ["name", "first_feature_set", "steps"]
212
-
213
- def __init__(
214
- self,
215
- name: str = None,
216
- first_feature_set: Union[str, FeatureSet] = None,
217
- ):
218
- """
219
- JoinGraph is a class that represents a graph of data joins between feature sets. It allows users to define
220
- data joins step by step, specifying the join type for each step. The graph can be used to build a sequence of
221
- joins that will be executed in order, allowing the creation of complex join operations between feature sets.
222
-
223
-
224
- Example:
225
- # Create a new JoinGraph and add steps for joining feature sets.
226
- join_graph = JoinGraph(name="my_join_graph", first_feature_set="featureset1")
227
- join_graph.inner("featureset2")
228
- join_graph.left("featureset3", asof_join=True)
229
-
230
-
231
- :param name: (str, optional) The name of the join graph. If not provided,
232
- a default name will be used.
233
- :param first_feature_set: (str or FeatureSet, optional) The first feature set to join. It can be
234
- specified either as a string representing the name of the feature set or as a
235
- FeatureSet object.
236
- """
237
- self.name = name or self.default_graph_name
238
- self._steps: ObjectList = None
239
- self._feature_sets = None
240
- if first_feature_set:
241
- self._start(first_feature_set)
242
-
243
- def inner(self, other_operand: typing.Union[str, FeatureSet]):
244
- """
245
- Specifies an inner join with the given feature set
246
-
247
- :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
248
-
249
- :return: JoinGraph: The updated JoinGraph object with the specified inner join.
250
- """
251
- return self._join_operands(other_operand, "inner")
252
-
253
- def outer(self, other_operand: typing.Union[str, FeatureSet]):
254
- """
255
- Specifies an outer join with the given feature set
256
-
257
- :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
258
- :return: JoinGraph: The updated JoinGraph object with the specified outer join.
259
- """
260
- return self._join_operands(other_operand, "outer")
261
-
262
- def left(self, other_operand: typing.Union[str, FeatureSet], asof_join):
263
- """
264
- Specifies a left join with the given feature set
265
-
266
- :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
267
- :param asof_join: (bool) A flag indicating whether to perform an as-of join.
268
-
269
- :return: JoinGraph: The updated JoinGraph object with the specified left join.
270
- """
271
- return self._join_operands(other_operand, "left", asof_join=asof_join)
272
-
273
- def right(self, other_operand: typing.Union[str, FeatureSet]):
274
- """
275
- Specifies a right join with the given feature set
276
-
277
- :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
278
-
279
- :return: JoinGraph: The updated JoinGraph object with the specified right join.
280
- """
281
- return self._join_operands(other_operand, "right")
282
-
283
- def _join_operands(
284
- self,
285
- other_operand: typing.Union[str, FeatureSet],
286
- join_type: str,
287
- asof_join: bool = False,
288
- ):
289
- if isinstance(other_operand, FeatureSet):
290
- other_operand = other_operand.metadata.name
291
-
292
- first_key_num = len(self._steps.keys()) if self._steps else 0
293
- left_last_step_name, left_all_feature_sets = (
294
- self.last_step_name,
295
- self.all_feature_sets_names,
296
- )
297
- is_first_fs = (
298
- join_type == JoinGraph.first_join_type or left_all_feature_sets == self.name
299
- )
300
- # create_new_step
301
- new_step = _JoinStep(
302
- f"step_{first_key_num}",
303
- left_last_step_name if not is_first_fs else "",
304
- other_operand,
305
- left_all_feature_sets if not is_first_fs else [],
306
- other_operand,
307
- join_type,
308
- asof_join,
309
- )
310
-
311
- if self.steps is not None:
312
- self.steps.update(new_step)
313
- else:
314
- self.steps = [new_step]
315
- return self
316
-
317
- def _start(self, other_operand: typing.Union[str, FeatureSet]):
318
- return self._join_operands(other_operand, JoinGraph.first_join_type)
319
-
320
- def _init_all_join_keys(
321
- self, feature_set_objects, vector, entity_rows_keys: list[str] = None
322
- ):
323
- for step in self.steps:
324
- step.init_join_keys(feature_set_objects, vector, entity_rows_keys)
325
-
326
- @property
327
- def all_feature_sets_names(self):
328
- """
329
- Returns a list of all feature set names included in the join graph.
330
-
331
- :return: List[str]: A list of feature set names.
332
- """
333
- if self._steps:
334
- return self._steps[-1].left_feature_set_names + [
335
- self._steps[-1].right_feature_set_name
336
- ]
337
- else:
338
- return self.name
339
-
340
- @property
341
- def last_step_name(self):
342
- """
343
- Returns the name of the last step in the join graph.
344
-
345
- :return: str: The name of the last step.
346
- """
347
- if self._steps:
348
- return self._steps[-1].name
349
- else:
350
- return self.name
351
-
352
- @property
353
- def steps(self):
354
- """
355
- Returns the list of join steps as ObjectList, which can be used to iterate over the steps
356
- or access the properties of each step.
357
- :return: ObjectList: The list of join steps.
358
- """
359
- return self._steps
360
-
361
- @steps.setter
362
- def steps(self, steps):
363
- """
364
- Setter for the steps property. It allows updating the join steps.
365
-
366
- :param steps: (List[_JoinStep]) The list of join steps.
367
- """
368
- self._steps = ObjectList.from_list(child_class=_JoinStep, children=steps)
369
-
370
-
371
- class _JoinStep(ModelObj):
372
- def __init__(
373
- self,
374
- name: str = None,
375
- left_step_name: str = None,
376
- right_step_name: str = None,
377
- left_feature_set_names: Union[str, list[str]] = None,
378
- right_feature_set_name: str = None,
379
- join_type: str = "inner",
380
- asof_join: bool = False,
381
- ):
382
- self.name = name
383
- self.left_step_name = left_step_name
384
- self.right_step_name = right_step_name
385
- self.left_feature_set_names = (
386
- left_feature_set_names
387
- if left_feature_set_names is None
388
- or isinstance(left_feature_set_names, list)
389
- else [left_feature_set_names]
390
- )
391
- self.right_feature_set_name = right_feature_set_name
392
- self.join_type = join_type
393
- self.asof_join = asof_join
394
-
395
- self.left_keys = []
396
- self.right_keys = []
397
-
398
- def init_join_keys(
399
- self,
400
- feature_set_objects: ObjectList,
401
- vector,
402
- entity_rows_keys: list[str] = None,
403
- ):
404
- if feature_set_objects[self.right_feature_set_name].is_connectable_to_df(
405
- entity_rows_keys
406
- ):
407
- self.left_keys, self.right_keys = [
408
- list(
409
- feature_set_objects[
410
- self.right_feature_set_name
411
- ].spec.entities.keys()
412
- )
413
- ] * 2
414
-
415
- if (
416
- self.join_type == JoinGraph.first_join_type
417
- or not self.left_feature_set_names
418
- ):
419
- self.join_type = (
420
- "inner"
421
- if self.join_type == JoinGraph.first_join_type
422
- else self.join_type
423
- )
424
- return
425
-
426
- for left_fset in self.left_feature_set_names:
427
- current_left_keys = feature_set_objects[left_fset].extract_relation_keys(
428
- feature_set_objects[self.right_feature_set_name],
429
- vector.get_feature_set_relations(feature_set_objects[left_fset]),
430
- )
431
- current_right_keys = list(
432
- feature_set_objects[self.right_feature_set_name].spec.entities.keys()
433
- )
434
- for i in range(len(current_left_keys)):
435
- if (
436
- current_left_keys[i] not in self.left_keys
437
- and current_right_keys[i] not in self.right_keys
438
- ):
439
- self.left_keys.append(current_left_keys[i])
440
- self.right_keys.append(current_right_keys[i])
441
-
442
- if not self.left_keys:
443
- raise mlrun.errors.MLRunRuntimeError(
444
- f"{self.name} can't be preform due to undefined relation between "
445
- f"{self.left_feature_set_names} to {self.right_feature_set_name}"
446
- )
447
-
448
-
449
220
  class FixedWindowType(Enum):
450
221
  CurrentOpenWindow = 1
451
222
  LastClosedWindow = 2
@@ -479,7 +250,7 @@ class FeatureVector(ModelObj):
479
250
  description=None,
480
251
  with_indexes=None,
481
252
  join_graph: JoinGraph = None,
482
- relations: dict[str, dict[str, Union[Entity, str]]] = None,
253
+ relations: typing.Optional[dict[str, dict[str, Union[Entity, str]]]] = None,
483
254
  ):
484
255
  """Feature vector, specify selected features, their metadata and material views
485
256
 
@@ -727,21 +498,21 @@ class FeatureVector(ModelObj):
727
498
  def get_offline_features(
728
499
  self,
729
500
  entity_rows=None,
730
- entity_timestamp_column: str = None,
501
+ entity_timestamp_column: typing.Optional[str] = None,
731
502
  target: DataTargetBase = None,
732
503
  run_config: RunConfig = None,
733
- drop_columns: list[str] = None,
734
- start_time: Union[str, datetime] = None,
735
- end_time: Union[str, datetime] = None,
504
+ drop_columns: typing.Optional[list[str]] = None,
505
+ start_time: typing.Optional[Union[str, datetime]] = None,
506
+ end_time: typing.Optional[Union[str, datetime]] = None,
736
507
  with_indexes: bool = False,
737
- update_stats: bool = False,
738
- engine: str = None,
739
- engine_args: dict = None,
740
- query: str = None,
741
- order_by: Union[str, list[str]] = None,
742
- spark_service: str = None,
743
- timestamp_for_filtering: Union[str, dict[str, str]] = None,
744
- additional_filters: list = None,
508
+ update_stats: bool = True,
509
+ engine: typing.Optional[str] = None,
510
+ engine_args: typing.Optional[dict] = None,
511
+ query: typing.Optional[str] = None,
512
+ order_by: typing.Optional[Union[str, list[str]]] = None,
513
+ spark_service: typing.Optional[str] = None,
514
+ timestamp_for_filtering: typing.Optional[Union[str, dict[str, str]]] = None,
515
+ additional_filters: typing.Optional[list] = None,
745
516
  ):
746
517
  """retrieve offline feature vector results
747
518
 
@@ -784,8 +555,9 @@ class FeatureVector(ModelObj):
784
555
  columns. This property can be specified also in the feature vector spec
785
556
  (feature_vector.spec.with_indexes)
786
557
  (default False)
787
- :param update_stats: update features statistics from the requested feature sets on the vector.
788
- (default False).
558
+ :param update_stats: When set to True (default), updates feature statistics from the requested
559
+ feature sets on the vector, which requires 'update' permissions. When set to
560
+ False, uses read-only operations that only require 'read' permissions.
789
561
  :param engine: processing engine kind ("local", "dask", or "spark")
790
562
  :param engine_args: kwargs for the processing engine
791
563
  :param query: The query string used to filter rows on the output
@@ -806,35 +578,79 @@ class FeatureVector(ModelObj):
806
578
  https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
807
579
 
808
580
  """
581
+ if entity_rows is None and entity_timestamp_column is not None:
582
+ raise mlrun.errors.MLRunInvalidArgumentError(
583
+ "entity_timestamp_column param "
584
+ "can not be specified without entity_rows param"
585
+ )
586
+
587
+ if isinstance(target, BaseStoreTarget) and not target.support_pandas:
588
+ raise mlrun.errors.MLRunInvalidArgumentError(
589
+ f"get_offline_features does not support targets that do not support pandas engine."
590
+ f" Target kind: {target.kind}"
591
+ )
592
+
593
+ if update_stats:
594
+ feature_vector = _features_to_vector_and_check_permissions(
595
+ self, update_stats
596
+ )
597
+ else:
598
+ feature_vector = self
599
+ verify_feature_vector_permissions(
600
+ feature_vector, mlrun.common.schemas.AuthorizationAction.read
601
+ )
602
+
603
+ entity_timestamp_column = (
604
+ entity_timestamp_column or feature_vector.spec.timestamp_field
605
+ )
606
+
607
+ merger_engine = get_merger(engine)
608
+
609
+ if run_config and not run_config.local:
610
+ return run_merge_job(
611
+ feature_vector,
612
+ target,
613
+ merger_engine,
614
+ engine,
615
+ engine_args,
616
+ spark_service,
617
+ entity_rows,
618
+ entity_timestamp_column=entity_timestamp_column,
619
+ run_config=run_config,
620
+ drop_columns=drop_columns,
621
+ with_indexes=with_indexes,
622
+ query=query,
623
+ order_by=order_by,
624
+ start_time=start_time,
625
+ end_time=end_time,
626
+ timestamp_for_filtering=timestamp_for_filtering,
627
+ additional_filters=additional_filters,
628
+ )
809
629
 
810
- return mlrun.feature_store.api._get_offline_features(
811
- self,
630
+ merger = merger_engine(feature_vector, **(engine_args or {}))
631
+ return merger.start(
812
632
  entity_rows,
813
633
  entity_timestamp_column,
814
- target,
815
- run_config,
816
- drop_columns,
817
- start_time,
818
- end_time,
819
- with_indexes,
820
- update_stats,
821
- engine,
822
- engine_args,
823
- query,
824
- order_by,
825
- spark_service,
826
- timestamp_for_filtering,
827
- additional_filters,
634
+ target=target,
635
+ drop_columns=drop_columns,
636
+ start_time=start_time,
637
+ end_time=end_time,
638
+ timestamp_for_filtering=timestamp_for_filtering,
639
+ with_indexes=with_indexes,
640
+ update_stats=update_stats,
641
+ query=query,
642
+ order_by=order_by,
643
+ additional_filters=additional_filters,
828
644
  )
829
645
 
830
646
  def get_online_feature_service(
831
647
  self,
832
648
  run_config: RunConfig = None,
833
649
  fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
834
- impute_policy: dict = None,
650
+ impute_policy: typing.Optional[dict] = None,
835
651
  update_stats: bool = False,
836
- entity_keys: list[str] = None,
837
- ):
652
+ entity_keys: typing.Optional[list[str]] = None,
653
+ ) -> OnlineVectorService:
838
654
  """initialize and return online feature vector service api,
839
655
  returns :py:class:`~mlrun.feature_store.OnlineVectorService`
840
656
 
@@ -897,204 +713,14 @@ class FeatureVector(ModelObj):
897
713
  :return: Initialize the `OnlineVectorService`.
898
714
  Will be used in subclasses where `support_online=True`.
899
715
  """
900
- return mlrun.feature_store.api._get_online_feature_service(
901
- self,
902
- run_config,
903
- fixed_window_type,
904
- impute_policy,
905
- update_stats,
906
- entity_keys,
907
- )
908
-
909
-
910
- class OnlineVectorService:
911
- """get_online_feature_service response object"""
912
-
913
- def __init__(
914
- self,
915
- vector,
916
- graph,
917
- index_columns,
918
- impute_policy: dict = None,
919
- requested_columns: list[str] = None,
920
- ):
921
- self.vector = vector
922
- self.impute_policy = impute_policy or {}
923
-
924
- self._controller = graph.controller
925
- self._index_columns = index_columns
926
- self._impute_values = {}
927
- self._requested_columns = requested_columns
928
-
929
- def __enter__(self):
930
- return self
931
-
932
- def __exit__(self, exc_type, exc_val, exc_tb):
933
- self.close()
934
-
935
- def initialize(self):
936
- """internal, init the feature service and prep the imputing logic"""
937
- if not self.impute_policy:
938
- return
939
-
940
- impute_policy = copy(self.impute_policy)
941
- vector = self.vector
942
- feature_stats = vector.get_stats_table()
943
- self._impute_values = {}
944
-
945
- feature_keys = list(vector.status.features.keys())
946
- if vector.status.label_column in feature_keys:
947
- feature_keys.remove(vector.status.label_column)
948
-
949
- if "*" in impute_policy:
950
- value = impute_policy["*"]
951
- del impute_policy["*"]
952
-
953
- for name in feature_keys:
954
- if name not in impute_policy:
955
- if isinstance(value, str) and value.startswith("$"):
956
- self._impute_values[name] = feature_stats.loc[name, value[1:]]
957
- else:
958
- self._impute_values[name] = value
959
-
960
- for name, value in impute_policy.items():
961
- if name not in feature_keys:
962
- raise mlrun.errors.MLRunInvalidArgumentError(
963
- f"feature {name} in impute_policy but not in feature vector"
964
- )
965
- if isinstance(value, str) and value.startswith("$"):
966
- self._impute_values[name] = feature_stats.loc[name, value[1:]]
967
- else:
968
- self._impute_values[name] = value
969
-
970
- @property
971
- def status(self):
972
- """vector merger function status (ready, running, error)"""
973
- return "ready"
974
-
975
- def get(self, entity_rows: list[Union[dict, list]], as_list=False):
976
- """get feature vector given the provided entity inputs
977
-
978
- take a list of input vectors/rows and return a list of enriched feature vectors
979
- each input and/or output vector can be a list of values or a dictionary of field names and values,
980
- to return the vector as a list of values set the `as_list` to True.
981
-
982
- if the input is a list of list (vs a list of dict), the values in the list will correspond to the
983
- index/entity values, i.e. [["GOOG"], ["MSFT"]] means "GOOG" and "MSFT" are the index/entity fields.
984
-
985
- example::
986
-
987
- # accept list of dict, return list of dict
988
- svc = fstore.get_online_feature_service(vector)
989
- resp = svc.get([{"name": "joe"}, {"name": "mike"}])
990
-
991
- # accept list of list, return list of list
992
- svc = fstore.get_online_feature_service(vector, as_list=True)
993
- resp = svc.get([["joe"], ["mike"]])
994
-
995
- :param entity_rows: list of list/dict with input entity data/rows
996
- :param as_list: return a list of list (list input is required by many ML frameworks)
997
- """
998
- results = []
999
- futures = []
1000
- if isinstance(entity_rows, dict):
1001
- entity_rows = [entity_rows]
1002
-
1003
- # validate we have valid input struct
1004
- if (
1005
- not entity_rows
1006
- or not isinstance(entity_rows, list)
1007
- or not isinstance(entity_rows[0], (list, dict))
1008
- ):
1009
- raise mlrun.errors.MLRunInvalidArgumentError(
1010
- f"input data is of type {type(entity_rows)}. must be a list of lists or list of dicts"
1011
- )
716
+ feature_vector = _features_to_vector_and_check_permissions(self, True)
1012
717
 
1013
- # if list of list, convert to dicts (with the index columns as the dict keys)
1014
- if isinstance(entity_rows[0], list):
1015
- if not self._index_columns or len(entity_rows[0]) != len(
1016
- self._index_columns
1017
- ):
1018
- raise mlrun.errors.MLRunInvalidArgumentError(
1019
- "input list must be in the same size of the index_keys list"
1020
- )
1021
- index_range = range(len(self._index_columns))
1022
- entity_rows = [
1023
- {self._index_columns[i]: item[i] for i in index_range}
1024
- for item in entity_rows
1025
- ]
1026
-
1027
- for row in entity_rows:
1028
- futures.append(self._controller.emit(row, return_awaitable_result=True))
1029
-
1030
- for future in futures:
1031
- result = future.await_result()
1032
- data = result.body
1033
- if data:
1034
- actual_columns = data.keys()
1035
- if all([col in self._index_columns for col in actual_columns]):
1036
- # didn't get any data from the graph
1037
- results.append(None)
1038
- continue
1039
- for column in self._requested_columns:
1040
- if (
1041
- column not in actual_columns
1042
- and column != self.vector.status.label_column
1043
- ):
1044
- data[column] = None
1045
-
1046
- if self._impute_values:
1047
- for name in data.keys():
1048
- v = data[name]
1049
- if v is None or (
1050
- isinstance(v, float) and (np.isinf(v) or np.isnan(v))
1051
- ):
1052
- data[name] = self._impute_values.get(name, v)
1053
- if not self.vector.spec.with_indexes:
1054
- for name in self.vector.status.index_keys:
1055
- data.pop(name, None)
1056
- if not any(data.values()):
1057
- data = None
1058
-
1059
- if as_list and data:
1060
- data = [
1061
- data.get(key, None)
1062
- for key in self._requested_columns
1063
- if key != self.vector.status.label_column
1064
- ]
1065
- results.append(data)
1066
-
1067
- return results
1068
-
1069
- def close(self):
1070
- """terminate the async loop"""
1071
- self._controller.terminate()
1072
-
1073
-
1074
- class OfflineVectorResponse:
1075
- """get_offline_features response object"""
1076
-
1077
- def __init__(self, merger):
1078
- self._merger = merger
1079
- self.vector = merger.vector
718
+ engine_args = {"impute_policy": impute_policy}
719
+ merger_engine = get_merger("storey")
720
+ # todo: support remote service (using remote nuclio/mlrun function if run_config)
1080
721
 
1081
- @property
1082
- def status(self):
1083
- """vector prep job status (ready, running, error)"""
1084
- return self._merger.get_status()
1085
-
1086
- def to_dataframe(self, to_pandas=True):
1087
- """return result as dataframe"""
1088
- if self.status != "completed":
1089
- raise mlrun.errors.MLRunTaskNotReadyError(
1090
- "feature vector dataset is not ready"
1091
- )
1092
- return self._merger.get_df(to_pandas=to_pandas)
722
+ merger = merger_engine(feature_vector, **engine_args)
1093
723
 
1094
- def to_parquet(self, target_path, **kw):
1095
- """return results as parquet file"""
1096
- return self._merger.to_parquet(target_path, **kw)
1097
-
1098
- def to_csv(self, target_path, **kw):
1099
- """return results as csv file"""
1100
- return self._merger.to_csv(target_path, **kw)
724
+ return merger.init_online_vector_service(
725
+ entity_keys, fixed_window_type, update_stats=True
726
+ )