mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (305) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +131 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +129 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc7.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc7.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
  import warnings
15
15
  from datetime import datetime
16
- from typing import Dict, List, Optional, Tuple, Union
16
+ from typing import Optional, Union
17
17
 
18
18
  import pandas as pd
19
19
  from storey import EmitEveryEvent, EmitPolicy
@@ -119,9 +119,9 @@ class FeatureSetSpec(ModelObj):
119
119
 
120
120
  self.owner = owner
121
121
  self.description = description
122
- self.entities: List[Union[Entity, str]] = entities or []
123
- self.relations: Dict[str, Union[Entity, str]] = relations or {}
124
- self.features: List[Feature] = features or []
122
+ self.entities: list[Union[Entity, str]] = entities or []
123
+ self.relations: dict[str, Union[Entity, str]] = relations or {}
124
+ self.features: list[Feature] = features or []
125
125
  self.partition_keys = partition_keys or []
126
126
  self.timestamp_key = timestamp_key
127
127
  self.source = source
@@ -136,12 +136,12 @@ class FeatureSetSpec(ModelObj):
136
136
  self.with_default_targets = True
137
137
 
138
138
  @property
139
- def entities(self) -> List[Entity]:
139
+ def entities(self) -> list[Entity]:
140
140
  """feature set entities (indexes)"""
141
141
  return self._entities
142
142
 
143
143
  @entities.setter
144
- def entities(self, entities: List[Union[Entity, str]]):
144
+ def entities(self, entities: list[Union[Entity, str]]):
145
145
  if entities:
146
146
  # if the entity is a string, convert it to Entity class
147
147
  for i, entity in enumerate(entities):
@@ -163,21 +163,21 @@ class FeatureSetSpec(ModelObj):
163
163
  self._entities = ObjectList.from_list(Entity, entities)
164
164
 
165
165
  @property
166
- def features(self) -> List[Feature]:
166
+ def features(self) -> list[Feature]:
167
167
  """feature set features list"""
168
168
  return self._features
169
169
 
170
170
  @features.setter
171
- def features(self, features: List[Feature]):
171
+ def features(self, features: list[Feature]):
172
172
  self._features = ObjectList.from_list(Feature, features)
173
173
 
174
174
  @property
175
- def targets(self) -> List[DataTargetBase]:
175
+ def targets(self) -> list[DataTargetBase]:
176
176
  """list of desired targets (material storage)"""
177
177
  return self._targets
178
178
 
179
179
  @targets.setter
180
- def targets(self, targets: List[DataTargetBase]):
180
+ def targets(self, targets: list[DataTargetBase]):
181
181
  self._targets = ObjectList.from_list(DataTargetBase, targets)
182
182
 
183
183
  @property
@@ -230,12 +230,12 @@ class FeatureSetSpec(ModelObj):
230
230
  self._source = source
231
231
 
232
232
  @property
233
- def relations(self) -> Dict[str, Entity]:
233
+ def relations(self) -> dict[str, Entity]:
234
234
  """feature set relations dict"""
235
235
  return self._relations
236
236
 
237
237
  @relations.setter
238
- def relations(self, relations: Dict[str, Entity]):
238
+ def relations(self, relations: dict[str, Entity]):
239
239
  for col, ent in relations.items():
240
240
  if isinstance(ent, str):
241
241
  relations[col] = Entity(ent)
@@ -284,12 +284,12 @@ class FeatureSetStatus(ModelObj):
284
284
  self.run_uri = run_uri
285
285
 
286
286
  @property
287
- def targets(self) -> List[DataTarget]:
287
+ def targets(self) -> list[DataTarget]:
288
288
  """list of material storage targets + their status/path"""
289
289
  return self._targets
290
290
 
291
291
  @targets.setter
292
- def targets(self, targets: List[DataTarget]):
292
+ def targets(self, targets: list[DataTarget]):
293
293
  self._targets = ObjectList.from_list(DataTarget, targets)
294
294
 
295
295
  def update_target(self, target: DataTarget):
@@ -318,8 +318,6 @@ def emit_policy_to_dict(policy: EmitPolicy):
318
318
 
319
319
 
320
320
  class FeatureSet(ModelObj):
321
- """Feature set object, defines a set of features and their data pipeline"""
322
-
323
321
  kind = mlrun.common.schemas.ObjectKind.feature_set.value
324
322
  _dict_fields = ["kind", "metadata", "spec", "status"]
325
323
 
@@ -327,11 +325,11 @@ class FeatureSet(ModelObj):
327
325
  self,
328
326
  name: str = None,
329
327
  description: str = None,
330
- entities: List[Union[Entity, str]] = None,
328
+ entities: list[Union[Entity, str]] = None,
331
329
  timestamp_key: str = None,
332
330
  engine: str = None,
333
331
  label_column: str = None,
334
- relations: Dict[str, Union[Entity, str]] = None,
332
+ relations: dict[str, Union[Entity, str]] = None,
335
333
  passthrough: bool = None,
336
334
  ):
337
335
  """Feature set object, defines a set of features and their data pipeline
@@ -339,7 +337,10 @@ class FeatureSet(ModelObj):
339
337
  example::
340
338
 
341
339
  import mlrun.feature_store as fstore
342
- ticks = fstore.FeatureSet("ticks", entities=["stock"], timestamp_key="timestamp")
340
+
341
+ ticks = fstore.FeatureSet(
342
+ "ticks", entities=["stock"], timestamp_key="timestamp"
343
+ )
343
344
  ticks.ingest(df)
344
345
 
345
346
  :param name: name of the feature set
@@ -532,7 +533,7 @@ class FeatureSet(ModelObj):
532
533
  self, **(class_args if class_args is not None else {})
533
534
  )
534
535
 
535
- def purge_targets(self, target_names: List[str] = None, silent: bool = False):
536
+ def purge_targets(self, target_names: list[str] = None, silent: bool = False):
536
537
  """Delete data of specific targets
537
538
  :param target_names: List of names of targets to delete (default: delete all ingested targets)
538
539
  :param silent: Fail silently if target doesn't exist in featureset status"""
@@ -560,7 +561,7 @@ class FeatureSet(ModelObj):
560
561
 
561
562
  def update_targets_for_ingest(
562
563
  self,
563
- targets: List[DataTargetBase],
564
+ targets: list[DataTargetBase],
564
565
  overwrite: bool = None,
565
566
  ):
566
567
  if not targets:
@@ -581,7 +582,7 @@ class FeatureSet(ModelObj):
581
582
  update_targets_run_id_for_ingest(overwrite, targets, status_targets)
582
583
 
583
584
  def _reload_and_get_status_targets(
584
- self, target_names: List[str] = None, silent: bool = False
585
+ self, target_names: list[str] = None, silent: bool = False
585
586
  ):
586
587
  try:
587
588
  self.reload(update_spec=False)
@@ -602,9 +603,7 @@ class FeatureSet(ModelObj):
602
603
  pass
603
604
  else:
604
605
  raise mlrun.errors.MLRunNotFoundError(
605
- "Target not found in status (fset={0}, target={1})".format(
606
- self.metadata.name, target_name
607
- )
606
+ f"Target not found in status (fset={self.metadata.name}, target={target_name})"
608
607
  )
609
608
  else:
610
609
  targets = self.status.targets
@@ -621,7 +620,7 @@ class FeatureSet(ModelObj):
621
620
  name: str,
622
621
  value_type: mlrun.data_types.ValueType = None,
623
622
  description: str = None,
624
- labels: Optional[Dict[str, str]] = None,
623
+ labels: Optional[dict[str, str]] = None,
625
624
  ):
626
625
  """add/set an entity (dataset index)
627
626
 
@@ -629,12 +628,12 @@ class FeatureSet(ModelObj):
629
628
 
630
629
  import mlrun.feature_store as fstore
631
630
 
632
- ticks = fstore.FeatureSet("ticks",
633
- entities=["stock"],
634
- timestamp_key="timestamp")
635
- ticks.add_entity("country",
636
- mlrun.data_types.ValueType.STRING,
637
- description="stock country")
631
+ ticks = fstore.FeatureSet(
632
+ "ticks", entities=["stock"], timestamp_key="timestamp"
633
+ )
634
+ ticks.add_entity(
635
+ "country", mlrun.data_types.ValueType.STRING, description="stock country"
636
+ )
638
637
  ticks.add_entity("year", mlrun.data_types.ValueType.INT16)
639
638
  ticks.save()
640
639
 
@@ -654,13 +653,23 @@ class FeatureSet(ModelObj):
654
653
  import mlrun.feature_store as fstore
655
654
  from mlrun.features import Feature
656
655
 
657
- ticks = fstore.FeatureSet("ticks",
658
- entities=["stock"],
659
- timestamp_key="timestamp")
660
- ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.STRING,
661
- description="client consistency"),"ABC01")
662
- ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.FLOAT,
663
- description="client volatility"),"SAB")
656
+ ticks = fstore.FeatureSet(
657
+ "ticks", entities=["stock"], timestamp_key="timestamp"
658
+ )
659
+ ticks.add_feature(
660
+ Feature(
661
+ value_type=mlrun.data_types.ValueType.STRING,
662
+ description="client consistency",
663
+ ),
664
+ "ABC01",
665
+ )
666
+ ticks.add_feature(
667
+ Feature(
668
+ value_type=mlrun.data_types.ValueType.FLOAT,
669
+ description="client volatility",
670
+ ),
671
+ "SAB",
672
+ )
664
673
  ticks.save()
665
674
 
666
675
  :param feature: setting of Feature
@@ -864,15 +873,18 @@ class FeatureSet(ModelObj):
864
873
  example::
865
874
 
866
875
  import mlrun.feature_store as fstore
876
+
867
877
  ...
868
- ticks = fstore.FeatureSet("ticks",
869
- entities=["stock"],
870
- timestamp_key="timestamp")
871
- ticks.add_aggregation(name='priceN',
872
- column='price',
873
- operations=['avg'],
874
- windows=['1d'],
875
- period='1h')
878
+ ticks = fstore.FeatureSet(
879
+ "ticks", entities=["stock"], timestamp_key="timestamp"
880
+ )
881
+ ticks.add_aggregation(
882
+ name="priceN",
883
+ column="price",
884
+ operations=["avg"],
885
+ windows=["1d"],
886
+ period="1h",
887
+ )
876
888
  ticks.plot(rankdir="LR", with_targets=True)
877
889
 
878
890
  :param filename: target filepath for the graph image (None for the notebook)
@@ -905,6 +917,7 @@ class FeatureSet(ModelObj):
905
917
  start_time=None,
906
918
  end_time=None,
907
919
  time_column=None,
920
+ additional_filters=None,
908
921
  **kwargs,
909
922
  ):
910
923
  """return featureset (offline) data as dataframe
@@ -916,6 +929,12 @@ class FeatureSet(ModelObj):
916
929
  :param end_time: filter by end time
917
930
  :param time_column: specify the time column name in the file
918
931
  :param kwargs: additional reader (csv, parquet, ..) args
932
+ :param additional_filters: List of additional_filter conditions as tuples.
933
+ Each tuple should be in the format (column_name, operator, value).
934
+ Supported operators: "=", ">=", "<=", ">", "<".
935
+ Example: [("Product", "=", "Computer")]
936
+ For all supported filters, please see:
937
+ https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
919
938
  :return: DataFrame
920
939
  """
921
940
  entities = list(self.spec.entities.keys())
@@ -934,6 +953,7 @@ class FeatureSet(ModelObj):
934
953
  start_time=start_time,
935
954
  end_time=end_time,
936
955
  time_field=time_column,
956
+ additional_filters=additional_filters,
937
957
  **kwargs,
938
958
  )
939
959
  # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -953,6 +973,7 @@ class FeatureSet(ModelObj):
953
973
  start_time=start_time,
954
974
  end_time=end_time,
955
975
  time_column=time_column,
976
+ additional_filters=additional_filters,
956
977
  **kwargs,
957
978
  )
958
979
  return result
@@ -983,7 +1004,7 @@ class FeatureSet(ModelObj):
983
1004
  def ingest(
984
1005
  self,
985
1006
  source=None,
986
- targets: List[DataTargetBase] = None,
1007
+ targets: list[DataTargetBase] = None,
987
1008
  namespace=None,
988
1009
  return_df: bool = True,
989
1010
  infer_options: InferOptions = InferOptions.default(),
@@ -1009,7 +1030,7 @@ class FeatureSet(ModelObj):
1009
1030
  df = stocks_set.ingest(stocks, infer_options=fstore.InferOptions.default())
1010
1031
 
1011
1032
  # for running as remote job
1012
- config = RunConfig(image='mlrun/mlrun')
1033
+ config = RunConfig(image="mlrun/mlrun")
1013
1034
  df = ingest(stocks_set, stocks, run_config=config)
1014
1035
 
1015
1036
  # specify source and targets
@@ -1085,11 +1106,11 @@ class FeatureSet(ModelObj):
1085
1106
  def deploy_ingestion_service(
1086
1107
  self,
1087
1108
  source: DataSource = None,
1088
- targets: List[DataTargetBase] = None,
1109
+ targets: list[DataTargetBase] = None,
1089
1110
  name: str = None,
1090
1111
  run_config: RunConfig = None,
1091
1112
  verbose=False,
1092
- ) -> Tuple[str, BaseRuntime]:
1113
+ ) -> tuple[str, BaseRuntime]:
1093
1114
  """Start real-time ingestion service using nuclio function
1094
1115
 
1095
1116
  Deploy a real-time function implementing feature ingestion pipeline
@@ -1122,7 +1143,7 @@ class FeatureSet(ModelObj):
1122
1143
  def extract_relation_keys(
1123
1144
  self,
1124
1145
  other_feature_set,
1125
- relations: Dict[str, Union[str, Entity]] = None,
1146
+ relations: dict[str, Union[str, Entity]] = None,
1126
1147
  ) -> list[str]:
1127
1148
  """
1128
1149
  Checks whether a feature set can be merged to the right of this feature set.
@@ -1189,10 +1210,10 @@ class SparkAggregateByKey(StepToDict):
1189
1210
 
1190
1211
  def __init__(
1191
1212
  self,
1192
- key_columns: List[str],
1213
+ key_columns: list[str],
1193
1214
  time_column: str,
1194
- aggregates: List[Dict],
1195
- emit_policy: Union[EmitPolicy, Dict] = None,
1215
+ aggregates: list[dict],
1216
+ emit_policy: Union[EmitPolicy, dict] = None,
1196
1217
  ):
1197
1218
  self.key_columns = key_columns
1198
1219
  self.time_column = time_column
@@ -17,7 +17,7 @@ import typing
17
17
  from copy import copy
18
18
  from datetime import datetime
19
19
  from enum import Enum
20
- from typing import Dict, List, Union
20
+ from typing import Union
21
21
 
22
22
  import numpy as np
23
23
  import pandas as pd
@@ -69,18 +69,16 @@ class FeatureVectorSpec(ModelObj):
69
69
  self._entity_fields: ObjectList = None
70
70
  self._entity_source: DataSource = None
71
71
  self._function: FunctionReference = None
72
- self._relations: typing.Dict[str, ObjectDict] = None
72
+ self._relations: dict[str, ObjectDict] = None
73
73
  self._join_graph: JoinGraph = None
74
74
 
75
75
  self.description = description
76
- self.features: List[str] = features or []
76
+ self.features: list[str] = features or []
77
77
  self.entity_source = entity_source
78
78
  self.entity_fields = entity_fields or []
79
79
  self.graph = graph
80
80
  self.join_graph = join_graph
81
- self.relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]] = (
82
- relations or {}
83
- )
81
+ self.relations: dict[str, dict[str, Union[Entity, str]]] = relations or {}
84
82
  self.timestamp_field = timestamp_field
85
83
  self.label_feature = label_feature
86
84
  self.with_indexes = with_indexes
@@ -97,12 +95,12 @@ class FeatureVectorSpec(ModelObj):
97
95
  self._entity_source = self._verify_dict(source, "entity_source", DataSource)
98
96
 
99
97
  @property
100
- def entity_fields(self) -> List[Feature]:
98
+ def entity_fields(self) -> list[Feature]:
101
99
  """the schema/metadata for the entity source fields"""
102
100
  return self._entity_fields
103
101
 
104
102
  @entity_fields.setter
105
- def entity_fields(self, entity_fields: List[Feature]):
103
+ def entity_fields(self, entity_fields: list[Feature]):
106
104
  self._entity_fields = ObjectList.from_list(Feature, entity_fields)
107
105
 
108
106
  @property
@@ -125,14 +123,12 @@ class FeatureVectorSpec(ModelObj):
125
123
  self._function = self._verify_dict(function, "function", FunctionReference)
126
124
 
127
125
  @property
128
- def relations(self) -> typing.Dict[str, ObjectDict]:
126
+ def relations(self) -> dict[str, ObjectDict]:
129
127
  """feature set relations dict"""
130
128
  return self._relations
131
129
 
132
130
  @relations.setter
133
- def relations(
134
- self, relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]]
135
- ):
131
+ def relations(self, relations: dict[str, dict[str, Union[Entity, str]]]):
136
132
  temp_relations = {}
137
133
  for fs_name, relation in relations.items():
138
134
  for col, ent in relation.items():
@@ -179,29 +175,29 @@ class FeatureVectorStatus(ModelObj):
179
175
  self.stats = stats or {}
180
176
  self.index_keys = index_keys
181
177
  self.preview = preview or []
182
- self.features: List[Feature] = features or []
178
+ self.features: list[Feature] = features or []
183
179
  self.run_uri = run_uri
184
180
  self.timestamp_key = timestamp_key
185
181
 
186
182
  @property
187
- def targets(self) -> List[DataTarget]:
183
+ def targets(self) -> list[DataTarget]:
188
184
  """list of material storage targets + their status/path"""
189
185
  return self._targets
190
186
 
191
187
  @targets.setter
192
- def targets(self, targets: List[DataTarget]):
188
+ def targets(self, targets: list[DataTarget]):
193
189
  self._targets = ObjectList.from_list(DataTarget, targets)
194
190
 
195
191
  def update_target(self, target: DataTarget):
196
192
  self._targets.update(target)
197
193
 
198
194
  @property
199
- def features(self) -> List[Feature]:
195
+ def features(self) -> list[Feature]:
200
196
  """list of features (result of joining features from the source feature sets)"""
201
197
  return self._features
202
198
 
203
199
  @features.setter
204
- def features(self, features: List[Feature]):
200
+ def features(self, features: list[Feature]):
205
201
  self._features = ObjectList.from_list(Feature, features)
206
202
 
207
203
 
@@ -378,7 +374,7 @@ class _JoinStep(ModelObj):
378
374
  name: str = None,
379
375
  left_step_name: str = None,
380
376
  right_step_name: str = None,
381
- left_feature_set_names: Union[str, List[str]] = None,
377
+ left_feature_set_names: Union[str, list[str]] = None,
382
378
  right_feature_set_name: str = None,
383
379
  join_type: str = "inner",
384
380
  asof_join: bool = False,
@@ -388,7 +384,8 @@ class _JoinStep(ModelObj):
388
384
  self.right_step_name = right_step_name
389
385
  self.left_feature_set_names = (
390
386
  left_feature_set_names
391
- if isinstance(left_feature_set_names, list)
387
+ if left_feature_set_names is None
388
+ or isinstance(left_feature_set_names, list)
392
389
  else [left_feature_set_names]
393
390
  )
394
391
  self.right_feature_set_name = right_feature_set_name
@@ -402,7 +399,7 @@ class _JoinStep(ModelObj):
402
399
  self,
403
400
  feature_set_objects: ObjectList,
404
401
  vector,
405
- entity_rows_keys: List[str] = None,
402
+ entity_rows_keys: list[str] = None,
406
403
  ):
407
404
  if feature_set_objects[self.right_feature_set_name].is_connectable_to_df(
408
405
  entity_rows_keys
@@ -482,21 +479,22 @@ class FeatureVector(ModelObj):
482
479
  description=None,
483
480
  with_indexes=None,
484
481
  join_graph: JoinGraph = None,
485
- relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]] = None,
482
+ relations: dict[str, dict[str, Union[Entity, str]]] = None,
486
483
  ):
487
484
  """Feature vector, specify selected features, their metadata and material views
488
485
 
489
486
  example::
490
487
 
491
488
  import mlrun.feature_store as fstore
489
+
492
490
  features = ["quotes.bid", "quotes.asks_sum_5h as asks_5h", "stocks.*"]
493
491
  vector = fstore.FeatureVector("my-vec", features)
494
492
 
495
493
  # get the vector as a dataframe
496
- df = fstore.get_offline_features(vector).to_dataframe()
494
+ df = vector.get_offline_features().to_dataframe()
497
495
 
498
496
  # return an online/real-time feature service
499
- svc = fstore.get_online_feature_service(vector, impute_policy={"*": "$mean"})
497
+ svc = vector.get_online_feature_service(impute_policy={"*": "$mean"})
500
498
  resp = svc.get([{"stock": "GOOG"}])
501
499
 
502
500
  :param name: List of names of targets to delete (default: delete all ingested targets)
@@ -732,7 +730,7 @@ class FeatureVector(ModelObj):
732
730
  entity_timestamp_column: str = None,
733
731
  target: DataTargetBase = None,
734
732
  run_config: RunConfig = None,
735
- drop_columns: List[str] = None,
733
+ drop_columns: list[str] = None,
736
734
  start_time: Union[str, datetime] = None,
737
735
  end_time: Union[str, datetime] = None,
738
736
  with_indexes: bool = False,
@@ -740,9 +738,10 @@ class FeatureVector(ModelObj):
740
738
  engine: str = None,
741
739
  engine_args: dict = None,
742
740
  query: str = None,
743
- order_by: Union[str, List[str]] = None,
741
+ order_by: Union[str, list[str]] = None,
744
742
  spark_service: str = None,
745
- timestamp_for_filtering: Union[str, Dict[str, str]] = None,
743
+ timestamp_for_filtering: Union[str, dict[str, str]] = None,
744
+ additional_filters: list = None,
746
745
  ):
747
746
  """retrieve offline feature vector results
748
747
 
@@ -799,6 +798,12 @@ class FeatureVector(ModelObj):
799
798
  By default, the filter executes on the timestamp_key of each feature set.
800
799
  Note: the time filtering is performed on each feature set before the
801
800
  merge process using start_time and end_time params.
801
+ :param additional_filters: List of additional_filter conditions as tuples.
802
+ Each tuple should be in the format (column_name, operator, value).
803
+ Supported operators: "=", ">=", "<=", ">", "<".
804
+ Example: [("Product", "=", "Computer")]
805
+ For all supported filters, please see:
806
+ https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
802
807
 
803
808
  """
804
809
 
@@ -819,6 +824,7 @@ class FeatureVector(ModelObj):
819
824
  order_by,
820
825
  spark_service,
821
826
  timestamp_for_filtering,
827
+ additional_filters,
822
828
  )
823
829
 
824
830
  def get_online_feature_service(
@@ -827,7 +833,7 @@ class FeatureVector(ModelObj):
827
833
  fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
828
834
  impute_policy: dict = None,
829
835
  update_stats: bool = False,
830
- entity_keys: List[str] = None,
836
+ entity_keys: list[str] = None,
831
837
  ):
832
838
  """initialize and return online feature vector service api,
833
839
  returns :py:class:`~mlrun.feature_store.OnlineVectorService`
@@ -855,7 +861,7 @@ class FeatureVector(ModelObj):
855
861
 
856
862
  Example::
857
863
 
858
- svc = vector_uri.get_online_feature_service(entity_keys=['ticker'])
864
+ svc = vector_uri.get_online_feature_service(entity_keys=["ticker"])
859
865
  try:
860
866
  resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
861
867
  print(resp)
@@ -910,7 +916,7 @@ class OnlineVectorService:
910
916
  graph,
911
917
  index_columns,
912
918
  impute_policy: dict = None,
913
- requested_columns: List[str] = None,
919
+ requested_columns: list[str] = None,
914
920
  ):
915
921
  self.vector = vector
916
922
  self.impute_policy = impute_policy or {}
@@ -966,7 +972,7 @@ class OnlineVectorService:
966
972
  """vector merger function status (ready, running, error)"""
967
973
  return "ready"
968
974
 
969
- def get(self, entity_rows: List[Union[dict, list]], as_list=False):
975
+ def get(self, entity_rows: list[Union[dict, list]], as_list=False):
970
976
  """get feature vector given the provided entity inputs
971
977
 
972
978
  take a list of input vectors/rows and return a list of enriched feature vectors
@@ -1080,7 +1086,9 @@ class OfflineVectorResponse:
1080
1086
  def to_dataframe(self, to_pandas=True):
1081
1087
  """return result as dataframe"""
1082
1088
  if self.status != "completed":
1083
- raise mlrun.errors.MLRunTaskNotReady("feature vector dataset is not ready")
1089
+ raise mlrun.errors.MLRunTaskNotReadyError(
1090
+ "feature vector dataset is not ready"
1091
+ )
1084
1092
  return self._merger.get_df(to_pandas=to_pandas)
1085
1093
 
1086
1094
  def to_parquet(self, target_path, **kw):
@@ -17,6 +17,7 @@ import uuid
17
17
  import pandas as pd
18
18
 
19
19
  import mlrun
20
+ import mlrun.common.constants as mlrun_constants
20
21
  from mlrun.datastore.sources import get_source_from_dict, get_source_step
21
22
  from mlrun.datastore.targets import (
22
23
  add_target_steps,
@@ -263,13 +264,13 @@ def run_ingestion_job(name, featureset, run_config, schedule=None, spark_service
263
264
  out_path=featureset.spec.output_path,
264
265
  )
265
266
  task.spec.secret_sources = run_config.secret_sources
266
- task.set_label("job-type", "feature-ingest").set_label(
267
- "feature-set", featureset.uri
268
- )
267
+ task.set_label(
268
+ mlrun_constants.MLRunInternalLabels.job_type, "feature-ingest"
269
+ ).set_label("feature-set", featureset.uri)
269
270
  if run_config.owner:
270
- task.set_label("owner", run_config.owner).set_label(
271
- "v3io_user", run_config.owner
272
- )
271
+ task.set_label(
272
+ mlrun_constants.MLRunInternalLabels.owner, run_config.owner
273
+ ).set_label(mlrun_constants.MLRunInternalLabels.v3io_user, run_config.owner)
273
274
 
274
275
  # set run UID and save in the feature set status (linking the features et to the job)
275
276
  task.metadata.uid = uuid.uuid4().hex