mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import copy
 import importlib.util
 import pathlib
@@ -19,7 +20,6 @@ from datetime import datetime
 from typing import Any, Optional, Union

 import pandas as pd
-from deprecated import deprecated

 import mlrun
 import mlrun.errors
@@ -47,362 +47,20 @@ from .common import (
     get_feature_vector_by_uri,
     verify_feature_set_exists,
     verify_feature_set_permissions,
-    verify_feature_vector_permissions,
 )
 from .feature_set import FeatureSet
-from .feature_vector import (
-    FeatureVector,
-    FixedWindowType,
-    OfflineVectorResponse,
-    OnlineVectorService,
-)
 from .ingestion import (
     context_to_ingestion_params,
     init_featureset_graph,
     run_ingestion_job,
     run_spark_graph,
 )
-from .retrieval import RemoteVectorResponse, get_merger, run_merge_job

 _v3iofs = None
 spark_transform_handler = "transform"
 _TRANS_TABLE = str.maketrans({" ": "_", "(": "", ")": ""})


-def _features_to_vector_and_check_permissions(features, update_stats):
-    if isinstance(features, str):
-        vector = get_feature_vector_by_uri(features, update=update_stats)
-    elif isinstance(features, FeatureVector):
-        vector = features
-        if not vector.metadata.name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "feature vector name must be specified"
-            )
-        verify_feature_vector_permissions(
-            vector, mlrun.common.schemas.AuthorizationAction.update
-        )
-
-        vector.save()
-    else:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"illegal features value/type ({type(features)})"
-        )
-    return vector
-
-
-@deprecated(
-    version="1.6.0",
-    reason="get_offline_features() will be removed in 1.8.0, please instead use "
-    "get_feature_vector('store://feature_vector_name').get_offline_features()",
-    category=FutureWarning,
-)
-def get_offline_features(
-    feature_vector: Union[str, FeatureVector],
-    entity_rows=None,
-    entity_timestamp_column: str = None,
-    target: DataTargetBase = None,
-    run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
-    with_indexes: bool = False,
-    update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
-    additional_filters: list = None,
-):
-    """retrieve offline feature vector results
-
-    specify a feature vector object/uri and retrieve the desired features, their metadata
-    and statistics. returns :py:class:`~mlrun.feature_store.OfflineVectorResponse`,
-    results can be returned as a dataframe or written to a target
-
-    The start_time and end_time attributes allow filtering the data to a given time range, they accept
-    string values or pandas `Timestamp` objects, string values can also be relative, for example:
-    "now", "now - 1d2h", "now+5m", where a valid pandas Timedelta string follows the verb "now",
-    for time alignment you can use the verb "floor" e.g. "now -1d floor 1H" will align the time to the last hour
-    (the floor string is passed to pandas.Timestamp.floor(), can use D, H, T, S for day, hour, min, sec alignment).
-    Another option to filter the data is by the `query` argument - can be seen in the example.
-    example::
-
-        features = [
-            "stock-quotes.bid",
-            "stock-quotes.asks_sum_5h",
-            "stock-quotes.ask as mycol",
-            "stocks.*",
-        ]
-        vector = FeatureVector(features=features)
-        resp = get_offline_features(
-            vector,
-            entity_rows=trades,
-            entity_timestamp_column="time",
-            query="ticker in ['GOOG'] and bid>100",
-        )
-        print(resp.to_dataframe())
-        print(vector.get_stats_table())
-        resp.to_parquet("./out.parquet")
-
-    :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires
-                           update permissions
-    :param entity_rows: dataframe with entity rows to join with
-    :param target: where to write the results to
-    :param drop_columns: list of columns to drop from the final result
-    :param entity_timestamp_column: timestamp column name in the entity rows dataframe. can be specified
-                                    only if param entity_rows was specified.
-    :param run_config: function and/or run configuration
-                       see :py:class:`~mlrun.feature_store.RunConfig`
-    :param start_time: datetime, low limit of time needed to be filtered. Optional.
-    :param end_time: datetime, high limit of time needed to be filtered. Optional.
-    :param with_indexes: Return vector with/without the entities and the timestamp_key of the feature sets
-                         and with/without entity_timestamp_column and timestamp_for_filtering columns.
-                         This property can be specified also in the feature vector spec
-                         (feature_vector.spec.with_indexes)
-                         (default False)
-    :param update_stats: update features statistics from the requested feature sets on the vector.
-                         (default False).
-    :param engine: processing engine kind ("local", "dask", or "spark")
-    :param engine_args: kwargs for the processing engine
-    :param query: The query string used to filter rows on the output
-    :param spark_service: Name of the spark service to be used (when using a remote-spark runtime)
-    :param order_by: Name or list of names to order by. The name or the names in the list can be the
-                     feature name or the alias of the feature you pass in the feature list.
-    :param timestamp_for_filtering: name of the column to filter by, can be str for all the feature sets or a
-                                    dictionary ({<feature set name>: <timestamp column name>, ...})
-                                    that indicates the timestamp column name for each feature set. Optional.
-                                    By default, the filter executes on the timestamp_key of each feature set.
-                                    Note: the time filtering is performed on each feature set before the
-                                    merge process using start_time and end_time params.
-    :param additional_filters: List of additional_filter conditions as tuples.
-                               Each tuple should be in the format (column_name, operator, value).
-                               Supported operators: "=", ">=", "<=", ">", "<".
-                               Example: [("Product", "=", "Computer")]
-                               For all supported filters, please see:
-                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
-
-
-    """
-    return _get_offline_features(
-        feature_vector,
-        entity_rows,
-        entity_timestamp_column,
-        target,
-        run_config,
-        drop_columns,
-        start_time,
-        end_time,
-        with_indexes,
-        update_stats,
-        engine,
-        engine_args,
-        query,
-        order_by,
-        spark_service,
-        timestamp_for_filtering,
-        additional_filters,
-    )
-
-
-def _get_offline_features(
-    feature_vector: Union[str, FeatureVector],
-    entity_rows=None,
-    entity_timestamp_column: str = None,
-    target: DataTargetBase = None,
-    run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
-    with_indexes: bool = False,
-    update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
-    additional_filters=None,
-) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
-    if entity_rows is None and entity_timestamp_column is not None:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "entity_timestamp_column param "
-            "can not be specified without entity_rows param"
-        )
-    if isinstance(target, BaseStoreTarget) and not target.support_pandas:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"get_offline_features does not support targets that do not support pandas engine."
-            f" Target kind: {target.kind}"
-        )
-
-    if isinstance(feature_vector, FeatureVector):
-        update_stats = True
-
-    feature_vector = _features_to_vector_and_check_permissions(
-        feature_vector, update_stats
-    )
-
-    entity_timestamp_column = (
-        entity_timestamp_column or feature_vector.spec.timestamp_field
-    )
-
-    merger_engine = get_merger(engine)
-
-    if run_config and not run_config.local:
-        return run_merge_job(
-            feature_vector,
-            target,
-            merger_engine,
-            engine,
-            engine_args,
-            spark_service,
-            entity_rows,
-            entity_timestamp_column=entity_timestamp_column,
-            run_config=run_config,
-            drop_columns=drop_columns,
-            with_indexes=with_indexes,
-            query=query,
-            order_by=order_by,
-            start_time=start_time,
-            end_time=end_time,
-            timestamp_for_filtering=timestamp_for_filtering,
-            additional_filters=additional_filters,
-        )
-
-    merger = merger_engine(feature_vector, **(engine_args or {}))
-    return merger.start(
-        entity_rows,
-        entity_timestamp_column,
-        target=target,
-        drop_columns=drop_columns,
-        start_time=start_time,
-        end_time=end_time,
-        timestamp_for_filtering=timestamp_for_filtering,
-        with_indexes=with_indexes,
-        update_stats=update_stats,
-        query=query,
-        order_by=order_by,
-        additional_filters=additional_filters,
-    )
-
-
-@deprecated(
-    version="1.6.0",
-    reason="get_online_feature_service() will be removed in 1.8.0, please instead use "
-    "get_feature_vector('store://feature_vector_name').get_online_feature_service()",
-    category=FutureWarning,
-)
-def get_online_feature_service(
-    feature_vector: Union[str, FeatureVector],
-    run_config: RunConfig = None,
-    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
-    update_stats: bool = False,
-    entity_keys: list[str] = None,
-):
-    """initialize and return online feature vector service api,
-    returns :py:class:`~mlrun.feature_store.OnlineVectorService`
-
-    :**usage**:
-        There are two ways to use the function:
-
-        1. As context manager
-
-           Example::
-
-               with get_online_feature_service(vector_uri) as svc:
-                   resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
-                   print(resp)
-                   resp = svc.get([{"ticker": "AAPL"}], as_list=True)
-                   print(resp)
-
-           Example with imputing::
-
-               with get_online_feature_service(vector_uri, entity_keys=['id'],
-                                               impute_policy={"*": "$mean", "amount": 0)) as svc:
-                   resp = svc.get([{"id": "C123487"}])
-
-        2. as simple function, note that in that option you need to close the session.
-
-           Example::
-
-               svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
-               try:
-                   resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
-                   print(resp)
-                   resp = svc.get([{"ticker": "AAPL"}], as_list=True)
-                   print(resp)
-
-               finally:
-                   svc.close()
-
-           Example with imputing::
-
-               svc = get_online_feature_service(vector_uri, entity_keys=['id'],
-                                                impute_policy={"*": "$mean", "amount": 0))
-               try:
-                   resp = svc.get([{"id": "C123487"}])
-               except Exception as e:
-                   handling exception...
-               finally:
-                   svc.close()
-
-    :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
-                           permissions.
-    :param run_config: function and/or run configuration for remote jobs/services
-    :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict
-                          value indicate which value will be used in case the feature is NaN/empty, the replaced
-                          value can be fixed number for constants or $mean, $max, $min, $std, $count
-                          for statistical
-                          values. "*" is used to specify the default for all features, example: `{"*": "$mean"}`
-    :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest
-    :param update_stats: update features statistics from the requested feature sets on the vector.
-                         Default: False.
-    :param entity_keys: Entity list of the first feature_set in the vector.
-                        The indexes that are used to query the online service.
-    :return: Initialize the `OnlineVectorService`.
-             Will be used in subclasses where `support_online=True`.
-    """
-    return _get_online_feature_service(
-        feature_vector,
-        run_config,
-        fixed_window_type,
-        impute_policy,
-        update_stats,
-        entity_keys,
-    )
-
-
-def _get_online_feature_service(
-    feature_vector: Union[str, FeatureVector],
-    run_config: RunConfig = None,
-    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
-    update_stats: bool = False,
-    entity_keys: list[str] = None,
-) -> OnlineVectorService:
-    if isinstance(feature_vector, FeatureVector):
-        update_stats = True
-    feature_vector = _features_to_vector_and_check_permissions(
-        feature_vector, update_stats
-    )
-
-    # Impute policies rely on statistics in many cases, so verifying that the fvec has stats in it
-    if impute_policy and not feature_vector.status.stats:
-        update_stats = True
-
-    engine_args = {"impute_policy": impute_policy}
-    merger_engine = get_merger("storey")
-    # todo: support remote service (using remote nuclio/mlrun function if run_config)
-
-    merger = merger_engine(feature_vector, **engine_args)
-
-    return merger.init_online_vector_service(
-        entity_keys, fixed_window_type, update_stats=update_stats
-    )
-
-
 def norm_column_name(name: str) -> str:
     """
     Remove parentheses () and replace whitespaces with an underscore _.
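
Migration note: the deprecation messages on the removed `get_offline_features()` and `get_online_feature_service()` helpers name their replacements as methods on the `FeatureVector` object itself. A minimal sketch of the method-based calls, assuming a vector already stored in the project (the URI and entity values here are illustrative)::

    import mlrun.feature_store as fstore

    # Load the stored feature vector by URI (URI shown is illustrative).
    vector = fstore.get_feature_vector("store://feature-vectors/my-project/my-vector")

    # Offline retrieval: replaces the removed get_offline_features(vector, ...).
    resp = vector.get_offline_features(query="ticker in ['GOOG'] and bid>100")
    print(resp.to_dataframe())

    # Online retrieval: replaces the removed get_online_feature_service(vector, ...).
    with vector.get_online_feature_service(entity_keys=["ticker"]) as svc:
        print(svc.get([{"ticker": "GOOG"}]))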
@@ -448,14 +106,14 @@ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:


 def ingest(
+    mlrun_context: Union["mlrun.MLrunProject", "mlrun.MLClientCtx"],
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: list[DataTargetBase] = None,
+    targets: Optional[list[DataTargetBase]] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
     run_config: RunConfig = None,
-    mlrun_context=None,
     spark_context=None,
     overwrite=None,
 ) -> Optional[pd.DataFrame]:
@@ -484,6 +142,7 @@ def ingest(
         targets = [CSVTarget("mycsv", path="./mycsv.csv")]
         ingest(measurements, source, targets)

+    :param mlrun_context: mlrun context
     :param featureset: feature set object or featureset.uri. (uri must be of a feature set that is in the DB,
                        call `.save()` if it's not)
     :param source: source dataframe or other sources (e.g. parquet source see:
@@ -496,7 +155,6 @@ def ingest(
                           histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
     :param run_config: function and/or run configuration for remote jobs,
                        see :py:class:`~mlrun.feature_store.RunConfig`
-    :param mlrun_context: mlrun context (when running as a job), for internal use !
     :param spark_context: local spark session for spark ingestion, example for creating the spark context:
                           `spark = SparkSession.builder.appName("Spark function").getOrCreate()`
                           For remote spark ingestion, this should contain the remote spark service name
@@ -505,12 +163,9 @@ def ingest(
                       False for scheduled ingest - does not delete the target)
    :return: if return_df is True, a dataframe will be returned based on the graph
    """
-    if mlrun_context is None:
-        deprecated(
-            version="1.6.0",
-            reason="Calling 'ingest' with mlrun_context=None is deprecated and will be removed in 1.8.0,\
-                use 'FeatureSet.ingest()' instead",
-            category=FutureWarning,
+    if not mlrun_context:
+        raise mlrun.errors.MLRunValueError(
+            "mlrun_context must be defined when calling ingest()"
         )

     return _ingest(
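
With this hunk, calling the module-level `ingest()` without an `mlrun_context` is a hard `MLRunValueError` rather than the old `FutureWarning`; the removed warning text points to the method form on the feature set. A hedged sketch of that form (feature set name, entity, and dataframe are illustrative)::

    import pandas as pd
    import mlrun.feature_store as fstore

    df = pd.DataFrame({"ticker": ["GOOG"], "bid": [720.5]})
    measurements = fstore.FeatureSet("measurements", entities=[fstore.Entity("ticker")])

    # Replaces ingest(measurements, source=df, ...) for ad-hoc (non-job) ingestion.
    result_df = measurements.ingest(source=df)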
@@ -530,7 +185,7 @@
 def _ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: list[DataTargetBase] = None,
+    targets: Optional[list[DataTargetBase]] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
@@ -775,61 +430,14 @@ def _ingest(
     return df


-@deprecated(
-    version="1.6.0",
-    reason="'preview' will be removed in 1.8.0, use 'FeatureSet.preview()' instead",
-    category=FutureWarning,
-)
-def preview(
-    featureset: FeatureSet,
-    source,
-    entity_columns: list = None,
-    namespace=None,
-    options: InferOptions = None,
-    verbose: bool = False,
-    sample_size: int = None,
-) -> pd.DataFrame:
-    """run the ingestion pipeline with local DataFrame/file data and infer features schema and stats
-
-    example::
-
-        quotes_set = FeatureSet("stock-quotes", entities=[Entity("ticker")])
-        quotes_set.add_aggregation("ask", ["sum", "max"], ["1h", "5h"], "10m")
-        quotes_set.add_aggregation("bid", ["min", "max"], ["1h"], "10m")
-        df = preview(
-            quotes_set,
-            quotes_df,
-            entity_columns=["ticker"],
-        )
-
-    :param featureset: feature set object or uri
-    :param source: source dataframe or csv/parquet file path
-    :param entity_columns: list of entity (index) column names
-    :param namespace: namespace or module containing graph classes
-    :param options: schema (for discovery of entities, features in featureset), index, stats,
-                    histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
-    :param verbose: verbose log
-    :param sample_size: num of rows to sample from the dataset (for large datasets)
-    """
-    return _preview(
-        featureset,
-        source,
-        entity_columns,
-        namespace,
-        options,
-        verbose,
-        sample_size,
-    )
-
-
 def _preview(
     featureset: FeatureSet,
     source,
-    entity_columns: list = None,
+    entity_columns: Optional[list] = None,
     namespace=None,
     options: InferOptions = None,
     verbose: bool = False,
-    sample_size: int = None,
+    sample_size: Optional[int] = None,
 ) -> pd.DataFrame:
     if isinstance(source, pd.DataFrame):
         source = _rename_source_dataframe_columns(source)
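
The removed `preview()` wrapper pointed users at `FeatureSet.preview()`. Adapting the deleted docstring example to the method form (as in that example, `quotes_df` is an assumed source dataframe)::

    import mlrun.feature_store as fstore

    quotes_set = fstore.FeatureSet("stock-quotes", entities=[fstore.Entity("ticker")])
    quotes_set.add_aggregation("ask", ["sum", "max"], ["1h", "5h"], "10m")

    # Replaces the removed preview(quotes_set, quotes_df, entity_columns=["ticker"]).
    df = quotes_set.preview(quotes_df, entity_columns=["ticker"])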
@@ -895,8 +503,8 @@ def _preview(
 def _run_ingestion_job(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     infer_options: InferOptions = InferOptions.default(),
     run_config: RunConfig = None,
 ):
@@ -911,60 +519,11 @@ def _run_ingestion_job(
     return run_ingestion_job(name, featureset, run_config, source.schedule)


-@deprecated(
-    version="1.6.0",
-    reason="'deploy_ingestion_service_v2' will be removed in 1.8.0, "
-    "use 'FeatureSet.deploy_ingestion_service()' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service_v2(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> tuple[str, BaseRuntime]:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        deploy_ingestion_service_v2(my_set, source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service, and the function that was deployed (which will
-             differ from the function passed in via the run_config parameter).
-    """
-    return _deploy_ingestion_service_v2(
-        featureset,
-        source,
-        targets,
-        name,
-        run_config,
-        verbose,
-    )
-
-
 def _deploy_ingestion_service_v2(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     run_config: RunConfig = None,
     verbose=False,
 ) -> tuple[str, BaseRuntime]:
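
The removed `deploy_ingestion_service_v2()` wrapper named `FeatureSet.deploy_ingestion_service()` as its successor. Recasting the deleted docstring example as a method call (the function name and source come from that example; the return shape follows the removed wrapper's documented `tuple[str, BaseRuntime]`)::

    import mlrun
    import mlrun.feature_store as fstore
    from mlrun.datastore.sources import HTTPSource

    my_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
    func = mlrun.code_to_function("ingest", kind="serving")

    # Replaces deploy_ingestion_service_v2(my_set, source, run_config=config).
    url, deployed = my_set.deploy_ingestion_service(
        source=HTTPSource(), run_config=fstore.RunConfig(function=func)
    )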
@@ -1010,6 +569,9 @@ def _deploy_ingestion_service_v2(
     function.metadata.name = function.metadata.name or name

     function.spec.graph = featureset.spec.graph
+    function.spec.graph.engine = (
+        "async" if featureset.spec.engine == "storey" else "sync"
+    )
     function.spec.parameters = run_config.parameters
     function.spec.graph_initializer = (
         "mlrun.feature_store.ingestion.featureset_initializer"
@@ -1026,7 +588,7 @@ def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
     source: BaseSourceDriver = None,
-    targets: list[BaseStoreTarget] = None,
+    targets: Optional[list[BaseStoreTarget]] = None,
     infer_options: InferOptions = InferOptions.default(),
     mlrun_context=None,
     namespace=None,
@@ -1199,8 +761,8 @@ def _infer_from_static_df(
 def set_task_params(
     featureset: FeatureSet,
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    parameters: dict = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    parameters: Optional[dict] = None,
     infer_options: InferOptions = InferOptions.Null,
     overwrite=None,
 ):
@@ -178,17 +178,17 @@ class RunConfig:
     def __init__(
         self,
         function: typing.Union[str, FunctionReference, BaseRuntime] = None,
-        local: bool = None,
-        image: str = None,
-        kind: str = None,
-        handler: str = None,
-        parameters: dict = None,
-        watch: bool = None,
+        local: typing.Optional[bool] = None,
+        image: typing.Optional[str] = None,
+        kind: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        parameters: typing.Optional[dict] = None,
+        watch: typing.Optional[bool] = None,
         owner=None,
         credentials: typing.Optional[mlrun.model.Credentials] = None,
-        code: str = None,
-        requirements: typing.Union[str, list[str]] = None,
-        extra_spec: dict = None,
+        code: typing.Optional[str] = None,
+        requirements: typing.Optional[typing.Union[str, list[str]]] = None,
+        extra_spec: typing.Optional[dict] = None,
         auth_info=None,
     ):
         """class for holding function and run specs for jobs and serving functions