mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +134 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +133 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc8.dist-info/METADATA +0 -272
- mlrun-1.6.4rc8.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import warnings
|
|
15
15
|
from datetime import datetime
|
|
16
|
-
from typing import
|
|
16
|
+
from typing import Optional, Union
|
|
17
17
|
|
|
18
18
|
import pandas as pd
|
|
19
19
|
from storey import EmitEveryEvent, EmitPolicy
|
|
@@ -119,9 +119,9 @@ class FeatureSetSpec(ModelObj):
|
|
|
119
119
|
|
|
120
120
|
self.owner = owner
|
|
121
121
|
self.description = description
|
|
122
|
-
self.entities:
|
|
123
|
-
self.relations:
|
|
124
|
-
self.features:
|
|
122
|
+
self.entities: list[Union[Entity, str]] = entities or []
|
|
123
|
+
self.relations: dict[str, Union[Entity, str]] = relations or {}
|
|
124
|
+
self.features: list[Feature] = features or []
|
|
125
125
|
self.partition_keys = partition_keys or []
|
|
126
126
|
self.timestamp_key = timestamp_key
|
|
127
127
|
self.source = source
|
|
@@ -136,12 +136,12 @@ class FeatureSetSpec(ModelObj):
|
|
|
136
136
|
self.with_default_targets = True
|
|
137
137
|
|
|
138
138
|
@property
|
|
139
|
-
def entities(self) ->
|
|
139
|
+
def entities(self) -> list[Entity]:
|
|
140
140
|
"""feature set entities (indexes)"""
|
|
141
141
|
return self._entities
|
|
142
142
|
|
|
143
143
|
@entities.setter
|
|
144
|
-
def entities(self, entities:
|
|
144
|
+
def entities(self, entities: list[Union[Entity, str]]):
|
|
145
145
|
if entities:
|
|
146
146
|
# if the entity is a string, convert it to Entity class
|
|
147
147
|
for i, entity in enumerate(entities):
|
|
@@ -163,21 +163,21 @@ class FeatureSetSpec(ModelObj):
|
|
|
163
163
|
self._entities = ObjectList.from_list(Entity, entities)
|
|
164
164
|
|
|
165
165
|
@property
|
|
166
|
-
def features(self) ->
|
|
166
|
+
def features(self) -> list[Feature]:
|
|
167
167
|
"""feature set features list"""
|
|
168
168
|
return self._features
|
|
169
169
|
|
|
170
170
|
@features.setter
|
|
171
|
-
def features(self, features:
|
|
171
|
+
def features(self, features: list[Feature]):
|
|
172
172
|
self._features = ObjectList.from_list(Feature, features)
|
|
173
173
|
|
|
174
174
|
@property
|
|
175
|
-
def targets(self) ->
|
|
175
|
+
def targets(self) -> list[DataTargetBase]:
|
|
176
176
|
"""list of desired targets (material storage)"""
|
|
177
177
|
return self._targets
|
|
178
178
|
|
|
179
179
|
@targets.setter
|
|
180
|
-
def targets(self, targets:
|
|
180
|
+
def targets(self, targets: list[DataTargetBase]):
|
|
181
181
|
self._targets = ObjectList.from_list(DataTargetBase, targets)
|
|
182
182
|
|
|
183
183
|
@property
|
|
@@ -230,12 +230,12 @@ class FeatureSetSpec(ModelObj):
|
|
|
230
230
|
self._source = source
|
|
231
231
|
|
|
232
232
|
@property
|
|
233
|
-
def relations(self) ->
|
|
233
|
+
def relations(self) -> dict[str, Entity]:
|
|
234
234
|
"""feature set relations dict"""
|
|
235
235
|
return self._relations
|
|
236
236
|
|
|
237
237
|
@relations.setter
|
|
238
|
-
def relations(self, relations:
|
|
238
|
+
def relations(self, relations: dict[str, Entity]):
|
|
239
239
|
for col, ent in relations.items():
|
|
240
240
|
if isinstance(ent, str):
|
|
241
241
|
relations[col] = Entity(ent)
|
|
@@ -284,12 +284,12 @@ class FeatureSetStatus(ModelObj):
|
|
|
284
284
|
self.run_uri = run_uri
|
|
285
285
|
|
|
286
286
|
@property
|
|
287
|
-
def targets(self) ->
|
|
287
|
+
def targets(self) -> list[DataTarget]:
|
|
288
288
|
"""list of material storage targets + their status/path"""
|
|
289
289
|
return self._targets
|
|
290
290
|
|
|
291
291
|
@targets.setter
|
|
292
|
-
def targets(self, targets:
|
|
292
|
+
def targets(self, targets: list[DataTarget]):
|
|
293
293
|
self._targets = ObjectList.from_list(DataTarget, targets)
|
|
294
294
|
|
|
295
295
|
def update_target(self, target: DataTarget):
|
|
@@ -318,8 +318,6 @@ def emit_policy_to_dict(policy: EmitPolicy):
|
|
|
318
318
|
|
|
319
319
|
|
|
320
320
|
class FeatureSet(ModelObj):
|
|
321
|
-
"""Feature set object, defines a set of features and their data pipeline"""
|
|
322
|
-
|
|
323
321
|
kind = mlrun.common.schemas.ObjectKind.feature_set.value
|
|
324
322
|
_dict_fields = ["kind", "metadata", "spec", "status"]
|
|
325
323
|
|
|
@@ -327,11 +325,11 @@ class FeatureSet(ModelObj):
|
|
|
327
325
|
self,
|
|
328
326
|
name: str = None,
|
|
329
327
|
description: str = None,
|
|
330
|
-
entities:
|
|
328
|
+
entities: list[Union[Entity, str]] = None,
|
|
331
329
|
timestamp_key: str = None,
|
|
332
330
|
engine: str = None,
|
|
333
331
|
label_column: str = None,
|
|
334
|
-
relations:
|
|
332
|
+
relations: dict[str, Union[Entity, str]] = None,
|
|
335
333
|
passthrough: bool = None,
|
|
336
334
|
):
|
|
337
335
|
"""Feature set object, defines a set of features and their data pipeline
|
|
@@ -339,7 +337,10 @@ class FeatureSet(ModelObj):
|
|
|
339
337
|
example::
|
|
340
338
|
|
|
341
339
|
import mlrun.feature_store as fstore
|
|
342
|
-
|
|
340
|
+
|
|
341
|
+
ticks = fstore.FeatureSet(
|
|
342
|
+
"ticks", entities=["stock"], timestamp_key="timestamp"
|
|
343
|
+
)
|
|
343
344
|
ticks.ingest(df)
|
|
344
345
|
|
|
345
346
|
:param name: name of the feature set
|
|
@@ -532,7 +533,7 @@ class FeatureSet(ModelObj):
|
|
|
532
533
|
self, **(class_args if class_args is not None else {})
|
|
533
534
|
)
|
|
534
535
|
|
|
535
|
-
def purge_targets(self, target_names:
|
|
536
|
+
def purge_targets(self, target_names: list[str] = None, silent: bool = False):
|
|
536
537
|
"""Delete data of specific targets
|
|
537
538
|
:param target_names: List of names of targets to delete (default: delete all ingested targets)
|
|
538
539
|
:param silent: Fail silently if target doesn't exist in featureset status"""
|
|
@@ -560,7 +561,7 @@ class FeatureSet(ModelObj):
|
|
|
560
561
|
|
|
561
562
|
def update_targets_for_ingest(
|
|
562
563
|
self,
|
|
563
|
-
targets:
|
|
564
|
+
targets: list[DataTargetBase],
|
|
564
565
|
overwrite: bool = None,
|
|
565
566
|
):
|
|
566
567
|
if not targets:
|
|
@@ -581,7 +582,7 @@ class FeatureSet(ModelObj):
|
|
|
581
582
|
update_targets_run_id_for_ingest(overwrite, targets, status_targets)
|
|
582
583
|
|
|
583
584
|
def _reload_and_get_status_targets(
|
|
584
|
-
self, target_names:
|
|
585
|
+
self, target_names: list[str] = None, silent: bool = False
|
|
585
586
|
):
|
|
586
587
|
try:
|
|
587
588
|
self.reload(update_spec=False)
|
|
@@ -602,9 +603,7 @@ class FeatureSet(ModelObj):
|
|
|
602
603
|
pass
|
|
603
604
|
else:
|
|
604
605
|
raise mlrun.errors.MLRunNotFoundError(
|
|
605
|
-
"Target not found in status (fset={
|
|
606
|
-
self.metadata.name, target_name
|
|
607
|
-
)
|
|
606
|
+
f"Target not found in status (fset={self.metadata.name}, target={target_name})"
|
|
608
607
|
)
|
|
609
608
|
else:
|
|
610
609
|
targets = self.status.targets
|
|
@@ -621,7 +620,7 @@ class FeatureSet(ModelObj):
|
|
|
621
620
|
name: str,
|
|
622
621
|
value_type: mlrun.data_types.ValueType = None,
|
|
623
622
|
description: str = None,
|
|
624
|
-
labels: Optional[
|
|
623
|
+
labels: Optional[dict[str, str]] = None,
|
|
625
624
|
):
|
|
626
625
|
"""add/set an entity (dataset index)
|
|
627
626
|
|
|
@@ -629,12 +628,12 @@ class FeatureSet(ModelObj):
|
|
|
629
628
|
|
|
630
629
|
import mlrun.feature_store as fstore
|
|
631
630
|
|
|
632
|
-
ticks = fstore.FeatureSet(
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
ticks.add_entity(
|
|
636
|
-
|
|
637
|
-
|
|
631
|
+
ticks = fstore.FeatureSet(
|
|
632
|
+
"ticks", entities=["stock"], timestamp_key="timestamp"
|
|
633
|
+
)
|
|
634
|
+
ticks.add_entity(
|
|
635
|
+
"country", mlrun.data_types.ValueType.STRING, description="stock country"
|
|
636
|
+
)
|
|
638
637
|
ticks.add_entity("year", mlrun.data_types.ValueType.INT16)
|
|
639
638
|
ticks.save()
|
|
640
639
|
|
|
@@ -654,13 +653,23 @@ class FeatureSet(ModelObj):
|
|
|
654
653
|
import mlrun.feature_store as fstore
|
|
655
654
|
from mlrun.features import Feature
|
|
656
655
|
|
|
657
|
-
ticks = fstore.FeatureSet(
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
ticks.add_feature(
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
656
|
+
ticks = fstore.FeatureSet(
|
|
657
|
+
"ticks", entities=["stock"], timestamp_key="timestamp"
|
|
658
|
+
)
|
|
659
|
+
ticks.add_feature(
|
|
660
|
+
Feature(
|
|
661
|
+
value_type=mlrun.data_types.ValueType.STRING,
|
|
662
|
+
description="client consistency",
|
|
663
|
+
),
|
|
664
|
+
"ABC01",
|
|
665
|
+
)
|
|
666
|
+
ticks.add_feature(
|
|
667
|
+
Feature(
|
|
668
|
+
value_type=mlrun.data_types.ValueType.FLOAT,
|
|
669
|
+
description="client volatility",
|
|
670
|
+
),
|
|
671
|
+
"SAB",
|
|
672
|
+
)
|
|
664
673
|
ticks.save()
|
|
665
674
|
|
|
666
675
|
:param feature: setting of Feature
|
|
@@ -864,15 +873,18 @@ class FeatureSet(ModelObj):
|
|
|
864
873
|
example::
|
|
865
874
|
|
|
866
875
|
import mlrun.feature_store as fstore
|
|
876
|
+
|
|
867
877
|
...
|
|
868
|
-
ticks = fstore.FeatureSet(
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
ticks.add_aggregation(
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
878
|
+
ticks = fstore.FeatureSet(
|
|
879
|
+
"ticks", entities=["stock"], timestamp_key="timestamp"
|
|
880
|
+
)
|
|
881
|
+
ticks.add_aggregation(
|
|
882
|
+
name="priceN",
|
|
883
|
+
column="price",
|
|
884
|
+
operations=["avg"],
|
|
885
|
+
windows=["1d"],
|
|
886
|
+
period="1h",
|
|
887
|
+
)
|
|
876
888
|
ticks.plot(rankdir="LR", with_targets=True)
|
|
877
889
|
|
|
878
890
|
:param filename: target filepath for the graph image (None for the notebook)
|
|
@@ -905,6 +917,7 @@ class FeatureSet(ModelObj):
|
|
|
905
917
|
start_time=None,
|
|
906
918
|
end_time=None,
|
|
907
919
|
time_column=None,
|
|
920
|
+
additional_filters=None,
|
|
908
921
|
**kwargs,
|
|
909
922
|
):
|
|
910
923
|
"""return featureset (offline) data as dataframe
|
|
@@ -916,6 +929,12 @@ class FeatureSet(ModelObj):
|
|
|
916
929
|
:param end_time: filter by end time
|
|
917
930
|
:param time_column: specify the time column name in the file
|
|
918
931
|
:param kwargs: additional reader (csv, parquet, ..) args
|
|
932
|
+
:param additional_filters: List of additional_filter conditions as tuples.
|
|
933
|
+
Each tuple should be in the format (column_name, operator, value).
|
|
934
|
+
Supported operators: "=", ">=", "<=", ">", "<".
|
|
935
|
+
Example: [("Product", "=", "Computer")]
|
|
936
|
+
For all supported filters, please see:
|
|
937
|
+
https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
|
|
919
938
|
:return: DataFrame
|
|
920
939
|
"""
|
|
921
940
|
entities = list(self.spec.entities.keys())
|
|
@@ -934,6 +953,7 @@ class FeatureSet(ModelObj):
|
|
|
934
953
|
start_time=start_time,
|
|
935
954
|
end_time=end_time,
|
|
936
955
|
time_field=time_column,
|
|
956
|
+
additional_filters=additional_filters,
|
|
937
957
|
**kwargs,
|
|
938
958
|
)
|
|
939
959
|
# to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
|
|
@@ -953,6 +973,7 @@ class FeatureSet(ModelObj):
|
|
|
953
973
|
start_time=start_time,
|
|
954
974
|
end_time=end_time,
|
|
955
975
|
time_column=time_column,
|
|
976
|
+
additional_filters=additional_filters,
|
|
956
977
|
**kwargs,
|
|
957
978
|
)
|
|
958
979
|
return result
|
|
@@ -983,7 +1004,7 @@ class FeatureSet(ModelObj):
|
|
|
983
1004
|
def ingest(
|
|
984
1005
|
self,
|
|
985
1006
|
source=None,
|
|
986
|
-
targets:
|
|
1007
|
+
targets: list[DataTargetBase] = None,
|
|
987
1008
|
namespace=None,
|
|
988
1009
|
return_df: bool = True,
|
|
989
1010
|
infer_options: InferOptions = InferOptions.default(),
|
|
@@ -1009,7 +1030,7 @@ class FeatureSet(ModelObj):
|
|
|
1009
1030
|
df = stocks_set.ingest(stocks, infer_options=fstore.InferOptions.default())
|
|
1010
1031
|
|
|
1011
1032
|
# for running as remote job
|
|
1012
|
-
config = RunConfig(image=
|
|
1033
|
+
config = RunConfig(image="mlrun/mlrun")
|
|
1013
1034
|
df = ingest(stocks_set, stocks, run_config=config)
|
|
1014
1035
|
|
|
1015
1036
|
# specify source and targets
|
|
@@ -1085,11 +1106,11 @@ class FeatureSet(ModelObj):
|
|
|
1085
1106
|
def deploy_ingestion_service(
|
|
1086
1107
|
self,
|
|
1087
1108
|
source: DataSource = None,
|
|
1088
|
-
targets:
|
|
1109
|
+
targets: list[DataTargetBase] = None,
|
|
1089
1110
|
name: str = None,
|
|
1090
1111
|
run_config: RunConfig = None,
|
|
1091
1112
|
verbose=False,
|
|
1092
|
-
) ->
|
|
1113
|
+
) -> tuple[str, BaseRuntime]:
|
|
1093
1114
|
"""Start real-time ingestion service using nuclio function
|
|
1094
1115
|
|
|
1095
1116
|
Deploy a real-time function implementing feature ingestion pipeline
|
|
@@ -1122,7 +1143,7 @@ class FeatureSet(ModelObj):
|
|
|
1122
1143
|
def extract_relation_keys(
|
|
1123
1144
|
self,
|
|
1124
1145
|
other_feature_set,
|
|
1125
|
-
relations:
|
|
1146
|
+
relations: dict[str, Union[str, Entity]] = None,
|
|
1126
1147
|
) -> list[str]:
|
|
1127
1148
|
"""
|
|
1128
1149
|
Checks whether a feature set can be merged to the right of this feature set.
|
|
@@ -1189,10 +1210,10 @@ class SparkAggregateByKey(StepToDict):
|
|
|
1189
1210
|
|
|
1190
1211
|
def __init__(
|
|
1191
1212
|
self,
|
|
1192
|
-
key_columns:
|
|
1213
|
+
key_columns: list[str],
|
|
1193
1214
|
time_column: str,
|
|
1194
|
-
aggregates:
|
|
1195
|
-
emit_policy: Union[EmitPolicy,
|
|
1215
|
+
aggregates: list[dict],
|
|
1216
|
+
emit_policy: Union[EmitPolicy, dict] = None,
|
|
1196
1217
|
):
|
|
1197
1218
|
self.key_columns = key_columns
|
|
1198
1219
|
self.time_column = time_column
|
|
@@ -17,7 +17,7 @@ import typing
|
|
|
17
17
|
from copy import copy
|
|
18
18
|
from datetime import datetime
|
|
19
19
|
from enum import Enum
|
|
20
|
-
from typing import
|
|
20
|
+
from typing import Union
|
|
21
21
|
|
|
22
22
|
import numpy as np
|
|
23
23
|
import pandas as pd
|
|
@@ -69,18 +69,16 @@ class FeatureVectorSpec(ModelObj):
|
|
|
69
69
|
self._entity_fields: ObjectList = None
|
|
70
70
|
self._entity_source: DataSource = None
|
|
71
71
|
self._function: FunctionReference = None
|
|
72
|
-
self._relations:
|
|
72
|
+
self._relations: dict[str, ObjectDict] = None
|
|
73
73
|
self._join_graph: JoinGraph = None
|
|
74
74
|
|
|
75
75
|
self.description = description
|
|
76
|
-
self.features:
|
|
76
|
+
self.features: list[str] = features or []
|
|
77
77
|
self.entity_source = entity_source
|
|
78
78
|
self.entity_fields = entity_fields or []
|
|
79
79
|
self.graph = graph
|
|
80
80
|
self.join_graph = join_graph
|
|
81
|
-
self.relations:
|
|
82
|
-
relations or {}
|
|
83
|
-
)
|
|
81
|
+
self.relations: dict[str, dict[str, Union[Entity, str]]] = relations or {}
|
|
84
82
|
self.timestamp_field = timestamp_field
|
|
85
83
|
self.label_feature = label_feature
|
|
86
84
|
self.with_indexes = with_indexes
|
|
@@ -97,12 +95,12 @@ class FeatureVectorSpec(ModelObj):
|
|
|
97
95
|
self._entity_source = self._verify_dict(source, "entity_source", DataSource)
|
|
98
96
|
|
|
99
97
|
@property
|
|
100
|
-
def entity_fields(self) ->
|
|
98
|
+
def entity_fields(self) -> list[Feature]:
|
|
101
99
|
"""the schema/metadata for the entity source fields"""
|
|
102
100
|
return self._entity_fields
|
|
103
101
|
|
|
104
102
|
@entity_fields.setter
|
|
105
|
-
def entity_fields(self, entity_fields:
|
|
103
|
+
def entity_fields(self, entity_fields: list[Feature]):
|
|
106
104
|
self._entity_fields = ObjectList.from_list(Feature, entity_fields)
|
|
107
105
|
|
|
108
106
|
@property
|
|
@@ -125,14 +123,12 @@ class FeatureVectorSpec(ModelObj):
|
|
|
125
123
|
self._function = self._verify_dict(function, "function", FunctionReference)
|
|
126
124
|
|
|
127
125
|
@property
|
|
128
|
-
def relations(self) ->
|
|
126
|
+
def relations(self) -> dict[str, ObjectDict]:
|
|
129
127
|
"""feature set relations dict"""
|
|
130
128
|
return self._relations
|
|
131
129
|
|
|
132
130
|
@relations.setter
|
|
133
|
-
def relations(
|
|
134
|
-
self, relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]]
|
|
135
|
-
):
|
|
131
|
+
def relations(self, relations: dict[str, dict[str, Union[Entity, str]]]):
|
|
136
132
|
temp_relations = {}
|
|
137
133
|
for fs_name, relation in relations.items():
|
|
138
134
|
for col, ent in relation.items():
|
|
@@ -179,29 +175,29 @@ class FeatureVectorStatus(ModelObj):
|
|
|
179
175
|
self.stats = stats or {}
|
|
180
176
|
self.index_keys = index_keys
|
|
181
177
|
self.preview = preview or []
|
|
182
|
-
self.features:
|
|
178
|
+
self.features: list[Feature] = features or []
|
|
183
179
|
self.run_uri = run_uri
|
|
184
180
|
self.timestamp_key = timestamp_key
|
|
185
181
|
|
|
186
182
|
@property
|
|
187
|
-
def targets(self) ->
|
|
183
|
+
def targets(self) -> list[DataTarget]:
|
|
188
184
|
"""list of material storage targets + their status/path"""
|
|
189
185
|
return self._targets
|
|
190
186
|
|
|
191
187
|
@targets.setter
|
|
192
|
-
def targets(self, targets:
|
|
188
|
+
def targets(self, targets: list[DataTarget]):
|
|
193
189
|
self._targets = ObjectList.from_list(DataTarget, targets)
|
|
194
190
|
|
|
195
191
|
def update_target(self, target: DataTarget):
|
|
196
192
|
self._targets.update(target)
|
|
197
193
|
|
|
198
194
|
@property
|
|
199
|
-
def features(self) ->
|
|
195
|
+
def features(self) -> list[Feature]:
|
|
200
196
|
"""list of features (result of joining features from the source feature sets)"""
|
|
201
197
|
return self._features
|
|
202
198
|
|
|
203
199
|
@features.setter
|
|
204
|
-
def features(self, features:
|
|
200
|
+
def features(self, features: list[Feature]):
|
|
205
201
|
self._features = ObjectList.from_list(Feature, features)
|
|
206
202
|
|
|
207
203
|
|
|
@@ -378,7 +374,7 @@ class _JoinStep(ModelObj):
|
|
|
378
374
|
name: str = None,
|
|
379
375
|
left_step_name: str = None,
|
|
380
376
|
right_step_name: str = None,
|
|
381
|
-
left_feature_set_names: Union[str,
|
|
377
|
+
left_feature_set_names: Union[str, list[str]] = None,
|
|
382
378
|
right_feature_set_name: str = None,
|
|
383
379
|
join_type: str = "inner",
|
|
384
380
|
asof_join: bool = False,
|
|
@@ -388,7 +384,8 @@ class _JoinStep(ModelObj):
|
|
|
388
384
|
self.right_step_name = right_step_name
|
|
389
385
|
self.left_feature_set_names = (
|
|
390
386
|
left_feature_set_names
|
|
391
|
-
if
|
|
387
|
+
if left_feature_set_names is None
|
|
388
|
+
or isinstance(left_feature_set_names, list)
|
|
392
389
|
else [left_feature_set_names]
|
|
393
390
|
)
|
|
394
391
|
self.right_feature_set_name = right_feature_set_name
|
|
@@ -402,7 +399,7 @@ class _JoinStep(ModelObj):
|
|
|
402
399
|
self,
|
|
403
400
|
feature_set_objects: ObjectList,
|
|
404
401
|
vector,
|
|
405
|
-
entity_rows_keys:
|
|
402
|
+
entity_rows_keys: list[str] = None,
|
|
406
403
|
):
|
|
407
404
|
if feature_set_objects[self.right_feature_set_name].is_connectable_to_df(
|
|
408
405
|
entity_rows_keys
|
|
@@ -482,21 +479,22 @@ class FeatureVector(ModelObj):
|
|
|
482
479
|
description=None,
|
|
483
480
|
with_indexes=None,
|
|
484
481
|
join_graph: JoinGraph = None,
|
|
485
|
-
relations:
|
|
482
|
+
relations: dict[str, dict[str, Union[Entity, str]]] = None,
|
|
486
483
|
):
|
|
487
484
|
"""Feature vector, specify selected features, their metadata and material views
|
|
488
485
|
|
|
489
486
|
example::
|
|
490
487
|
|
|
491
488
|
import mlrun.feature_store as fstore
|
|
489
|
+
|
|
492
490
|
features = ["quotes.bid", "quotes.asks_sum_5h as asks_5h", "stocks.*"]
|
|
493
491
|
vector = fstore.FeatureVector("my-vec", features)
|
|
494
492
|
|
|
495
493
|
# get the vector as a dataframe
|
|
496
|
-
df =
|
|
494
|
+
df = vector.get_offline_features().to_dataframe()
|
|
497
495
|
|
|
498
496
|
# return an online/real-time feature service
|
|
499
|
-
svc =
|
|
497
|
+
svc = vector.get_online_feature_service(impute_policy={"*": "$mean"})
|
|
500
498
|
resp = svc.get([{"stock": "GOOG"}])
|
|
501
499
|
|
|
502
500
|
:param name: List of names of targets to delete (default: delete all ingested targets)
|
|
@@ -732,7 +730,7 @@ class FeatureVector(ModelObj):
|
|
|
732
730
|
entity_timestamp_column: str = None,
|
|
733
731
|
target: DataTargetBase = None,
|
|
734
732
|
run_config: RunConfig = None,
|
|
735
|
-
drop_columns:
|
|
733
|
+
drop_columns: list[str] = None,
|
|
736
734
|
start_time: Union[str, datetime] = None,
|
|
737
735
|
end_time: Union[str, datetime] = None,
|
|
738
736
|
with_indexes: bool = False,
|
|
@@ -740,9 +738,10 @@ class FeatureVector(ModelObj):
|
|
|
740
738
|
engine: str = None,
|
|
741
739
|
engine_args: dict = None,
|
|
742
740
|
query: str = None,
|
|
743
|
-
order_by: Union[str,
|
|
741
|
+
order_by: Union[str, list[str]] = None,
|
|
744
742
|
spark_service: str = None,
|
|
745
|
-
timestamp_for_filtering: Union[str,
|
|
743
|
+
timestamp_for_filtering: Union[str, dict[str, str]] = None,
|
|
744
|
+
additional_filters: list = None,
|
|
746
745
|
):
|
|
747
746
|
"""retrieve offline feature vector results
|
|
748
747
|
|
|
@@ -799,6 +798,12 @@ class FeatureVector(ModelObj):
|
|
|
799
798
|
By default, the filter executes on the timestamp_key of each feature set.
|
|
800
799
|
Note: the time filtering is performed on each feature set before the
|
|
801
800
|
merge process using start_time and end_time params.
|
|
801
|
+
:param additional_filters: List of additional_filter conditions as tuples.
|
|
802
|
+
Each tuple should be in the format (column_name, operator, value).
|
|
803
|
+
Supported operators: "=", ">=", "<=", ">", "<".
|
|
804
|
+
Example: [("Product", "=", "Computer")]
|
|
805
|
+
For all supported filters, please see:
|
|
806
|
+
https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
|
|
802
807
|
|
|
803
808
|
"""
|
|
804
809
|
|
|
@@ -819,6 +824,7 @@ class FeatureVector(ModelObj):
|
|
|
819
824
|
order_by,
|
|
820
825
|
spark_service,
|
|
821
826
|
timestamp_for_filtering,
|
|
827
|
+
additional_filters,
|
|
822
828
|
)
|
|
823
829
|
|
|
824
830
|
def get_online_feature_service(
|
|
@@ -827,7 +833,7 @@ class FeatureVector(ModelObj):
|
|
|
827
833
|
fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
|
|
828
834
|
impute_policy: dict = None,
|
|
829
835
|
update_stats: bool = False,
|
|
830
|
-
entity_keys:
|
|
836
|
+
entity_keys: list[str] = None,
|
|
831
837
|
):
|
|
832
838
|
"""initialize and return online feature vector service api,
|
|
833
839
|
returns :py:class:`~mlrun.feature_store.OnlineVectorService`
|
|
@@ -855,7 +861,7 @@ class FeatureVector(ModelObj):
|
|
|
855
861
|
|
|
856
862
|
Example::
|
|
857
863
|
|
|
858
|
-
svc = vector_uri.get_online_feature_service(entity_keys=[
|
|
864
|
+
svc = vector_uri.get_online_feature_service(entity_keys=["ticker"])
|
|
859
865
|
try:
|
|
860
866
|
resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
|
|
861
867
|
print(resp)
|
|
@@ -910,7 +916,7 @@ class OnlineVectorService:
|
|
|
910
916
|
graph,
|
|
911
917
|
index_columns,
|
|
912
918
|
impute_policy: dict = None,
|
|
913
|
-
requested_columns:
|
|
919
|
+
requested_columns: list[str] = None,
|
|
914
920
|
):
|
|
915
921
|
self.vector = vector
|
|
916
922
|
self.impute_policy = impute_policy or {}
|
|
@@ -966,7 +972,7 @@ class OnlineVectorService:
|
|
|
966
972
|
"""vector merger function status (ready, running, error)"""
|
|
967
973
|
return "ready"
|
|
968
974
|
|
|
969
|
-
def get(self, entity_rows:
|
|
975
|
+
def get(self, entity_rows: list[Union[dict, list]], as_list=False):
|
|
970
976
|
"""get feature vector given the provided entity inputs
|
|
971
977
|
|
|
972
978
|
take a list of input vectors/rows and return a list of enriched feature vectors
|
|
@@ -1080,7 +1086,9 @@ class OfflineVectorResponse:
|
|
|
1080
1086
|
def to_dataframe(self, to_pandas=True):
|
|
1081
1087
|
"""return result as dataframe"""
|
|
1082
1088
|
if self.status != "completed":
|
|
1083
|
-
raise mlrun.errors.
|
|
1089
|
+
raise mlrun.errors.MLRunTaskNotReadyError(
|
|
1090
|
+
"feature vector dataset is not ready"
|
|
1091
|
+
)
|
|
1084
1092
|
return self._merger.get_df(to_pandas=to_pandas)
|
|
1085
1093
|
|
|
1086
1094
|
def to_parquet(self, target_path, **kw):
|
mlrun/feature_store/ingestion.py
CHANGED
|
@@ -17,6 +17,7 @@ import uuid
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
|
|
19
19
|
import mlrun
|
|
20
|
+
import mlrun.common.constants as mlrun_constants
|
|
20
21
|
from mlrun.datastore.sources import get_source_from_dict, get_source_step
|
|
21
22
|
from mlrun.datastore.targets import (
|
|
22
23
|
add_target_steps,
|
|
@@ -263,13 +264,13 @@ def run_ingestion_job(name, featureset, run_config, schedule=None, spark_service
|
|
|
263
264
|
out_path=featureset.spec.output_path,
|
|
264
265
|
)
|
|
265
266
|
task.spec.secret_sources = run_config.secret_sources
|
|
266
|
-
task.set_label(
|
|
267
|
-
"feature-
|
|
268
|
-
)
|
|
267
|
+
task.set_label(
|
|
268
|
+
mlrun_constants.MLRunInternalLabels.job_type, "feature-ingest"
|
|
269
|
+
).set_label("feature-set", featureset.uri)
|
|
269
270
|
if run_config.owner:
|
|
270
|
-
task.set_label(
|
|
271
|
-
|
|
272
|
-
)
|
|
271
|
+
task.set_label(
|
|
272
|
+
mlrun_constants.MLRunInternalLabels.owner, run_config.owner
|
|
273
|
+
).set_label(mlrun_constants.MLRunInternalLabels.v3io_user, run_config.owner)
|
|
273
274
|
|
|
274
275
|
# set run UID and save in the feature set status (linking the features et to the job)
|
|
275
276
|
task.metadata.uid = uuid.uuid4().hex
|