mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc4.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
@@ -19,18 +19,18 @@ from datetime import datetime
 from enum import Enum
 from typing import Union

-import numpy as np
 import pandas as pd

 import mlrun

 from ..config import config as mlconf
 from ..datastore import get_store_uri
-from ..datastore.targets import get_offline_target
+from ..datastore.targets import BaseStoreTarget, get_offline_target
 from ..feature_store.common import (
     get_feature_set_by_uri,
     parse_feature_string,
     parse_project_name_from_feature_string,
+    verify_feature_vector_permissions,
 )
 from ..feature_store.feature_set import FeatureSet
 from ..features import Entity, Feature
@@ -47,6 +47,22 @@ from ..runtimes.function_reference import FunctionReference
 from ..serving.states import RootFlowStep
 from ..utils import StorePrefix
 from .common import RunConfig
+from .feature_vector_utils import JoinGraph, OnlineVectorService
+from .retrieval import get_merger, run_merge_job
+
+
+def _features_to_vector_and_check_permissions(features: "FeatureVector", update_stats):
+    vector = features
+    if not vector.metadata.name:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "feature vector name must be specified"
+        )
+    verify_feature_vector_permissions(
+        vector, mlrun.common.schemas.AuthorizationAction.update
+    )
+
+    vector.save()
+    return vector


 class FeatureVectorSpec(ModelObj):
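
For reference, a minimal import sketch under the assumption that the relocated classes are exposed by the new module added in this release (mlrun/feature_store/feature_vector_utils.py in the file list above); whether they are also still re-exported from mlrun.feature_store is not confirmed by this diff:

# Assumed 1.8.0 import path, mirroring the relative import in the hunk above.
from mlrun.feature_store.feature_vector_utils import JoinGraph, OnlineVectorService

Note also that the new _features_to_vector_and_check_permissions helper saves the vector after checking 'update' authorization, so callers that keep the new update_stats=True default need update permissions on the feature vector.
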
@@ -201,251 +217,6 @@ class FeatureVectorStatus(ModelObj):
         self._features = ObjectList.from_list(Feature, features)


-class JoinGraph(ModelObj):
-    """
-    explain here about the class
-    """
-
-    default_graph_name = "$__join_graph_fv__$"
-    first_join_type = "first"
-    _dict_fields = ["name", "first_feature_set", "steps"]
-
-    def __init__(
-        self,
-        name: str = None,
-        first_feature_set: Union[str, FeatureSet] = None,
-    ):
-        """
-        JoinGraph is a class that represents a graph of data joins between feature sets. It allows users to define
-        data joins step by step, specifying the join type for each step. The graph can be used to build a sequence of
-        joins that will be executed in order, allowing the creation of complex join operations between feature sets.
-
-
-        Example:
-            # Create a new JoinGraph and add steps for joining feature sets.
-            join_graph = JoinGraph(name="my_join_graph", first_feature_set="featureset1")
-            join_graph.inner("featureset2")
-            join_graph.left("featureset3", asof_join=True)
-
-
-        :param name:                (str, optional) The name of the join graph. If not provided,
-                                    a default name will be used.
-        :param first_feature_set:   (str or FeatureSet, optional) The first feature set to join. It can be
-                                    specified either as a string representing the name of the feature set or as a
-                                    FeatureSet object.
-        """
-        self.name = name or self.default_graph_name
-        self._steps: ObjectList = None
-        self._feature_sets = None
-        if first_feature_set:
-            self._start(first_feature_set)
-
-    def inner(self, other_operand: typing.Union[str, FeatureSet]):
-        """
-        Specifies an inner join with the given feature set
-
-        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
-
-        :return: JoinGraph: The updated JoinGraph object with the specified inner join.
-        """
-        return self._join_operands(other_operand, "inner")
-
-    def outer(self, other_operand: typing.Union[str, FeatureSet]):
-        """
-        Specifies an outer join with the given feature set
-
-        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
-        :return: JoinGraph: The updated JoinGraph object with the specified outer join.
-        """
-        return self._join_operands(other_operand, "outer")
-
-    def left(self, other_operand: typing.Union[str, FeatureSet], asof_join):
-        """
-        Specifies a left join with the given feature set
-
-        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
-        :param asof_join: (bool) A flag indicating whether to perform an as-of join.
-
-        :return: JoinGraph: The updated JoinGraph object with the specified left join.
-        """
-        return self._join_operands(other_operand, "left", asof_join=asof_join)
-
-    def right(self, other_operand: typing.Union[str, FeatureSet]):
-        """
-        Specifies a right join with the given feature set
-
-        :param other_operand: (str or FeatureSet) The name of the feature set or a FeatureSet object to join with.
-
-        :return: JoinGraph: The updated JoinGraph object with the specified right join.
-        """
-        return self._join_operands(other_operand, "right")
-
-    def _join_operands(
-        self,
-        other_operand: typing.Union[str, FeatureSet],
-        join_type: str,
-        asof_join: bool = False,
-    ):
-        if isinstance(other_operand, FeatureSet):
-            other_operand = other_operand.metadata.name
-
-        first_key_num = len(self._steps.keys()) if self._steps else 0
-        left_last_step_name, left_all_feature_sets = (
-            self.last_step_name,
-            self.all_feature_sets_names,
-        )
-        is_first_fs = (
-            join_type == JoinGraph.first_join_type or left_all_feature_sets == self.name
-        )
-        # create_new_step
-        new_step = _JoinStep(
-            f"step_{first_key_num}",
-            left_last_step_name if not is_first_fs else "",
-            other_operand,
-            left_all_feature_sets if not is_first_fs else [],
-            other_operand,
-            join_type,
-            asof_join,
-        )
-
-        if self.steps is not None:
-            self.steps.update(new_step)
-        else:
-            self.steps = [new_step]
-        return self
-
-    def _start(self, other_operand: typing.Union[str, FeatureSet]):
-        return self._join_operands(other_operand, JoinGraph.first_join_type)
-
-    def _init_all_join_keys(
-        self, feature_set_objects, vector, entity_rows_keys: list[str] = None
-    ):
-        for step in self.steps:
-            step.init_join_keys(feature_set_objects, vector, entity_rows_keys)
-
-    @property
-    def all_feature_sets_names(self):
-        """
-        Returns a list of all feature set names included in the join graph.
-
-        :return: List[str]: A list of feature set names.
-        """
-        if self._steps:
-            return self._steps[-1].left_feature_set_names + [
-                self._steps[-1].right_feature_set_name
-            ]
-        else:
-            return self.name
-
-    @property
-    def last_step_name(self):
-        """
-        Returns the name of the last step in the join graph.
-
-        :return: str: The name of the last step.
-        """
-        if self._steps:
-            return self._steps[-1].name
-        else:
-            return self.name
-
-    @property
-    def steps(self):
-        """
-        Returns the list of join steps as ObjectList, which can be used to iterate over the steps
-        or access the properties of each step.
-        :return: ObjectList: The list of join steps.
-        """
-        return self._steps
-
-    @steps.setter
-    def steps(self, steps):
-        """
-        Setter for the steps property. It allows updating the join steps.
-
-        :param steps: (List[_JoinStep]) The list of join steps.
-        """
-        self._steps = ObjectList.from_list(child_class=_JoinStep, children=steps)
-
-
-class _JoinStep(ModelObj):
-    def __init__(
-        self,
-        name: str = None,
-        left_step_name: str = None,
-        right_step_name: str = None,
-        left_feature_set_names: Union[str, list[str]] = None,
-        right_feature_set_name: str = None,
-        join_type: str = "inner",
-        asof_join: bool = False,
-    ):
-        self.name = name
-        self.left_step_name = left_step_name
-        self.right_step_name = right_step_name
-        self.left_feature_set_names = (
-            left_feature_set_names
-            if left_feature_set_names is None
-            or isinstance(left_feature_set_names, list)
-            else [left_feature_set_names]
-        )
-        self.right_feature_set_name = right_feature_set_name
-        self.join_type = join_type
-        self.asof_join = asof_join
-
-        self.left_keys = []
-        self.right_keys = []
-
-    def init_join_keys(
-        self,
-        feature_set_objects: ObjectList,
-        vector,
-        entity_rows_keys: list[str] = None,
-    ):
-        if feature_set_objects[self.right_feature_set_name].is_connectable_to_df(
-            entity_rows_keys
-        ):
-            self.left_keys, self.right_keys = [
-                list(
-                    feature_set_objects[
-                        self.right_feature_set_name
-                    ].spec.entities.keys()
-                )
-            ] * 2
-
-        if (
-            self.join_type == JoinGraph.first_join_type
-            or not self.left_feature_set_names
-        ):
-            self.join_type = (
-                "inner"
-                if self.join_type == JoinGraph.first_join_type
-                else self.join_type
-            )
-            return
-
-        for left_fset in self.left_feature_set_names:
-            current_left_keys = feature_set_objects[left_fset].extract_relation_keys(
-                feature_set_objects[self.right_feature_set_name],
-                vector.get_feature_set_relations(feature_set_objects[left_fset]),
-            )
-            current_right_keys = list(
-                feature_set_objects[self.right_feature_set_name].spec.entities.keys()
-            )
-            for i in range(len(current_left_keys)):
-                if (
-                    current_left_keys[i] not in self.left_keys
-                    and current_right_keys[i] not in self.right_keys
-                ):
-                    self.left_keys.append(current_left_keys[i])
-                    self.right_keys.append(current_right_keys[i])
-
-        if not self.left_keys:
-            raise mlrun.errors.MLRunRuntimeError(
-                f"{self.name} can't be preform due to undefined relation between "
-                f"{self.left_feature_set_names} to {self.right_feature_set_name}"
-            )
-
-
 class FixedWindowType(Enum):
     CurrentOpenWindow = 1
     LastClosedWindow = 2
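
The JoinGraph and _JoinStep classes removed here are relocated rather than dropped (see the feature_vector_utils import hunk above). A minimal usage sketch taken from the relocated class's own docstring; the feature set names are placeholders:

# Build a join graph step by step, choosing the join type per step.
join_graph = JoinGraph(name="my_join_graph", first_feature_set="featureset1")
join_graph.inner("featureset2")
join_graph.left("featureset3", asof_join=True)

As the constructor hunk below shows, FeatureVector still accepts the resulting graph through its join_graph argument.
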
@@ -479,7 +250,7 @@ class FeatureVector(ModelObj):
         description=None,
         with_indexes=None,
         join_graph: JoinGraph = None,
-        relations: dict[str, dict[str, Union[Entity, str]]] = None,
+        relations: typing.Optional[dict[str, dict[str, Union[Entity, str]]]] = None,
     ):
         """Feature vector, specify selected features, their metadata and material views

@@ -727,21 +498,21 @@ class FeatureVector(ModelObj):
     def get_offline_features(
         self,
         entity_rows=None,
-        entity_timestamp_column: str = None,
+        entity_timestamp_column: typing.Optional[str] = None,
         target: DataTargetBase = None,
         run_config: RunConfig = None,
-        drop_columns: list[str] = None,
-        start_time: Union[str, datetime] = None,
-        end_time: Union[str, datetime] = None,
+        drop_columns: typing.Optional[list[str]] = None,
+        start_time: typing.Optional[Union[str, datetime]] = None,
+        end_time: typing.Optional[Union[str, datetime]] = None,
         with_indexes: bool = False,
-        update_stats: bool =
-        engine: str = None,
-        engine_args: dict = None,
-        query: str = None,
-        order_by: Union[str, list[str]] = None,
-        spark_service: str = None,
-        timestamp_for_filtering: Union[str, dict[str, str]] = None,
-        additional_filters: list = None,
+        update_stats: bool = True,
+        engine: typing.Optional[str] = None,
+        engine_args: typing.Optional[dict] = None,
+        query: typing.Optional[str] = None,
+        order_by: typing.Optional[Union[str, list[str]]] = None,
+        spark_service: typing.Optional[str] = None,
+        timestamp_for_filtering: typing.Optional[Union[str, dict[str, str]]] = None,
+        additional_filters: typing.Optional[list] = None,
     ):
         """retrieve offline feature vector results

@@ -784,8 +555,9 @@ class FeatureVector(ModelObj):
                                      columns. This property can be specified also in the feature vector spec
                                      (feature_vector.spec.with_indexes)
                                      (default False)
-        :param update_stats:
-
+        :param update_stats:         When set to True (default), updates feature statistics from the requested
+                                     feature sets on the vector, which requires 'update' permissions. When set to
+                                     False, uses read-only operations that only require 'read' permissions.
         :param engine:               processing engine kind ("local", "dask", or "spark")
         :param engine_args:          kwargs for the processing engine
         :param query:                The query string used to filter rows on the output
@@ -806,35 +578,79 @@ class FeatureVector(ModelObj):
             https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html

         """
+        if entity_rows is None and entity_timestamp_column is not None:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "entity_timestamp_column param "
+                "can not be specified without entity_rows param"
+            )
+
+        if isinstance(target, BaseStoreTarget) and not target.support_pandas:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"get_offline_features does not support targets that do not support pandas engine."
+                f" Target kind: {target.kind}"
+            )
+
+        if update_stats:
+            feature_vector = _features_to_vector_and_check_permissions(
+                self, update_stats
+            )
+        else:
+            feature_vector = self
+            verify_feature_vector_permissions(
+                feature_vector, mlrun.common.schemas.AuthorizationAction.read
+            )
+
+        entity_timestamp_column = (
+            entity_timestamp_column or feature_vector.spec.timestamp_field
+        )
+
+        merger_engine = get_merger(engine)
+
+        if run_config and not run_config.local:
+            return run_merge_job(
+                feature_vector,
+                target,
+                merger_engine,
+                engine,
+                engine_args,
+                spark_service,
+                entity_rows,
+                entity_timestamp_column=entity_timestamp_column,
+                run_config=run_config,
+                drop_columns=drop_columns,
+                with_indexes=with_indexes,
+                query=query,
+                order_by=order_by,
+                start_time=start_time,
+                end_time=end_time,
+                timestamp_for_filtering=timestamp_for_filtering,
+                additional_filters=additional_filters,
+            )

-
-
+        merger = merger_engine(feature_vector, **(engine_args or {}))
+        return merger.start(
             entity_rows,
             entity_timestamp_column,
-            target,
-
-
-
-
-            with_indexes,
-            update_stats,
-
-
-
-            order_by,
-            spark_service,
-            timestamp_for_filtering,
-            additional_filters,
+            target=target,
+            drop_columns=drop_columns,
+            start_time=start_time,
+            end_time=end_time,
+            timestamp_for_filtering=timestamp_for_filtering,
+            with_indexes=with_indexes,
+            update_stats=update_stats,
+            query=query,
+            order_by=order_by,
+            additional_filters=additional_filters,
         )

     def get_online_feature_service(
         self,
         run_config: RunConfig = None,
         fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-        impute_policy: dict = None,
+        impute_policy: typing.Optional[dict] = None,
         update_stats: bool = False,
-        entity_keys: list[str] = None,
-    ):
+        entity_keys: typing.Optional[list[str]] = None,
+    ) -> OnlineVectorService:
         """initialize and return online feature vector service api,
         returns :py:class:`~mlrun.feature_store.OnlineVectorService`

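
A hedged usage sketch of the reworked offline path above: update_stats now defaults to True, which saves the vector and requires 'update' permissions, while update_stats=False stays read-only. The vector URI and the to_dataframe() call on the response are illustrative assumptions, not confirmed by this diff:

import mlrun.feature_store as fstore

# Hypothetical feature vector URI; get_feature_vector loads an existing vector object.
vector = fstore.get_feature_vector("store://feature-vectors/my-project/my-vector")

# Read-only retrieval: per the hunk above, this skips the save/update-permission path.
resp = vector.get_offline_features(update_stats=False, with_indexes=True)

# Assumption: the offline response object still exposes to_dataframe(), as the
# OfflineVectorResponse removed from this file (further below) did.
df = resp.to_dataframe()
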
@@ -897,204 +713,14 @@ class FeatureVector(ModelObj):
         :return: Initialize the `OnlineVectorService`.
                  Will be used in subclasses where `support_online=True`.
         """
-
-            self,
-            run_config,
-            fixed_window_type,
-            impute_policy,
-            update_stats,
-            entity_keys,
-        )
-
-
-class OnlineVectorService:
-    """get_online_feature_service response object"""
-
-    def __init__(
-        self,
-        vector,
-        graph,
-        index_columns,
-        impute_policy: dict = None,
-        requested_columns: list[str] = None,
-    ):
-        self.vector = vector
-        self.impute_policy = impute_policy or {}
-
-        self._controller = graph.controller
-        self._index_columns = index_columns
-        self._impute_values = {}
-        self._requested_columns = requested_columns
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.close()
-
-    def initialize(self):
-        """internal, init the feature service and prep the imputing logic"""
-        if not self.impute_policy:
-            return
-
-        impute_policy = copy(self.impute_policy)
-        vector = self.vector
-        feature_stats = vector.get_stats_table()
-        self._impute_values = {}
-
-        feature_keys = list(vector.status.features.keys())
-        if vector.status.label_column in feature_keys:
-            feature_keys.remove(vector.status.label_column)
-
-        if "*" in impute_policy:
-            value = impute_policy["*"]
-            del impute_policy["*"]
-
-            for name in feature_keys:
-                if name not in impute_policy:
-                    if isinstance(value, str) and value.startswith("$"):
-                        self._impute_values[name] = feature_stats.loc[name, value[1:]]
-                    else:
-                        self._impute_values[name] = value
-
-        for name, value in impute_policy.items():
-            if name not in feature_keys:
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"feature {name} in impute_policy but not in feature vector"
-                )
-            if isinstance(value, str) and value.startswith("$"):
-                self._impute_values[name] = feature_stats.loc[name, value[1:]]
-            else:
-                self._impute_values[name] = value
-
-    @property
-    def status(self):
-        """vector merger function status (ready, running, error)"""
-        return "ready"
-
-    def get(self, entity_rows: list[Union[dict, list]], as_list=False):
-        """get feature vector given the provided entity inputs
-
-        take a list of input vectors/rows and return a list of enriched feature vectors
-        each input and/or output vector can be a list of values or a dictionary of field names and values,
-        to return the vector as a list of values set the `as_list` to True.
-
-        if the input is a list of list (vs a list of dict), the values in the list will correspond to the
-        index/entity values, i.e. [["GOOG"], ["MSFT"]] means "GOOG" and "MSFT" are the index/entity fields.
-
-        example::
-
-            # accept list of dict, return list of dict
-            svc = fstore.get_online_feature_service(vector)
-            resp = svc.get([{"name": "joe"}, {"name": "mike"}])
-
-            # accept list of list, return list of list
-            svc = fstore.get_online_feature_service(vector, as_list=True)
-            resp = svc.get([["joe"], ["mike"]])
-
-        :param entity_rows:  list of list/dict with input entity data/rows
-        :param as_list:      return a list of list (list input is required by many ML frameworks)
-        """
-        results = []
-        futures = []
-        if isinstance(entity_rows, dict):
-            entity_rows = [entity_rows]
-
-        # validate we have valid input struct
-        if (
-            not entity_rows
-            or not isinstance(entity_rows, list)
-            or not isinstance(entity_rows[0], (list, dict))
-        ):
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"input data is of type {type(entity_rows)}. must be a list of lists or list of dicts"
-            )
+        feature_vector = _features_to_vector_and_check_permissions(self, True)

-
-
-
-                self._index_columns
-            ):
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    "input list must be in the same size of the index_keys list"
-                )
-            index_range = range(len(self._index_columns))
-            entity_rows = [
-                {self._index_columns[i]: item[i] for i in index_range}
-                for item in entity_rows
-            ]
-
-        for row in entity_rows:
-            futures.append(self._controller.emit(row, return_awaitable_result=True))
-
-        for future in futures:
-            result = future.await_result()
-            data = result.body
-            if data:
-                actual_columns = data.keys()
-                if all([col in self._index_columns for col in actual_columns]):
-                    # didn't get any data from the graph
-                    results.append(None)
-                    continue
-                for column in self._requested_columns:
-                    if (
-                        column not in actual_columns
-                        and column != self.vector.status.label_column
-                    ):
-                        data[column] = None
-
-            if self._impute_values:
-                for name in data.keys():
-                    v = data[name]
-                    if v is None or (
-                        isinstance(v, float) and (np.isinf(v) or np.isnan(v))
-                    ):
-                        data[name] = self._impute_values.get(name, v)
-            if not self.vector.spec.with_indexes:
-                for name in self.vector.status.index_keys:
-                    data.pop(name, None)
-            if not any(data.values()):
-                data = None
-
-            if as_list and data:
-                data = [
-                    data.get(key, None)
-                    for key in self._requested_columns
-                    if key != self.vector.status.label_column
-                ]
-            results.append(data)
-
-        return results
-
-    def close(self):
-        """terminate the async loop"""
-        self._controller.terminate()
-
-
-class OfflineVectorResponse:
-    """get_offline_features response object"""
-
-    def __init__(self, merger):
-        self._merger = merger
-        self.vector = merger.vector
+        engine_args = {"impute_policy": impute_policy}
+        merger_engine = get_merger("storey")
+        # todo: support remote service (using remote nuclio/mlrun function if run_config)

-
-    def status(self):
-        """vector prep job status (ready, running, error)"""
-        return self._merger.get_status()
-
-    def to_dataframe(self, to_pandas=True):
-        """return result as dataframe"""
-        if self.status != "completed":
-            raise mlrun.errors.MLRunTaskNotReadyError(
-                "feature vector dataset is not ready"
-            )
-        return self._merger.get_df(to_pandas=to_pandas)
+        merger = merger_engine(feature_vector, **engine_args)

-
-
-
-
-    def to_csv(self, target_path, **kw):
-        """return results as csv file"""
-        return self._merger.to_csv(target_path, **kw)
+        return merger.init_online_vector_service(
+            entity_keys, fixed_window_type, update_stats=True
+        )
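
Finally, a hedged sketch of the online path after this change: get_online_feature_service now builds the service through the storey merger's init_online_vector_service and is annotated to return OnlineVectorService. The entity rows come from the removed OnlineVectorService.get() docstring above; get() and close() are assumed to keep their previous behavior in the relocated class:

# Continuing the sketch from the offline example above.
svc = vector.get_online_feature_service()
try:
    # Entity rows as in the removed docstring: one dict per requested entity.
    resp = svc.get([{"name": "joe"}, {"name": "mike"}])
finally:
    # close() terminated the async controller in the removed class; assumed unchanged.
    svc.close()
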