mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/base.py
CHANGED

@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._target = target

@@ -134,6 +135,7 @@ class BaseMerger(abc.ABC):
             timestamp_for_filtering=timestamp_for_filtering,
             query=query,
             order_by=order_by,
+            additional_filters=additional_filters,
         )

     def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@ class BaseMerger(abc.ABC):
         timestamp_for_filtering=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._create_engine_env()

@@ -212,7 +215,7 @@ class BaseMerger(abc.ABC):
             feature_sets.append(None)
             join_types.append(None)

-        filtered = False
+        timestamp_filtered = False
         for step in join_graph.steps:
             name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@ class BaseMerger(abc.ABC):
             if self._drop_indexes:
                 self._append_drop_column(time_column)
             if (start_time or end_time) and time_column:
-                filtered = True
+                timestamp_filtered = True

             df = self._get_engine_df(
                 feature_set,
@@ -259,6 +262,7 @@ class BaseMerger(abc.ABC):
                 start_time if time_column else None,
                 end_time if time_column else None,
                 time_column,
+                additional_filters,
             )

             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@ class BaseMerger(abc.ABC):
             new_columns.append((column, alias))
         self._update_alias(dictionary={name: alias for name, alias in new_columns})

-        # None of the feature sets was filtered as required
-        if not filtered and (start_time or end_time):
+        # None of the feature sets was timestamp filtered as required
+        if not timestamp_filtered and (start_time or end_time):
             raise mlrun.errors.MLRunRuntimeError(
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -540,8 +544,8 @@ class BaseMerger(abc.ABC):
         self,
         name: str,
         order: int,
-        left_keys: typing.List[str] = None,
-        right_keys: typing.List[str] = None,
+        left_keys: list[str] = None,
+        right_keys: list[str] = None,
     ):
         self.name = name
         self.left_keys = left_keys if left_keys is not None else []
@@ -750,11 +754,12 @@ class BaseMerger(abc.ABC):
     def _get_engine_df(
         self,
         feature_set: FeatureSet,
-        feature_set_name: typing.List[str],
-        column_names: typing.List[str] = None,
+        feature_set_name: list[str],
+        column_names: list[str] = None,
         start_time: typing.Union[str, datetime] = None,
         end_time: typing.Union[str, datetime] = None,
         time_column: typing.Optional[str] = None,
+        additional_filters=None,
     ):
         """
         Return the feature_set data frame according to the args
@@ -773,8 +778,8 @@ class BaseMerger(abc.ABC):
     def _rename_columns_and_select(
         self,
         df,
-        rename_col_dict: typing.Dict[str, str],
-        columns: typing.List[str] = None,
+        rename_col_dict: dict[str, str],
+        columns: list[str] = None,
     ):
         """
         rename the columns of the df according to rename_col_dict, and select only `columns` if it is not none
@@ -801,7 +806,7 @@ class BaseMerger(abc.ABC):
         """
         raise NotImplementedError

-    def _order_by(self, order_by_active: typing.List[str]):
+    def _order_by(self, order_by_active: list[str]):
         """
         Order by `order_by_active` along all axis.
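The thread running through these retrieval hunks is a new `additional_filters` argument, plumbed from the public request surface down to each engine's `_get_engine_df`. A minimal usage sketch, assuming the argument is exposed through `mlrun.feature_store.get_offline_features` as the merge path above suggests; the vector name, feature reference, and filter tuple are illustrative, with the `(column, operator, value)` tuple format following the pandas/pyarrow filter convention:

    import mlrun.feature_store as fstore

    # illustrative feature vector; "stocks.*" is an assumed feature reference
    vector = fstore.FeatureVector("stocks-vector", ["stocks.*"])

    resp = fstore.get_offline_features(
        vector,
        # extra row-level predicates, applied on top of start/end time filtering
        additional_filters=[("exchange", "=", "NASDAQ")],
    )
    df = resp.to_dataframe()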
mlrun/feature_store/retrieval/dask_merger.py
CHANGED

@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         import dask.dataframe as dd

@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
             end_time=end_time,
             time_column=time_column,
             index=False,
+            additional_filters=additional_filters,
         )

         return self._reset_index(df).persist()
mlrun/feature_store/retrieval/job.py
CHANGED

@@ -15,6 +15,7 @@
 import uuid

 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.config import config as mlconf
 from mlrun.model import DataTargetBase, new_task
 from mlrun.runtimes.function_reference import FunctionReference
@@ -42,6 +43,7 @@ def run_merge_job(
     start_time=None,
     end_time=None,
     timestamp_for_filtering=None,
+    additional_filters=None,
 ):
     name = vector.metadata.name
     if not target or not hasattr(target, "to_dict"):
@@ -116,11 +118,14 @@ def run_merge_job(
             "end_time": end_time,
             "timestamp_for_filtering": timestamp_for_filtering,
             "engine_args": engine_args,
+            "additional_filters": additional_filters,
         },
         inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
     )
     task.spec.secret_sources = run_config.secret_sources
-    task.set_label("job-type", "feature-merge").set_label("feature-vector", vector.uri)
+    task.set_label(
+        mlrun_constants.MLRunInternalLabels.job_type, "feature-merge"
+    ).set_label(mlrun_constants.MLRunInternalLabels.feature_vector, vector.uri)
     task.metadata.uid = uuid.uuid4().hex
     vector.status.run_uri = task.metadata.uid
     vector.save()
@@ -151,7 +156,9 @@ class RemoteVectorResponse:

     def _is_ready(self):
         if self.status != "completed":
-            raise mlrun.errors.MLRunTaskNotReady("feature vector dataset is not ready")
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         self.vector.reload()

     def to_dataframe(self, columns=None, df_module=None, **kwargs):
@@ -176,6 +183,7 @@ class RemoteVectorResponse:
         file_format = kwargs.get("format")
         if not file_format:
             file_format = self.run.status.results["target"]["kind"]
+
         df = mlrun.get_dataitem(self.target_uri).as_df(
             columns=columns, df_module=df_module, format=file_format, **kwargs
         )
@@ -196,7 +204,8 @@ import mlrun.feature_store.retrieval
 from mlrun.datastore.targets import get_target_driver
 def merge_handler(context, vector_uri, target, entity_rows=None,
                   entity_timestamp_column=None, drop_columns=None, with_indexes=None, query=None,
-                  engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None):
+                  engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None,
+                  additional_filters=None):
     vector = context.get_store_resource(vector_uri)
     store_target = get_target_driver(target, vector)
     if entity_rows:
@@ -206,7 +215,7 @@ def merge_handler(context, vector_uri, target, entity_rows=None,
     merger = mlrun.feature_store.retrieval.{{{engine}}}(vector, **(engine_args or {}))
     merger.start(entity_rows, entity_timestamp_column, store_target, drop_columns, with_indexes=with_indexes,
                  query=query, order_by=order_by, start_time=start_time, end_time=end_time,
-                 timestamp_for_filtering=timestamp_for_filtering)
+                 timestamp_for_filtering=timestamp_for_filtering, additional_filters=additional_filters)

     target = vector.status.targets[store_target.name].to_dict()
     context.log_result('feature_vector', vector.uri)
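Two client-visible consequences fall out of this file: merge-job labels now come from `mlrun.common.constants.MLRunInternalLabels` rather than string literals, and the not-ready exception carries the renamed `MLRunTaskNotReadyError` class. A polling sketch for a remote merge, where `resp` stands in for the `RemoteVectorResponse` returned by the job:

    import time

    import mlrun.errors

    while True:
        try:
            df = resp.to_dataframe()  # raises while the merge run has not completed
            break
        except mlrun.errors.MLRunTaskNotReadyError:
            time.sleep(5)  # the feature-vector dataset is not ready yet; retry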
mlrun/feature_store/retrieval/local_merger.py
CHANGED

@@ -114,12 +114,14 @@ class LocalFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         df = feature_set.to_dataframe(
             columns=column_names,
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
         )
         if df.index.names[0]:
             df.reset_index(inplace=True)
mlrun/feature_store/retrieval/spark_merger.py
CHANGED

@@ -12,16 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import pandas as pd
-import semver
+

 import mlrun
+from mlrun.data_types.to_pandas import spark_df_to_pandas
+from mlrun.datastore.sources import ParquetSource
 from mlrun.datastore.targets import get_offline_target
+from mlrun.runtimes import RemoteSparkRuntime
+from mlrun.runtimes.sparkjob import Spark3Runtime
+from mlrun.utils.helpers import additional_filters_warning

-from ...runtimes import RemoteSparkRuntime
-from ...runtimes.sparkjob import Spark3Runtime
 from .base import BaseMerger
-from .conversion import PandasConversionMixin


 class SparkFeatureMerger(BaseMerger):
@@ -166,29 +167,7 @@ class SparkFeatureMerger(BaseMerger):
     def get_df(self, to_pandas=True):
         if to_pandas:
             if self._pandas_df is None:
-                df = self._result_df
-                # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-                # when we upgrade pyspark, we should check whether this workaround is still necessary
-                # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-                if semver.parse(pd.__version__)["major"] >= 2:
-                    import pyspark.sql.functions as pyspark_functions
-
-                    type_conversion_dict = {}
-                    for field in df.schema.fields:
-                        if str(field.dataType) == "TimestampType":
-                            df = df.withColumn(
-                                field.name,
-                                pyspark_functions.date_format(
-                                    pyspark_functions.to_timestamp(field.name),
-                                    "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                                ),
-                            )
-                            type_conversion_dict[field.name] = "datetime64[ns]"
-                    df = PandasConversionMixin.toPandas(df)
-                    if type_conversion_dict:
-                        df = df.astype(type_conversion_dict)
-                else:
-                    df = PandasConversionMixin.toPandas(df)
+                df = spark_df_to_pandas(self._result_df)
                 self._pandas_df = df
                 self._set_indexes(self._pandas_df)
             return self._pandas_df
@@ -209,9 +188,13 @@ class SparkFeatureMerger(BaseMerger):

         if self.spark is None:
             # create spark context
-            self.spark = SparkSession.builder.appName(
-                f"vector-merger-{self.vector.metadata.name}"
-            ).getOrCreate()
+            self.spark = (
+                SparkSession.builder.appName(
+                    f"vector-merger-{self.vector.metadata.name}"
+                )
+                .config("spark.driver.memory", "2g")
+                .getOrCreate()
+            )

     def _get_engine_df(
         self,
@@ -221,6 +204,7 @@ class SparkFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         source_kwargs = {}
         if feature_set.spec.passthrough:
@@ -231,6 +215,7 @@ class SparkFeatureMerger(BaseMerger):
             source_kind = feature_set.spec.source.kind
             source_path = feature_set.spec.source.path
             source_kwargs.update(feature_set.spec.source.attributes)
+            source_kwargs.pop("additional_filters", None)
         else:
             target = get_offline_target(feature_set)
             if not target:
@@ -239,17 +224,24 @@ class SparkFeatureMerger(BaseMerger):
             )
             source_kind = target.kind
             source_path = target.get_target_path()
-
+            source_kwargs = target.source_spark_attributes
         # handling case where there are multiple feature sets and user creates vector where
         # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
         source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]

+        if source_driver != ParquetSource:
+            additional_filters_warning(additional_filters, source_driver)
+            additional_filters = None
+        additional_filters_dict = (
+            {"additional_filters": additional_filters} if additional_filters else {}
+        )
         source = source_driver(
             name=self.vector.metadata.name,
             path=source_path,
             time_field=time_column,
             start_time=start_time,
             end_time=end_time,
+            **additional_filters_dict,
             **source_kwargs,
         )
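Note the guard in the last hunk: on the spark engine, `additional_filters` is honored only when the source resolves to a `ParquetSource`; for any other source kind the merger calls `additional_filters_warning` and discards the filters. So a spark merge along these lines (a sketch reusing the illustrative `vector` and filter format from the earlier example) only filters rows when the feature sets are backed by parquet:

    resp = fstore.get_offline_features(
        vector,
        engine="spark",
        # dropped, with a warning, for non-parquet sources
        additional_filters=[("amount", ">", 100)],
    )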
mlrun/feature_store/steps.py
CHANGED

@@ -16,7 +16,7 @@ import math
 import re
 import uuid
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union

 import numpy as np
 import pandas as pd
@@ -92,8 +92,6 @@ class MLRunStep(MapClass):


 class FeaturesetValidator(StepToDict, MLRunStep):
-    """Validate feature values according to the feature set validation policy"""
-
     def __init__(self, featureset=None, columns=None, name=None, **kwargs):
         """Validate feature values according to the feature set validation policy

@@ -152,11 +150,9 @@ class FeaturesetValidator(StepToDict, MLRunStep):


 class MapValues(StepToDict, MLRunStep):
-    """Map column values to new values"""
-
     def __init__(
         self,
-        mapping: Dict[str, Dict[Union[str, int, bool], Any]],
+        mapping: dict[str, dict[Union[str, int, bool], Any]],
         with_original_features: bool = False,
         suffix: str = "mapped",
         **kwargs,
@@ -166,13 +162,19 @@ class MapValues(StepToDict, MLRunStep):
         example::

             # replace the value "U" with '0' in the age column
-            graph.to(MapValues(mapping={'age': {'U': '0'}}, with_original_features=True))
+            graph.to(MapValues(mapping={"age": {"U": "0"}}, with_original_features=True))

             # replace integers, example
-            graph.to(MapValues(mapping={'not': {0: 1, 1: 0}}))
+            graph.to(MapValues(mapping={"not": {0: 1, 1: 0}}))

             # replace by range, use -inf and inf for extended range
-            graph.to(MapValues(mapping={'numbers': {'ranges': {'negative': [-inf, 0], 'positive': [0, inf]}}}))
+            graph.to(
+                MapValues(
+                    mapping={
+                        "numbers": {"ranges": {"negative": [-inf, 0], "positive": [0, inf]}}
+                    }
+                )
+            )

         :param mapping: a dict with entry per column and the associated old/new values map
         :param with_original_features: set to True to keep the original features
@@ -377,7 +379,7 @@ class Imputer(StepToDict, MLRunStep):
         self,
         method: str = "avg",
         default_value=None,
-        mapping: Dict[str, Any] = None,
+        mapping: dict[str, Any] = None,
         **kwargs,
     ):
         """Replace None values with default values
@@ -423,13 +425,15 @@ class Imputer(StepToDict, MLRunStep):


 class OneHotEncoder(StepToDict, MLRunStep):
-    def __init__(self, mapping: Dict[str, List[Union[int, str]]], **kwargs):
+    def __init__(self, mapping: dict[str, list[Union[int, str]]], **kwargs):
         """Create new binary fields, one per category (one hot encoded)

         example::

-            mapping = {'category': ['food', 'health', 'transportation'],
-                       'gender': ['male', 'female']}
+            mapping = {
+                "category": ["food", "health", "transportation"],
+                "gender": ["male", "female"],
+            }
             graph.to(OneHotEncoder(mapping=one_hot_encoder_mapping))

         :param mapping: a dict of per column categories (to map to binary fields)
@@ -510,15 +514,13 @@ class OneHotEncoder(StepToDict, MLRunStep):


 class DateExtractor(StepToDict, MLRunStep):
-    """Date Extractor allows you to extract a date-time component"""
-
     def __init__(
         self,
-        parts: Union[Dict[str, str], List[str]],
+        parts: Union[dict[str, str], list[str]],
         timestamp_col: str = None,
         **kwargs,
     ):
-        """Date Extractor
+        """Date Extractor extracts a date-time component into new columns

         The extracted date part will appear as `<timestamp_col>_<date_part>` feature.

@@ -548,10 +550,12 @@ class DateExtractor(StepToDict, MLRunStep):

             # (taken from the fraud-detection end-to-end feature store demo)
             # Define the Transactions FeatureSet
-            transaction_set = fstore.FeatureSet("transactions",
-                                                entities=[fstore.Entity("source")],
-                                                timestamp_key="timestamp",
-                                                description="transactions feature set")
+            transaction_set = fstore.FeatureSet(
+                "transactions",
+                entities=[fstore.Entity("source")],
+                timestamp_key="timestamp",
+                description="transactions feature set",
+            )

             # Get FeatureSet computation graph
             transaction_graph = transaction_set.graph
@@ -559,11 +563,11 @@ class DateExtractor(StepToDict, MLRunStep):
             # Add the custom `DateExtractor` step
             # to the computation graph
             transaction_graph.to(
-                class_name='DateExtractor',
-                name='Extract Dates',
-                parts=['hour', 'day_of_week'],
-                timestamp_col='timestamp',
-            )
+                class_name="DateExtractor",
+                name="Extract Dates",
+                parts=["hour", "day_of_week"],
+                timestamp_col="timestamp",
+            )

         :param parts: list of pandas style date-time parts you want to extract.
         :param timestamp_col: The name of the column containing the timestamps to extract from,
@@ -629,8 +633,6 @@ class DateExtractor(StepToDict, MLRunStep):


 class SetEventMetadata(MapClass):
-    """Set the event metadata (id and key) from the event body"""
-
     def __init__(
         self,
         id_path: Optional[str] = None,
@@ -695,18 +697,19 @@ class SetEventMetadata(MapClass):


 class DropFeatures(StepToDict, MLRunStep):
-    def __init__(self, features: List[str], **kwargs):
+    def __init__(self, features: list[str], **kwargs):
         """Drop all the features from feature list

         :param features: string list of the features names to drop

         example::

-            feature_set = fstore.FeatureSet("fs-new",
-                                            entities=[fstore.Entity("id")],
-                                            description="feature set",
-                                            engine="pandas",
-                                            )
+            feature_set = fstore.FeatureSet(
+                "fs-new",
+                entities=[fstore.Entity("id")],
+                description="feature set",
+                engine="pandas",
+            )
             # Pre-processing graph steps
             feature_set.graph.to(DropFeatures(features=["age"]))
             df_pandas = feature_set.ingest(data)
@@ -740,3 +743,11 @@ class DropFeatures(StepToDict, MLRunStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"DropFeatures can only drop features, not entities: {dropped_entities}"
             )
+        if feature_set.spec.label_column in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop label_column: {feature_set.spec.label_column}"
+            )
+        if feature_set.spec.timestamp_key in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop timestamp_key: {feature_set.spec.timestamp_key}"
+            )
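The new tail of the `DropFeatures` validation means a graph step can no longer drop the feature set's label column or timestamp key; the error surfaces early instead of producing a dataset that breaks downstream training. A sketch of the failure mode, with illustrative names:

    import mlrun.feature_store as fstore
    from mlrun.feature_store.steps import DropFeatures

    feature_set = fstore.FeatureSet(
        "fs-new",
        entities=[fstore.Entity("id")],
        timestamp_key="timestamp",
        engine="pandas",
    )
    feature_set.graph.to(DropFeatures(features=["timestamp"]))
    # ingestion is now expected to fail with:
    # MLRunInvalidArgumentError: DropFeatures can not drop timestamp_key: timestamp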
mlrun/features.py
CHANGED

@@ -14,7 +14,7 @@
 #
 import math
 import re
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union

 from .data_types import ValueType, python_type_to_value_type
 from .errors import MLRunRuntimeError, err_to_str
@@ -44,7 +44,7 @@ class Entity(ModelObj):
         name: str = None,
         value_type: Union[ValueType, str] = None,
         description: str = None,
-        labels: Optional[Dict[str, str]] = None,
+        labels: Optional[dict[str, str]] = None,
     ):
         """data entity (index key)

@@ -65,8 +65,6 @@ class Entity(ModelObj):


 class Feature(ModelObj):
-    """data feature"""
-
     _dict_fields = [
         "name",
         "description",
@@ -82,13 +80,13 @@ class Feature(ModelObj):
     def __init__(
         self,
         value_type: Union[ValueType, str] = None,
-        dims: List[int] = None,
+        dims: list[int] = None,
         description: str = None,
         aggregate: bool = None,
         name: str = None,
         validator=None,
         default: str = None,
-        labels: Dict[str, str] = None,
+        labels: dict[str, str] = None,
     ):
         """data feature

@@ -102,7 +100,8 @@ class Feature(ModelObj):
         :param name: name of the feature
         :param validator: feature validation policy
         :param default: default value
-        :param labels: a set of key/value labels (tags)
+        :param labels: a set of key/value labels (tags). Labels can be used to filter features, for example,
+            in the UI Feature store page.
         """
         self.name = name or ""
         if isinstance(value_type, ValueType):
@@ -240,10 +239,7 @@ class Validator(ModelObj):
             from mlrun.features import Validator

             # Add validator to the feature 'bid' with check type
-            quotes_set["bid"].validator = Validator(
-                check_type=True,
-                severity="info"
-            )
+            quotes_set["bid"].validator = Validator(check_type=True, severity="info")

         :param check_type: check feature type e.g. True, False
         :param severity: severity name e.g. info, warning, etc.
@@ -282,10 +278,7 @@ class MinMaxValidator(Validator):

             # Add validator to the feature 'bid', where valid
             # minimal value is 52
-            quotes_set["bid"].validator = MinMaxValidator(
-                min=52,
-                severity="info"
-            )
+            quotes_set["bid"].validator = MinMaxValidator(min=52, severity="info")

         :param check_type: check feature type e.g. True, False
         :param severity: severity name e.g. info, warning, etc.
@@ -346,9 +339,7 @@ class MinMaxLenValidator(Validator):
             # Add length validator to the feature 'ticker', where valid
             # minimal length is 1 and maximal length is 10
             quotes_set["ticker"].validator = MinMaxLenValidator(
-                min=1,
-                max=10,
-                severity="info"
+                min=1, max=10, severity="info"
             )

         :param check_type: check feature type e.g. True, False
@@ -410,8 +401,7 @@ class RegexValidator(Validator):
             # expression '(\b[A-Za-z]{1}[0-9]{7}\b)' where valid values are
             # e.g. A1234567, z9874563, etc.
             quotes_set["name"].validator = RegexValidator(
-                regex=r"(\b[A-Za-z]{1}[0-9]{7}\b)",
-                severity="info"
+                regex=r"(\b[A-Za-z]{1}[0-9]{7}\b)", severity="info"
             )

         :param check_type: check feature type e.g. True, False
@@ -445,7 +435,7 @@ class RegexValidator(Validator):

     @classmethod
     def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
-        new_obj = super(RegexValidator, cls).from_dict(
+        new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
         if hasattr(new_obj, "regex"):
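The signature changes here (and in steps.py and the merger classes above) belong to a package-wide migration from `typing.Dict`/`typing.List` annotations to the PEP 585 builtin generics, which implies the wheel now targets Python 3.9+. Call sites are unaffected, since plain dicts and lists were always accepted at runtime:

    from mlrun.features import Feature

    feature = Feature(
        value_type="float",
        description="bid price",
        labels={"owner": "data-team"},  # annotated as dict[str, str] in 1.7.0
    )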
mlrun/frameworks/_common/artifacts_library.py
CHANGED

@@ -13,7 +13,7 @@
 # limitations under the License.
 #
 from abc import ABC, abstractmethod
-from typing import Dict, List, Type, Union
+from typing import Union

 import mlrun

@@ -39,12 +39,12 @@ class ArtifactsLibrary(ABC):
     @classmethod
     def get_plans(
         cls,
-        artifacts: Union[List[Plan], Dict[str, dict], List[str]] = None,
+        artifacts: Union[list[Plan], dict[str, dict], list[str]] = None,
         context: mlrun.MLClientCtx = None,
         include_default: bool = True,
         # custom_plans: dict = None, :param custom_plans: Custom user plans objects to initialize from.
         **default_kwargs,
-    ) -> List[Plan]:
+    ) -> list[Plan]:
         """
         Get plans for a run. The plans will be taken from the provided artifacts / configuration via code, from provided
         configuration via MLRun context and if the 'include_default' is True, from the framework artifact library's
@@ -97,7 +97,7 @@ class ArtifactsLibrary(ABC):

     @classmethod
     @abstractmethod
-    def default(cls, **kwargs) -> List[Plan]:
+    def default(cls, **kwargs) -> list[Plan]:
         """
         Get the default artifacts plans list of this framework's library.

@@ -106,7 +106,7 @@ class ArtifactsLibrary(ABC):
         pass

     @classmethod
-    def _get_library_plans(cls) -> Dict[str, Type[Plan]]:
+    def _get_library_plans(cls) -> dict[str, type[Plan]]:
         """
         Get all the supported plans in this library.

@@ -120,8 +120,8 @@ class ArtifactsLibrary(ABC):

     @staticmethod
     def _from_dict(
-        requested_plans: Dict[str, dict], available_plans: Dict[str, Type[Plan]]
-    ) -> List[Plan]:
+        requested_plans: dict[str, dict], available_plans: dict[str, type[Plan]]
+    ) -> list[Plan]:
         """
         Initialize a list of plans from a given configuration dictionary. The configuration is expected to be a
         dictionary of plans and their initialization parameters in the following format:
@@ -162,8 +162,8 @@ class ArtifactsLibrary(ABC):

     @staticmethod
     def _from_list(
-        requested_plans: List[str], available_plans: Dict[str, Type[Plan]]
-    ) -> List[Plan]:
+        requested_plans: list[str], available_plans: dict[str, type[Plan]]
+    ) -> list[Plan]:
         """
         Initialize a list of plans from a given configuration list. The configuration is expected to be a list of plans
         names to be initialized with their default configuration.