mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -2
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +21 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +113 -2
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +11 -0
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +224 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +374 -102
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +231 -22
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +864 -228
- mlrun/db/nopdb.py +268 -16
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1125 -414
- mlrun/render.py +28 -22
- mlrun/run.py +207 -180
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +40 -14
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +646 -177
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc5.dist-info/METADATA +0 -269
- mlrun-1.7.0rc5.dist-info/RECORD +0 -323
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/feature_set.py
CHANGED
@@ -337,7 +337,10 @@ class FeatureSet(ModelObj):
     example::

         import mlrun.feature_store as fstore
-
+
+        ticks = fstore.FeatureSet(
+            "ticks", entities=["stock"], timestamp_key="timestamp"
+        )
         ticks.ingest(df)

     :param name: name of the feature set
@@ -625,12 +628,12 @@ class FeatureSet(ModelObj):

         import mlrun.feature_store as fstore

-        ticks = fstore.FeatureSet(
-
-
-        ticks.add_entity(
-
-
+        ticks = fstore.FeatureSet(
+            "ticks", entities=["stock"], timestamp_key="timestamp"
+        )
+        ticks.add_entity(
+            "country", mlrun.data_types.ValueType.STRING, description="stock country"
+        )
         ticks.add_entity("year", mlrun.data_types.ValueType.INT16)
         ticks.save()

@@ -650,13 +653,23 @@ class FeatureSet(ModelObj):
         import mlrun.feature_store as fstore
         from mlrun.features import Feature

-        ticks = fstore.FeatureSet(
-
-
-        ticks.add_feature(
-
-
-
+        ticks = fstore.FeatureSet(
+            "ticks", entities=["stock"], timestamp_key="timestamp"
+        )
+        ticks.add_feature(
+            Feature(
+                value_type=mlrun.data_types.ValueType.STRING,
+                description="client consistency",
+            ),
+            "ABC01",
+        )
+        ticks.add_feature(
+            Feature(
+                value_type=mlrun.data_types.ValueType.FLOAT,
+                description="client volatility",
+            ),
+            "SAB",
+        )
         ticks.save()

     :param feature: setting of Feature
@@ -860,15 +873,18 @@ class FeatureSet(ModelObj):
     example::

         import mlrun.feature_store as fstore
+
         ...
-        ticks = fstore.FeatureSet(
-
-
-        ticks.add_aggregation(
-
-
-
-
+        ticks = fstore.FeatureSet(
+            "ticks", entities=["stock"], timestamp_key="timestamp"
+        )
+        ticks.add_aggregation(
+            name="priceN",
+            column="price",
+            operations=["avg"],
+            windows=["1d"],
+            period="1h",
+        )
         ticks.plot(rankdir="LR", with_targets=True)

     :param filename: target filepath for the graph image (None for the notebook)
@@ -901,6 +917,7 @@ class FeatureSet(ModelObj):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -912,6 +929,12 @@ class FeatureSet(ModelObj):
         :param end_time: filter by end time
         :param time_column: specify the time column name in the file
         :param kwargs: additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+            Each tuple should be in the format (column_name, operator, value).
+            Supported operators: "=", ">=", "<=", ">", "<".
+            Example: [("Product", "=", "Computer")]
+            For all supported filters, please see:
+            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
@@ -930,6 +953,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_field=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -949,6 +973,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
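The new additional_filters argument threads from FeatureSet.to_dataframe down to the underlying parquet reader. A minimal usage sketch, not part of the diff above, assuming a project with a saved feature set named "ticks" whose offline target is parquet ("price" is an illustrative column name):

    import mlrun.feature_store as fstore

    # assumes a saved feature set named "ticks" with a parquet offline target
    ticks = fstore.get_feature_set("ticks")
    df = ticks.to_dataframe(
        additional_filters=[("price", ">=", 100)],  # (column_name, operator, value)
    )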
@@ -1005,7 +1030,7 @@ class FeatureSet(ModelObj):
         df = stocks_set.ingest(stocks, infer_options=fstore.InferOptions.default())

         # for running as remote job
-        config = RunConfig(image=
+        config = RunConfig(image="mlrun/mlrun")
         df = ingest(stocks_set, stocks, run_config=config)

         # specify source and targets
mlrun/feature_store/feature_vector.py
CHANGED
@@ -486,6 +486,7 @@ class FeatureVector(ModelObj):
     example::

         import mlrun.feature_store as fstore
+
         features = ["quotes.bid", "quotes.asks_sum_5h as asks_5h", "stocks.*"]
         vector = fstore.FeatureVector("my-vec", features)

@@ -740,6 +741,7 @@ class FeatureVector(ModelObj):
         order_by: Union[str, list[str]] = None,
         spark_service: str = None,
         timestamp_for_filtering: Union[str, dict[str, str]] = None,
+        additional_filters: list = None,
     ):
         """retrieve offline feature vector results

@@ -796,6 +798,12 @@ class FeatureVector(ModelObj):
            By default, the filter executes on the timestamp_key of each feature set.
            Note: the time filtering is performed on each feature set before the
            merge process using start_time and end_time params.
+        :param additional_filters: List of additional_filter conditions as tuples.
+            Each tuple should be in the format (column_name, operator, value).
+            Supported operators: "=", ">=", "<=", ">", "<".
+            Example: [("Product", "=", "Computer")]
+            For all supported filters, please see:
+            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html

         """

@@ -816,6 +824,7 @@ class FeatureVector(ModelObj):
             order_by,
             spark_service,
             timestamp_for_filtering,
+            additional_filters,
         )

     def get_online_feature_service(
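FeatureVector.get_offline_features accepts the same filter tuples. A short sketch, not part of the diff, assuming a saved feature vector named "my-vec" (the vector name and filtered column are illustrative):

    import mlrun.feature_store as fstore

    vector = fstore.get_feature_vector("my-vec")
    resp = vector.get_offline_features(
        additional_filters=[("Product", "=", "Computer")],  # (column_name, operator, value)
    )
    df = resp.to_dataframe()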
@@ -852,7 +861,7 @@ class FeatureVector(ModelObj):

     Example::

-        svc = vector_uri.get_online_feature_service(entity_keys=[
+        svc = vector_uri.get_online_feature_service(entity_keys=["ticker"])
         try:
             resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
             print(resp)
@@ -1077,7 +1086,9 @@ class OfflineVectorResponse:
     def to_dataframe(self, to_pandas=True):
         """return result as dataframe"""
         if self.status != "completed":
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         return self._merger.get_df(to_pandas=to_pandas)

     def to_parquet(self, target_path, **kw):
mlrun/feature_store/ingestion.py
CHANGED
@@ -17,6 +17,7 @@ import uuid
 import pandas as pd

 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.datastore.sources import get_source_from_dict, get_source_step
 from mlrun.datastore.targets import (
     add_target_steps,
@@ -263,13 +264,13 @@ def run_ingestion_job(name, featureset, run_config, schedule=None, spark_service
         out_path=featureset.spec.output_path,
     )
     task.spec.secret_sources = run_config.secret_sources
-    task.set_label(
-        "feature-
-    )
+    task.set_label(
+        mlrun_constants.MLRunInternalLabels.job_type, "feature-ingest"
+    ).set_label("feature-set", featureset.uri)
     if run_config.owner:
-        task.set_label(
-
-        )
+        task.set_label(
+            mlrun_constants.MLRunInternalLabels.owner, run_config.owner
+        ).set_label(mlrun_constants.MLRunInternalLabels.v3io_user, run_config.owner)

     # set run UID and save in the feature set status (linking the features et to the job)
     task.metadata.uid = uuid.uuid4().hex
mlrun/feature_store/retrieval/base.py
CHANGED
@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._target = target

@@ -134,6 +135,7 @@ class BaseMerger(abc.ABC):
             timestamp_for_filtering=timestamp_for_filtering,
             query=query,
             order_by=order_by,
+            additional_filters=additional_filters,
         )

     def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@ class BaseMerger(abc.ABC):
         timestamp_for_filtering=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._create_engine_env()

@@ -212,7 +215,7 @@ class BaseMerger(abc.ABC):
                 feature_sets.append(None)
                 join_types.append(None)

-
+        timestamp_filtered = False
         for step in join_graph.steps:
             name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@ class BaseMerger(abc.ABC):
             if self._drop_indexes:
                 self._append_drop_column(time_column)
             if (start_time or end_time) and time_column:
-
+                timestamp_filtered = True

             df = self._get_engine_df(
                 feature_set,
@@ -259,6 +262,7 @@ class BaseMerger(abc.ABC):
                 start_time if time_column else None,
                 end_time if time_column else None,
                 time_column,
+                additional_filters,
             )

             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@ class BaseMerger(abc.ABC):
                 new_columns.append((column, alias))
             self._update_alias(dictionary={name: alias for name, alias in new_columns})

-        # None of the feature sets was filtered as required
-        if not
+        # None of the feature sets was timestamp filtered as required
+        if not timestamp_filtered and (start_time or end_time):
             raise mlrun.errors.MLRunRuntimeError(
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -755,6 +759,7 @@ class BaseMerger(abc.ABC):
         start_time: typing.Union[str, datetime] = None,
         end_time: typing.Union[str, datetime] = None,
         time_column: typing.Optional[str] = None,
+        additional_filters=None,
     ):
         """
         Return the feature_set data frame according to the args
mlrun/feature_store/retrieval/dask_merger.py
CHANGED
@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         import dask.dataframe as dd

@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
             end_time=end_time,
             time_column=time_column,
             index=False,
+            additional_filters=additional_filters,
         )

         return self._reset_index(df).persist()
mlrun/feature_store/retrieval/job.py
CHANGED
@@ -15,6 +15,7 @@
 import uuid

 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.config import config as mlconf
 from mlrun.model import DataTargetBase, new_task
 from mlrun.runtimes.function_reference import FunctionReference
@@ -42,6 +43,7 @@ def run_merge_job(
     start_time=None,
     end_time=None,
     timestamp_for_filtering=None,
+    additional_filters=None,
 ):
     name = vector.metadata.name
     if not target or not hasattr(target, "to_dict"):
@@ -116,11 +118,14 @@ def run_merge_job(
             "end_time": end_time,
             "timestamp_for_filtering": timestamp_for_filtering,
             "engine_args": engine_args,
+            "additional_filters": additional_filters,
         },
         inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
     )
     task.spec.secret_sources = run_config.secret_sources
-    task.set_label(
+    task.set_label(
+        mlrun_constants.MLRunInternalLabels.job_type, "feature-merge"
+    ).set_label(mlrun_constants.MLRunInternalLabels.feature_vector, vector.uri)
     task.metadata.uid = uuid.uuid4().hex
     vector.status.run_uri = task.metadata.uid
     vector.save()
@@ -151,7 +156,9 @@ class RemoteVectorResponse:

     def _is_ready(self):
         if self.status != "completed":
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         self.vector.reload()

     def to_dataframe(self, columns=None, df_module=None, **kwargs):
@@ -176,6 +183,7 @@ class RemoteVectorResponse:
         file_format = kwargs.get("format")
         if not file_format:
             file_format = self.run.status.results["target"]["kind"]
+
         df = mlrun.get_dataitem(self.target_uri).as_df(
             columns=columns, df_module=df_module, format=file_format, **kwargs
         )
@@ -196,7 +204,8 @@ import mlrun.feature_store.retrieval
 from mlrun.datastore.targets import get_target_driver
 def merge_handler(context, vector_uri, target, entity_rows=None,
                   entity_timestamp_column=None, drop_columns=None, with_indexes=None, query=None,
-                  engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None
+                  engine_args=None, order_by=None, start_time=None, end_time=None, timestamp_for_filtering=None,
+                  additional_filters=None):
     vector = context.get_store_resource(vector_uri)
     store_target = get_target_driver(target, vector)
     if entity_rows:
@@ -206,7 +215,7 @@ def merge_handler(context, vector_uri, target, entity_rows=None,
     merger = mlrun.feature_store.retrieval.{{{engine}}}(vector, **(engine_args or {}))
     merger.start(entity_rows, entity_timestamp_column, store_target, drop_columns, with_indexes=with_indexes,
                  query=query, order_by=order_by, start_time=start_time, end_time=end_time,
-                 timestamp_for_filtering=timestamp_for_filtering)
+                 timestamp_for_filtering=timestamp_for_filtering, additional_filters=additional_filters)

     target = vector.status.targets[store_target.name].to_dict()
     context.log_result('feature_vector', vector.uri)
mlrun/feature_store/retrieval/local_merger.py
CHANGED
@@ -114,12 +114,14 @@ class LocalFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         df = feature_set.to_dataframe(
             columns=column_names,
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
         )
         if df.index.names[0]:
             df.reset_index(inplace=True)
mlrun/feature_store/retrieval/spark_merger.py
CHANGED
@@ -12,16 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-import semver
+

 import mlrun
+from mlrun.data_types.to_pandas import spark_df_to_pandas
+from mlrun.datastore.sources import ParquetSource
 from mlrun.datastore.targets import get_offline_target
+from mlrun.runtimes import RemoteSparkRuntime
+from mlrun.runtimes.sparkjob import Spark3Runtime
+from mlrun.utils.helpers import additional_filters_warning

-from ...runtimes import RemoteSparkRuntime
-from ...runtimes.sparkjob import Spark3Runtime
 from .base import BaseMerger
-from .conversion import PandasConversionMixin


 class SparkFeatureMerger(BaseMerger):
@@ -166,29 +167,7 @@ class SparkFeatureMerger(BaseMerger):
     def get_df(self, to_pandas=True):
         if to_pandas:
             if self._pandas_df is None:
-                df = self._result_df
-                # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
-                # when we upgrade pyspark, we should check whether this workaround is still necessary
-                # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
-                if semver.parse(pd.__version__)["major"] >= 2:
-                    import pyspark.sql.functions as pyspark_functions
-
-                    type_conversion_dict = {}
-                    for field in df.schema.fields:
-                        if str(field.dataType) == "TimestampType":
-                            df = df.withColumn(
-                                field.name,
-                                pyspark_functions.date_format(
-                                    pyspark_functions.to_timestamp(field.name),
-                                    "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS",
-                                ),
-                            )
-                            type_conversion_dict[field.name] = "datetime64[ns]"
-                    df = PandasConversionMixin.toPandas(df)
-                    if type_conversion_dict:
-                        df = df.astype(type_conversion_dict)
-                else:
-                    df = PandasConversionMixin.toPandas(df)
+                df = spark_df_to_pandas(self._result_df)
                 self._pandas_df = df
                 self._set_indexes(self._pandas_df)
             return self._pandas_df
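The pyspark-to-pandas timestamp workaround that previously lived inline is now delegated to mlrun.data_types.to_pandas.spark_df_to_pandas. A minimal usage sketch, not part of the diff, assuming an existing pyspark.sql.DataFrame spark_df and that the helper carries the same pandas>=2 timestamp handling the removed block covered:

    from mlrun.data_types.to_pandas import spark_df_to_pandas

    # spark_df is assumed to be an existing pyspark.sql.DataFrame
    pdf = spark_df_to_pandas(spark_df)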
@@ -209,9 +188,13 @@ class SparkFeatureMerger(BaseMerger):

         if self.spark is None:
             # create spark context
-            self.spark =
-
-
+            self.spark = (
+                SparkSession.builder.appName(
+                    f"vector-merger-{self.vector.metadata.name}"
+                )
+                .config("spark.driver.memory", "2g")
+                .getOrCreate()
+            )

     def _get_engine_df(
         self,
@@ -221,6 +204,7 @@ class SparkFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         source_kwargs = {}
         if feature_set.spec.passthrough:
@@ -231,6 +215,7 @@ class SparkFeatureMerger(BaseMerger):
             source_kind = feature_set.spec.source.kind
             source_path = feature_set.spec.source.path
             source_kwargs.update(feature_set.spec.source.attributes)
+            source_kwargs.pop("additional_filters", None)
         else:
             target = get_offline_target(feature_set)
             if not target:
@@ -239,17 +224,24 @@ class SparkFeatureMerger(BaseMerger):
                 )
             source_kind = target.kind
             source_path = target.get_target_path()
-
+            source_kwargs = target.source_spark_attributes
         # handling case where there are multiple feature sets and user creates vector where
         # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
         source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]

+        if source_driver != ParquetSource:
+            additional_filters_warning(additional_filters, source_driver)
+            additional_filters = None
+        additional_filters_dict = (
+            {"additional_filters": additional_filters} if additional_filters else {}
+        )
         source = source_driver(
             name=self.vector.metadata.name,
             path=source_path,
             time_field=time_column,
             start_time=start_time,
             end_time=end_time,
+            **additional_filters_dict,
             **source_kwargs,
         )

mlrun/feature_store/steps.py
CHANGED
@@ -162,13 +162,19 @@ class MapValues(StepToDict, MLRunStep):
     example::

         # replace the value "U" with '0' in the age column
-        graph.to(MapValues(mapping={
+        graph.to(MapValues(mapping={"age": {"U": "0"}}, with_original_features=True))

         # replace integers, example
-        graph.to(MapValues(mapping={
+        graph.to(MapValues(mapping={"not": {0: 1, 1: 0}}))

         # replace by range, use -inf and inf for extended range
-        graph.to(
+        graph.to(
+            MapValues(
+                mapping={
+                    "numbers": {"ranges": {"negative": [-inf, 0], "positive": [0, inf]}}
+                }
+            )
+        )

     :param mapping: a dict with entry per column and the associated old/new values map
     :param with_original_features: set to True to keep the original features
@@ -424,8 +430,10 @@ class OneHotEncoder(StepToDict, MLRunStep):

     example::

-        mapping = {
-
+        mapping = {
+            "category": ["food", "health", "transportation"],
+            "gender": ["male", "female"],
+        }
         graph.to(OneHotEncoder(mapping=one_hot_encoder_mapping))

     :param mapping: a dict of per column categories (to map to binary fields)
@@ -542,10 +550,12 @@ class DateExtractor(StepToDict, MLRunStep):

         # (taken from the fraud-detection end-to-end feature store demo)
         # Define the Transactions FeatureSet
-        transaction_set = fstore.FeatureSet(
-
-
-
+        transaction_set = fstore.FeatureSet(
+            "transactions",
+            entities=[fstore.Entity("source")],
+            timestamp_key="timestamp",
+            description="transactions feature set",
+        )

         # Get FeatureSet computation graph
         transaction_graph = transaction_set.graph
@@ -553,11 +563,11 @@ class DateExtractor(StepToDict, MLRunStep):
         # Add the custom `DateExtractor` step
         # to the computation graph
         transaction_graph.to(
-
-
-
-
-
+            class_name="DateExtractor",
+            name="Extract Dates",
+            parts=["hour", "day_of_week"],
+            timestamp_col="timestamp",
+        )

     :param parts: list of pandas style date-time parts you want to extract.
     :param timestamp_col: The name of the column containing the timestamps to extract from,
@@ -694,11 +704,12 @@ class DropFeatures(StepToDict, MLRunStep):

     example::

-        feature_set = fstore.FeatureSet(
-
-
-
-
+        feature_set = fstore.FeatureSet(
+            "fs-new",
+            entities=[fstore.Entity("id")],
+            description="feature set",
+            engine="pandas",
+        )
         # Pre-processing graph steps
         feature_set.graph.to(DropFeatures(features=["age"]))
         df_pandas = feature_set.ingest(data)
@@ -732,3 +743,11 @@ class DropFeatures(StepToDict, MLRunStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"DropFeatures can only drop features, not entities: {dropped_entities}"
             )
+        if feature_set.spec.label_column in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop label_column: {feature_set.spec.label_column}"
+            )
+        if feature_set.spec.timestamp_key in features:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"DropFeatures can not drop timestamp_key: {feature_set.spec.timestamp_key}"
+            )
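With this change, DropFeatures also refuses to drop the feature set's label column or timestamp key, not only entities. A short illustrative sketch, not part of the diff; the feature set, column names and data are hypothetical, and the error is assumed to surface when the step is validated against the feature set (here triggered by ingestion):

    import pandas as pd

    import mlrun.errors
    import mlrun.feature_store as fstore
    from mlrun.feature_store.steps import DropFeatures

    feature_set = fstore.FeatureSet(
        "fs-new",
        entities=[fstore.Entity("id")],
        timestamp_key="timestamp",
        engine="pandas",
    )
    feature_set.graph.to(DropFeatures(features=["timestamp"]))
    data = pd.DataFrame({"id": [1], "timestamp": [pd.Timestamp("2024-01-01")], "age": [30]})
    try:
        feature_set.ingest(data)
    except mlrun.errors.MLRunInvalidArgumentError as err:
        # expected: "DropFeatures can not drop timestamp_key: timestamp"
        print(err)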
mlrun/features.py
CHANGED
@@ -100,7 +100,8 @@ class Feature(ModelObj):
     :param name: name of the feature
     :param validator: feature validation policy
     :param default: default value
-    :param labels: a set of key/value labels (tags)
+    :param labels: a set of key/value labels (tags). Labels can be used to filter featues, for example,
+        in the UI Feature store page.
     """
     self.name = name or ""
     if isinstance(value_type, ValueType):
@@ -238,10 +239,7 @@ class Validator(ModelObj):
         from mlrun.features import Validator

         # Add validator to the feature 'bid' with check type
-        quotes_set["bid"].validator = Validator(
-            check_type=True,
-            severity="info"
-        )
+        quotes_set["bid"].validator = Validator(check_type=True, severity="info")

     :param check_type: check feature type e.g. True, False
     :param severity: severity name e.g. info, warning, etc.
@@ -280,10 +278,7 @@ class MinMaxValidator(Validator):

         # Add validator to the feature 'bid', where valid
         # minimal value is 52
-        quotes_set["bid"].validator = MinMaxValidator(
-            min=52,
-            severity="info"
-        )
+        quotes_set["bid"].validator = MinMaxValidator(min=52, severity="info")

     :param check_type: check feature type e.g. True, False
     :param severity: severity name e.g. info, warning, etc.
@@ -344,9 +339,7 @@ class MinMaxLenValidator(Validator):
         # Add length validator to the feature 'ticker', where valid
         # minimal length is 1 and maximal length is 10
         quotes_set["ticker"].validator = MinMaxLenValidator(
-            min=1,
-            max=10,
-            severity="info"
+            min=1, max=10, severity="info"
         )

     :param check_type: check feature type e.g. True, False
@@ -408,8 +401,7 @@ class RegexValidator(Validator):
         # expression '(\b[A-Za-z]{1}[0-9]{7}\b)' where valid values are
         # e.g. A1234567, z9874563, etc.
         quotes_set["name"].validator = RegexValidator(
-            regex=r"(\b[A-Za-z]{1}[0-9]{7}\b)",
-            severity="info"
+            regex=r"(\b[A-Za-z]{1}[0-9]{7}\b)", severity="info"
         )

         :param check_type: check feature type e.g. True, False