mlrun 1.7.0rc1__py3-none-any.whl → 1.7.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/model.py +8 -1
- mlrun/common/db/sql_session.py +3 -0
- mlrun/config.py +10 -3
- mlrun/datastore/base.py +0 -28
- mlrun/datastore/datastore_profile.py +12 -0
- mlrun/datastore/sources.py +1 -5
- mlrun/datastore/targets.py +9 -5
- mlrun/datastore/v3io.py +70 -46
- mlrun/feature_store/api.py +56 -56
- mlrun/feature_store/feature_set.py +0 -2
- mlrun/feature_store/feature_vector.py +120 -0
- mlrun/feature_store/steps.py +1 -9
- mlrun/features.py +0 -2
- mlrun/k8s_utils.py +51 -0
- mlrun/model_monitoring/stream_processing.py +3 -21
- mlrun/projects/project.py +45 -7
- mlrun/serving/remote.py +0 -4
- mlrun/serving/routers.py +14 -6
- mlrun/serving/states.py +1 -0
- mlrun/serving/v2_serving.py +45 -3
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/regex.py +5 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/METADATA +10 -10
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/RECORD +29 -29
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/feature_vector.py
CHANGED

@@ -741,6 +741,64 @@ class FeatureVector(ModelObj):
         spark_service: str = None,
         timestamp_for_filtering: Union[str, dict[str, str]] = None,
     ):
+        """Retrieve offline feature vector results.
+
+        Specify a feature vector object/uri and retrieve the desired features, their metadata
+        and statistics. Returns :py:class:`~mlrun.feature_store.OfflineVectorResponse`;
+        results can be returned as a dataframe or written to a target.
+
+        The start_time and end_time attributes allow filtering the data to a given time range. They accept
+        string values or pandas `Timestamp` objects; string values can also be relative, for example:
+        "now", "now - 1d2h", "now+5m", where a valid pandas Timedelta string follows the verb "now".
+        For time alignment you can use the verb "floor", e.g. "now -1d floor 1H" aligns the time to the last hour
+        (the floor string is passed to pandas.Timestamp.floor(); use D, H, T, S for day, hour, minute, second
+        alignment). Another option for filtering the data is the `query` argument, shown in the example.
+        Example::
+
+            features = [
+                "stock-quotes.bid",
+                "stock-quotes.asks_sum_5h",
+                "stock-quotes.ask as mycol",
+                "stocks.*",
+            ]
+            vector = FeatureVector(features=features)
+            resp = vector.get_offline_features(entity_rows=trades, entity_timestamp_column="time",
+                                               query="ticker in ['GOOG'] and bid>100")
+            print(resp.to_dataframe())
+            print(vector.get_stats_table())
+            resp.to_parquet("./out.parquet")
+
+        :param entity_rows:             dataframe with entity rows to join with
+        :param target:                  where to write the results to
+        :param drop_columns:            list of columns to drop from the final result
+        :param entity_timestamp_column: timestamp column name in the entity rows dataframe; can be specified
+                                        only if param entity_rows was specified
+        :param run_config:              function and/or run configuration,
+                                        see :py:class:`~mlrun.feature_store.RunConfig`
+        :param start_time:              datetime, low limit of the time range to filter by. Optional.
+        :param end_time:                datetime, high limit of the time range to filter by. Optional.
+        :param with_indexes:            return the vector with/without the entities and the timestamp_key of
+                                        the feature sets, and with/without the entity_timestamp_column and
+                                        timestamp_for_filtering columns. This property can also be specified
+                                        in the feature vector spec (feature_vector.spec.with_indexes).
+                                        Default: False.
+        :param update_stats:            update features statistics from the requested feature sets on the
+                                        vector. Default: False.
+        :param engine:                  processing engine kind ("local", "dask", or "spark")
+        :param engine_args:             kwargs for the processing engine
+        :param query:                   the query string used to filter rows on the output
+        :param spark_service:           name of the spark service to be used (when using a remote-spark
+                                        runtime)
+        :param order_by:                name or list of names to order by. Each name can be the feature name
+                                        or the alias of the feature as passed in the feature list.
+        :param timestamp_for_filtering: name of the column to filter by; either a str that applies to all the
+                                        feature sets, or a dictionary ({<feature set name>: <timestamp column
+                                        name>, ...}) that indicates the timestamp column name per feature set.
+                                        Optional. By default, the filter executes on the timestamp_key of each
+                                        feature set. Note: the time filtering is performed on each feature set
+                                        before the merge process, using the start_time and end_time params.
+        """
+
         return mlrun.feature_store.api._get_offline_features(
             self,
             entity_rows,
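The relative time strings described above reduce to plain pandas operations; a minimal sketch (pandas only, an illustration rather than mlrun's actual parser):

    import pandas as pd

    # "now - 1d2h": a pandas Timedelta string applied to the current time
    start = pd.Timestamp.now() - pd.Timedelta("1d2h")

    # "now -1d floor 1H": subtract one day, then align down to the hour
    aligned = (pd.Timestamp.now() - pd.Timedelta("1d")).floor("1H")
    print(start, aligned)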
@@ -768,6 +826,68 @@ class FeatureVector(ModelObj):
         update_stats: bool = False,
         entity_keys: list[str] = None,
     ):
+        """Initialize and return an online feature vector service API;
+        returns :py:class:`~mlrun.feature_store.OnlineVectorService`.
+
+        :**usage**:
+            There are two ways to use the function:
+
+            1. As a context manager
+
+               Example::
+
+                   with vector_uri.get_online_feature_service() as svc:
+                       resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
+                       print(resp)
+                       resp = svc.get([{"ticker": "AAPL"}], as_list=True)
+                       print(resp)
+
+               Example with imputing::
+
+                   with vector_uri.get_online_feature_service(entity_keys=['id'],
+                                                              impute_policy={"*": "$mean", "amount": 0}) as svc:
+                       resp = svc.get([{"id": "C123487"}])
+
+            2. As a simple function; note that with this option you need to close the session.
+
+               Example::
+
+                   svc = vector_uri.get_online_feature_service(entity_keys=['ticker'])
+                   try:
+                       resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
+                       print(resp)
+                       resp = svc.get([{"ticker": "AAPL"}], as_list=True)
+                       print(resp)
+                   finally:
+                       svc.close()
+
+               Example with imputing::
+
+                   svc = vector_uri.get_online_feature_service(entity_keys=['id'],
+                                                               impute_policy={"*": "$mean", "amount": 0})
+                   try:
+                       resp = svc.get([{"id": "C123487"}])
+                   except Exception as e:
+                       # handle the exception
+                       ...
+                   finally:
+                       svc.close()
+
+        :param run_config:        function and/or run configuration for remote jobs/services
+        :param impute_policy:     a dict with an impute policy per feature: the dict key is the feature name
+                                  and the dict value indicates which value to use when the feature is
+                                  NaN/empty. The replacement value can be a fixed number for constants, or
+                                  $mean, $max, $min, $std, $count for statistical values.
+                                  "*" is used to specify the default for all features, e.g. `{"*": "$mean"}`
+        :param fixed_window_type: determines how to query the fixed window values which were previously
+                                  inserted by ingest
+        :param update_stats:      update features statistics from the requested feature sets on the vector.
+                                  Default: False.
+        :param entity_keys:       entity list of the first feature_set in the vector.
+                                  The indexes that are used to query the online service.
+        :return:                  the initialized `OnlineVectorService`.
+                                  Will be used in subclasses where `support_online=True`.
+        """
         return mlrun.feature_store.api._get_online_feature_service(
             self,
             run_config,
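The impute_policy resolution can be pictured with plain Python; a hedged sketch of the semantics (the real service fills values internally from the vector's statistics; names and numbers here are illustrative):

    import math

    stats_mean = {"age": 31.5, "amount": 12.0}  # per-feature stats (illustrative)
    policy = {"*": "$mean", "amount": 0}        # default $mean, constant 0 for "amount"
    row = {"age": float("nan"), "amount": float("nan")}
    for key, value in row.items():
        if math.isnan(value):
            rule = policy.get(key, policy.get("*"))
            row[key] = stats_mean[key] if rule == "$mean" else rule
    print(row)  # {'age': 31.5, 'amount': 0}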
mlrun/feature_store/steps.py
CHANGED

@@ -92,8 +92,6 @@ class MLRunStep(MapClass):
 
 
 class FeaturesetValidator(StepToDict, MLRunStep):
-    """Validate feature values according to the feature set validation policy"""
-
     def __init__(self, featureset=None, columns=None, name=None, **kwargs):
         """Validate feature values according to the feature set validation policy
 

@@ -152,8 +150,6 @@ class FeaturesetValidator(StepToDict, MLRunStep):
 
 
 class MapValues(StepToDict, MLRunStep):
-    """Map column values to new values"""
-
     def __init__(
         self,
         mapping: dict[str, dict[Union[str, int, bool], Any]],

@@ -510,15 +506,13 @@ class OneHotEncoder(StepToDict, MLRunStep):
 
 
 class DateExtractor(StepToDict, MLRunStep):
-    """Date Extractor allows you to extract a date-time component"""
-
     def __init__(
         self,
         parts: Union[dict[str, str], list[str]],
         timestamp_col: str = None,
         **kwargs,
     ):
-        """Date Extractor
+        """Date Extractor extracts a date-time component into new columns
 
         The extracted date part will appear as `<timestamp_col>_<date_part>` feature.
 

@@ -629,8 +623,6 @@ class DateExtractor(StepToDict, MLRunStep):
 
 
 class SetEventMetadata(MapClass):
-    """Set the event metadata (id and key) from the event body"""
-
     def __init__(
         self,
         id_path: Optional[str] = None,
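Based on the signature shown above (parts plus timestamp_col), a short usage sketch; the part names follow pandas datetime attributes:

    from mlrun.feature_store.steps import DateExtractor

    # each requested part becomes a `<timestamp_col>_<date_part>` feature,
    # e.g. "time_hour" and "time_day_of_week" for an event with a "time" column
    step = DateExtractor(parts=["hour", "day_of_week"], timestamp_col="time")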
mlrun/features.py
CHANGED
mlrun/k8s_utils.py
CHANGED

@@ -17,6 +17,7 @@ import kubernetes.client
 
 import mlrun.common.schemas
 import mlrun.errors
+import mlrun.utils.regex
 
 from .config import config as mlconfig
 

@@ -130,3 +131,53 @@ def sanitize_label_value(value: str) -> str:
     :return: string fully compliant with k8s label value expectations
     """
     return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
+
+
+def verify_label_key(key):
+    if not key:
+        raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
+    if key.startswith("k8s.io") or key.startswith("kubernetes.io"):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Labels cannot start with 'k8s.io' or 'kubernetes.io'"
+        )
+
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        key,
+        mlrun.utils.regex.k8s_character_limit,
+    )
+
+    parts = key.split("/")
+    if len(parts) == 1:
+        name = parts[0]
+    elif len(parts) == 2:
+        prefix, name = parts
+        if len(prefix) == 0:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Label key prefix cannot be empty"
+            )
+
+        # prefix must adhere to dns_1123_subdomain
+        mlrun.utils.helpers.verify_field_regex(
+            f"Project.metadata.labels.'{key}'",
+            prefix,
+            mlrun.utils.regex.dns_1123_subdomain,
+        )
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Label key can only contain one '/'"
+        )
+
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        name,
+        mlrun.utils.regex.qualified_name,
+    )
+
+
+def verify_label_value(value, label_key):
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{label_key}'",
+        value,
+        mlrun.utils.regex.label_value,
+    )
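A quick sketch of how the new validators behave (all failures raise MLRunInvalidArgumentError):

    import mlrun.errors
    import mlrun.k8s_utils

    mlrun.k8s_utils.verify_label_key("team")                          # bare name: passes
    mlrun.k8s_utils.verify_label_key("example.com/team")              # dns-1123 prefix + name: passes
    mlrun.k8s_utils.verify_label_value("data-eng", label_key="team")  # passes
    try:
        mlrun.k8s_utils.verify_label_key("kubernetes.io/role")        # reserved prefix: rejected
    except mlrun.errors.MLRunInvalidArgumentError as err:
        print(err)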
mlrun/model_monitoring/stream_processing.py
CHANGED

@@ -745,18 +745,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
-
             if not isinstance(prediction, list):
                 prediction = [prediction]
 
+            if not isinstance(feature, list):
+                feature = [feature]
+
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,

@@ -803,18 +797,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
 
-    @staticmethod
-    def is_list_of_numerics(
-        field: list[typing.Union[int, float, dict, list]],
-        dict_path: list[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
-
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
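The replaced numeric check boils down to a uniform list normalization; a standalone sketch of the new behavior:

    # scalar features and predictions are wrapped so downstream code can iterate uniformly
    for feature, prediction in zip([5.1, [1.0, 2.0]], [0, [1]]):
        if not isinstance(prediction, list):
            prediction = [prediction]
        if not isinstance(feature, list):
            feature = [feature]
        print(feature, prediction)  # [5.1] [0], then [1.0, 2.0] [1]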
mlrun/projects/project.py
CHANGED

@@ -40,6 +40,7 @@ import mlrun.common.schemas.model_monitoring
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.db
 import mlrun.errors
+import mlrun.k8s_utils
 import mlrun.runtimes
 import mlrun.runtimes.pod
 import mlrun.runtimes.utils

@@ -579,24 +580,36 @@ def _run_project_setup(
 
 def _load_project_dir(context, name="", subpath=""):
     subpath_str = subpath or ""
-
+
+    # support both .yaml and .yml file extensions
+    project_file_path = path.join(context, subpath_str, "project.y*ml")
+    function_file_path = path.join(context, subpath_str, "function.y*ml")
     setup_file_path = path.join(context, subpath_str, "project_setup.py")
-
-
+
+    if project_files := glob.glob(project_file_path):
+        # if there are multiple project files, use the first one
+        project_file_path = project_files[0]
+        with open(project_file_path) as fp:
             data = fp.read()
             struct = yaml.load(data, Loader=yaml.FullLoader)
             project = _project_instance_from_struct(struct, name)
             project.spec.context = context
-
-
-    func = import_function(
+    elif function_files := glob.glob(function_file_path):
+        function_path = function_files[0]
+        func = import_function(function_path)
+        function_file_name = path.basename(path.normpath(function_path))
         project = MlrunProject.from_dict(
             {
                 "metadata": {
                     "name": func.metadata.project,
                 },
                 "spec": {
-                    "functions": [
+                    "functions": [
+                        {
+                            "url": function_file_name,
+                            "name": func.metadata.name,
+                        },
+                    ],
                 },
             }
         )
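The `project.y*ml` glob matches both extensions; a minimal sketch (the directory layout is hypothetical):

    import glob
    from os import path

    context, subpath_str = "./my-project", ""  # assume ./my-project/project.yml exists
    project_files = glob.glob(path.join(context, subpath_str, "project.y*ml"))
    if project_files:
        print(project_files[0])  # ./my-project/project.yml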
@@ -693,6 +706,31 @@ class ProjectMetadata(ModelObj):
             return False
         return True
 
+    @staticmethod
+    def validate_project_labels(labels: dict, raise_on_failure: bool = True) -> bool:
+        """
+        Validate that the project labels comply with the Kubernetes label syntax:
+        https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
+        """
+
+        # no labels is a valid case
+        if not labels:
+            return True
+        if not isinstance(labels, dict):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Labels must be a dictionary of key-value pairs"
+            )
+        try:
+            for key, value in labels.items():
+                mlrun.k8s_utils.verify_label_key(key)
+                mlrun.k8s_utils.verify_label_value(value, label_key=key)
+
+        except mlrun.errors.MLRunInvalidArgumentError:
+            if raise_on_failure:
+                raise
+            return False
+        return True
+
 
 class ProjectSpec(ModelObj):
     def __init__(
mlrun/serving/remote.py
CHANGED

@@ -36,8 +36,6 @@ default_backoff_factor = 1
 
 
 class RemoteStep(storey.SendToHttp):
-    """class for calling remote endpoints"""
-
     def __init__(
         self,
         url: str,

@@ -240,8 +238,6 @@ class RemoteStep(storey.SendToHttp):
 
 
 class BatchHttpRequests(_ConcurrentJobExecution):
-    """class for calling remote endpoints in parallel"""
-
     def __init__(
         self,
         url: str = None,
mlrun/serving/routers.py
CHANGED

@@ -1111,7 +1111,7 @@ class EnrichmentModelRouter(ModelRouter):
         url_prefix: str = None,
         health_prefix: str = None,
         feature_vector_uri: str = "",
-        impute_policy: dict =
+        impute_policy: dict = None,
         **kwargs,
     ):
         """Model router with feature enrichment (from the feature store)

@@ -1156,13 +1156,17 @@ class EnrichmentModelRouter(ModelRouter):
         )
 
         self.feature_vector_uri = feature_vector_uri
-        self.impute_policy = impute_policy
+        self.impute_policy = impute_policy or {}
 
         self._feature_service = None
 
     def post_init(self, mode="sync"):
+        from ..feature_store import get_feature_vector
+
         super().post_init(mode)
-        self._feature_service =
+        self._feature_service = get_feature_vector(
+            self.feature_vector_uri
+        ).get_online_feature_service(
             impute_policy=self.impute_policy,
         )
 

@@ -1191,7 +1195,7 @@ class EnrichmentVotingEnsemble(VotingEnsemble):
         executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
         prediction_col_name: str = None,
         feature_vector_uri: str = "",
-        impute_policy: dict =
+        impute_policy: dict = None,
         **kwargs,
     ):
         """Voting Ensemble with feature enrichment (from the feature store)

@@ -1298,13 +1302,17 @@ class EnrichmentVotingEnsemble(VotingEnsemble):
         )
 
         self.feature_vector_uri = feature_vector_uri
-        self.impute_policy = impute_policy
+        self.impute_policy = impute_policy or {}
 
         self._feature_service = None
 
     def post_init(self, mode="sync"):
+        from ..feature_store import get_feature_vector
+
         super().post_init(mode)
-        self._feature_service =
+        self._feature_service = get_feature_vector(
+            self.feature_vector_uri
+        ).get_online_feature_service(
             impute_policy=self.impute_policy,
         )
 
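The `None` default plus `impute_policy or {}` avoids sharing one mutable dict across instances; a generic sketch of the pitfall (assuming the removed default was a dict literal, which the truncated `-` lines suggest):

    class Bad:
        def __init__(self, policy: dict = {}):  # one dict shared by every instance
            self.policy = policy

    class Good:
        def __init__(self, policy: dict = None):
            self.policy = policy or {}  # fresh dict per instance, as the routers now do

    a, b = Bad(), Bad()
    a.policy["x"] = 1
    print(b.policy)  # {'x': 1} -- leaked state
    c, d = Good(), Good()
    c.policy["x"] = 1
    print(d.policy)  # {}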
mlrun/serving/states.py
CHANGED
mlrun/serving/v2_serving.py
CHANGED

@@ -29,8 +29,6 @@ from .utils import StepToDict, _extract_input_data, _update_result_body
 
 
 class V2ModelServer(StepToDict):
-    """base model serving class (v2), using similar API to KFServing v2 and Triton"""
-
     def __init__(
         self,
         context=None,

@@ -221,6 +219,8 @@ class V2ModelServer(StepToDict):
 
     def _pre_event_processing_actions(self, event, event_body, op):
         self._check_readiness(event)
+        if "_dict" in op:
+            event_body = self._inputs_to_list(event_body)
         request = self.preprocess(event_body, op)
         return self.validate(request, op)
 

@@ -237,7 +237,12 @@ class V2ModelServer(StepToDict):
         if not op and event.method != "GET":
             op = "infer"
 
-        if op == "predict" or op == "infer":
+        if (
+            op == "predict"
+            or op == "infer"
+            or op == "infer_dict"
+            or op == "predict_dict"
+        ):
             # predict operation
             request = self._pre_event_processing_actions(event, event_body, op)
             try:

@@ -378,6 +383,43 @@ class V2ModelServer(StepToDict):
         """model explain operation"""
         raise NotImplementedError()
 
+    def _inputs_to_list(self, request: dict) -> dict:
+        """
+        Convert the inputs from a list of dictionaries / a dictionary to a list of lists / a list,
+        where the inner list order follows the ArtifactModel inputs.
+
+        :param request: event body
+        :return:        event body with the inputs converted to a list of lists
+        """
+        if self.model_spec and self.model_spec.inputs:
+            input_order = [feature.name for feature in self.model_spec.inputs]
+        else:
+            raise mlrun.MLRunInvalidArgumentError(
+                "In order to use the predict_dict or infer_dict operation you have to provide `model_path` "
+                "to the model server and to load it with the `load()` function"
+            )
+        inputs = request.get("inputs")
+        try:
+            if isinstance(inputs, list) and all(
+                isinstance(item, dict) for item in inputs
+            ):
+                new_inputs = [
+                    [input_dict[key] for key in input_order] for input_dict in inputs
+                ]
+            elif isinstance(inputs, dict):
+                new_inputs = [inputs[key] for key in input_order]
+            else:
+                raise mlrun.MLRunInvalidArgumentError(
+                    "When using the predict_dict or infer_dict operation, the inputs must be "
+                    "of type `list[dict]` or `dict`"
+                )
+        except KeyError:
+            raise mlrun.MLRunInvalidArgumentError(
+                f"Input dictionary doesn't contain all the necessary input keys: {input_order}"
+            )
+        request["inputs"] = new_inputs
+        return request
+
 
 class _ModelLogPusher:
     def __init__(self, model, context, output_stream=None):
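What `_inputs_to_list` does to a predict_dict/infer_dict request, as a standalone sketch (`input_order` normally comes from `model_spec.inputs`; the feature names are illustrative):

    input_order = ["sepal_len", "sepal_wid"]
    request = {"inputs": [{"sepal_wid": 3.5, "sepal_len": 5.1}]}
    request["inputs"] = [[d[key] for key in input_order] for d in request["inputs"]]
    print(request)  # {'inputs': [[5.1, 3.5]]}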
mlrun/utils/helpers.py
CHANGED

@@ -181,6 +181,8 @@ def verify_field_regex(
     log_message: str = "Field is malformed. Does not match required pattern",
     mode: mlrun.common.schemas.RegexMatchModes = mlrun.common.schemas.RegexMatchModes.all,
 ) -> bool:
+    # limit the error message length
+    max_chars = 63
     for pattern in patterns:
         if not re.match(pattern, str(field_value)):
             log_func = logger.warn if raise_on_failure else logger.debug

@@ -193,7 +195,8 @@ def verify_field_regex(
         if mode == mlrun.common.schemas.RegexMatchModes.all:
             if raise_on_failure:
                 raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"Field '{field_name}' is malformed. '{field_value}' does not match required pattern: {pattern}"
+                    f"Field '{field_name[:max_chars]}' is malformed. '{field_value[:max_chars]}' "
+                    f"does not match required pattern: {pattern}"
                 )
             return False
         elif mode == mlrun.common.schemas.RegexMatchModes.any:

@@ -203,7 +206,7 @@ def verify_field_regex(
     elif mode == mlrun.common.schemas.RegexMatchModes.any:
         if raise_on_failure:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Field '{field_name}' is malformed. '{field_value}' does not match any of the"
+                f"Field '{field_name[:max_chars]}' is malformed. '{field_value[:max_chars]}' does not match any of the"
                 f" required patterns: {patterns}"
             )
         return False
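The cap simply slices both strings before they reach the error text; for example:

    max_chars = 63
    field_value = "v" * 200
    print(f"'{field_value[:max_chars]}' is malformed")  # echoes at most 63 characters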
mlrun/utils/regex.py
CHANGED

@@ -21,9 +21,13 @@ pipeline_param = [r"{{pipelineparam:op=([\w\s_-]*);name=([\w\s_-]+)}}"]
 # k8s character limit is for 63 characters
 k8s_character_limit = [r"^.{0,63}$"]
 
+# k8s name
+# https://github.com/kubernetes/apimachinery/blob/kubernetes-1.25.16/pkg/util/validation/validation.go#L33
+qualified_name = [r"^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$"]
+
 # k8s label value format
 # https://github.com/kubernetes/kubernetes/blob/v1.20.0/staging/src/k8s.io/apimachinery/pkg/util/validation/validation.go#L161
-label_value = k8s_character_limit + [r"^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$"]
+label_value = k8s_character_limit + qualified_name
 
 # DNS Subdomain (RFC 1123) - used by k8s for most resource names format
 # https://github.com/kubernetes/kubernetes/blob/v1.20.0/staging/src/k8s.io/apimachinery/pkg/util/validation/validation.go#L204
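A quick check of the new `qualified_name` pattern (the 63-character cap is enforced separately by `k8s_character_limit`):

    import re

    qualified_name = r"^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$"
    print(bool(re.match(qualified_name, "valid-name_1")))       # True
    print(bool(re.match(qualified_name, "-starts-with-dash")))  # False: must start alphanumeric
    print(bool(re.match(qualified_name, "a" * 70)))             # True here; length is capped elsewhere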
mlrun/utils/version/version.json
CHANGED