mlrun 1.8.0rc43__py3-none-any.whl → 1.8.0rc45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/schemas/model_monitoring/constants.py +1 -0
- mlrun/config.py +6 -0
- mlrun/db/base.py +3 -7
- mlrun/db/httpdb.py +16 -18
- mlrun/db/nopdb.py +0 -5
- mlrun/feature_store/__init__.py +2 -0
- mlrun/feature_store/api.py +0 -139
- mlrun/feature_store/feature_vector.py +91 -468
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +1 -2
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +10 -18
- mlrun/model_monitoring/db/tsdb/base.py +5 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +124 -42
- mlrun/model_monitoring/writer.py +1 -1
- mlrun/projects/project.py +24 -27
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/serving/states.py +40 -22
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc43.dist-info → mlrun-1.8.0rc45.dist-info}/METADATA +3 -2
- {mlrun-1.8.0rc43.dist-info → mlrun-1.8.0rc45.dist-info}/RECORD +26 -25
- {mlrun-1.8.0rc43.dist-info → mlrun-1.8.0rc45.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc43.dist-info → mlrun-1.8.0rc45.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc43.dist-info → mlrun-1.8.0rc45.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.8.0rc43.dist-info → mlrun-1.8.0rc45.dist-info}/top_level.txt +0 -0
mlrun/config.py
CHANGED
@@ -549,6 +549,10 @@ default_config = {
         },
     },
     "model_endpoint_monitoring": {
+        # Scaling Rule
+        # The fundamental scaling rule to maintain is: Shards/Partitions = Replicas * Workers
+        # In other words, the number of shards (V3IO) or partitions (Kafka) must be equal to the
+        # total number of worker processes across all pods.
         "serving_stream": {
             "v3io": {
                 "shard_count": 2,
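The new comment states an invariant rather than a default, so it is easy to check mechanically. A minimal sketch of the rule (the function and its arguments are illustrative, not part of mlrun's API):

# Illustrative check of the scaling rule from the new config comment:
# shards (V3IO) or partitions (Kafka) == replicas * workers.
def stream_is_balanced(shard_count: int, replicas: int, workers_per_replica: int) -> bool:
    return shard_count == replicas * workers_per_replica

assert stream_is_balanced(8, 4, 2)       # 8 shards feed 4 replicas x 2 workers exactly
assert not stream_is_balanced(2, 4, 2)   # under-sharded: some workers would sit idle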
@@ -822,6 +826,8 @@ default_config = {
         # maximum allowed alert config cache size in alert's CRUD
         # for the best performance, it is recommended to set this value to the maximum number of alerts
         "max_allowed_cache_size": 20000,
+        # default limit for listing alert configs
+        "default_list_alert_configs_limit": 2000,
     },
     "auth_with_client_id": {
         "enabled": False,
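Like other mlrun defaults, the new limit can be overridden without code changes through an MLRUN_-prefixed environment variable, with "__" separating nesting levels. A sketch, assuming the key sits under the alerts config section as the list_alerts_configs docstring later in this diff suggests:

import os

# Assumption: the new key lives under the "alerts" section, matching the
# mlconf.alerts.default_list_alert_configs_limit reference in the httpdb docstring.
os.environ["MLRUN_ALERTS__DEFAULT_LIST_ALERT_CONFIGS_LIMIT"] = "500"

import mlrun

print(mlrun.mlconf.alerts.default_list_alert_configs_limit)  # -> 500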
mlrun/db/base.py
CHANGED
@@ -889,7 +889,9 @@ class RunDBInterface(ABC):
         pass

     @abstractmethod
-    def list_alerts_configs(self, project=""):
+    def list_alerts_configs(
+        self, project="", limit: Optional[int] = None, offset: Optional[int] = None
+    ):
         pass

     @abstractmethod
@@ -1105,12 +1107,6 @@ class RunDBInterface(ABC):
     ) -> bool:
         pass

-    @abstractmethod
-    def deploy_histogram_data_drift_app(
-        self, project: str, image: str = "mlrun/mlrun"
-    ) -> None:
-        pass
-
     @abstractmethod
     def set_model_monitoring_credentials(
         self,
mlrun/db/httpdb.py
CHANGED
@@ -4080,21 +4080,6 @@ class HTTPRunDB(RunDBInterface):
                 deletion_failed = True
         return not deletion_failed

-    def deploy_histogram_data_drift_app(
-        self, project: str, image: str = "mlrun/mlrun"
-    ) -> None:
-        """
-        Deploy the histogram data drift application.
-
-        :param project: Project name.
-        :param image: The image on which the application will run.
-        """
-        self.api_call(
-            method=mlrun.common.types.HTTPMethod.PUT,
-            path=f"projects/{project}/model-monitoring/histogram-data-drift-app",
-            params={"image": image},
-        )
-
     def set_model_monitoring_credentials(
         self,
         project: str,
@@ -4818,20 +4803,33 @@
         response = self.api_call("GET", endpoint_path, error_message)
         return AlertConfig.from_dict(response.json())

-    def list_alerts_configs(self, project="") -> list[AlertConfig]:
+    def list_alerts_configs(
+        self, project="", limit: Optional[int] = None, offset: Optional[int] = None
+    ) -> list[AlertConfig]:
         """
         Retrieve list of alerts of a project.

         :param project: The project name.
+        :param limit: The maximum number of alerts to return.
+            Defaults to mlconf.alerts.default_list_alert_configs_limit if not provided.
+        :param offset: The number of alerts to skip.

         :returns: All the alerts objects of the project.
         """
         project = project or config.default_project
         endpoint_path = f"projects/{project}/alerts"
         error_message = f"get alerts {project}/alerts"
-        response = self.api_call("GET", endpoint_path, error_message).json()
+        params = {}
+        # TODO: Deprecate limit and offset when pagination is implemented
+        if limit:
+            params["page-size"] = limit
+        if offset:
+            params["offset"] = offset
+        response = self.api_call(
+            "GET", endpoint_path, error_message, params=params
+        ).json()
         results = []
-        for item in response:
+        for item in response.get("alerts", []):
             results.append(AlertConfig(**item))
         return results
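The added limit/offset pair enables simple client-side paging until the TODO'd server-side pagination lands. A usage sketch against the new signature (the project name and page size are illustrative):

import mlrun

db = mlrun.get_run_db()
page_size = 200  # illustrative; the server default comes from
                 # mlconf.alerts.default_list_alert_configs_limit (2000)
offset = 0
all_alerts = []
while True:
    batch = db.list_alerts_configs("my-project", limit=page_size, offset=offset)
    all_alerts.extend(batch)
    if len(batch) < page_size:
        break  # last page reached
    offset += page_size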
mlrun/db/nopdb.py
CHANGED
@@ -883,11 +883,6 @@ class NopDB(RunDBInterface):
     ) -> bool:
         pass

-    def deploy_histogram_data_drift_app(
-        self, project: str, image: str = "mlrun/mlrun"
-    ) -> None:
-        pass
-
     def set_model_monitoring_credentials(
         self,
         project: str,
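With deploy_histogram_data_drift_app dropped from RunDBInterface, HTTPRunDB, and NopDB alike, the dedicated client entry point for the drift app is gone. A hedged sketch of the presumed replacement path, assuming the project-level enable_model_monitoring flow (and its deploy_histogram_data_drift_app flag, present in recent mlrun releases) remains the supported way to deploy it:

import mlrun

project = mlrun.get_or_create_project("my-project", context="./")

# Assumption: the histogram data drift app is deployed as part of the project's
# model-monitoring enablement, not via the removed HTTPRunDB method.
project.enable_model_monitoring(deploy_histogram_data_drift_app=True)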
mlrun/feature_store/__init__.py
CHANGED
mlrun/feature_store/api.py
CHANGED
@@ -47,159 +47,20 @@ from .common import (
     get_feature_vector_by_uri,
     verify_feature_set_exists,
     verify_feature_set_permissions,
-    verify_feature_vector_permissions,
 )
 from .feature_set import FeatureSet
-from .feature_vector import (
-    FeatureVector,
-    FixedWindowType,
-    OfflineVectorResponse,
-    OnlineVectorService,
-)
 from .ingestion import (
     context_to_ingestion_params,
     init_featureset_graph,
     run_ingestion_job,
     run_spark_graph,
 )
-from .retrieval import RemoteVectorResponse, get_merger, run_merge_job

 _v3iofs = None
 spark_transform_handler = "transform"
 _TRANS_TABLE = str.maketrans({" ": "_", "(": "", ")": ""})


-def _features_to_vector_and_check_permissions(features, update_stats):
-    if isinstance(features, str):
-        vector = get_feature_vector_by_uri(features, update=update_stats)
-    elif isinstance(features, FeatureVector):
-        vector = features
-        if not vector.metadata.name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "feature vector name must be specified"
-            )
-        verify_feature_vector_permissions(
-            vector, mlrun.common.schemas.AuthorizationAction.update
-        )
-
-        vector.save()
-    else:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"illegal features value/type ({type(features)})"
-        )
-    return vector
-
-
-def _get_offline_features(
-    feature_vector: Union[str, FeatureVector],
-    entity_rows=None,
-    entity_timestamp_column: Optional[str] = None,
-    target: DataTargetBase = None,
-    run_config: RunConfig = None,
-    drop_columns: Optional[list[str]] = None,
-    start_time: Optional[Union[str, datetime]] = None,
-    end_time: Optional[Union[str, datetime]] = None,
-    with_indexes: bool = False,
-    update_stats: bool = False,
-    engine: Optional[str] = None,
-    engine_args: Optional[dict] = None,
-    query: Optional[str] = None,
-    order_by: Optional[Union[str, list[str]]] = None,
-    spark_service: Optional[str] = None,
-    timestamp_for_filtering: Optional[Union[str, dict[str, str]]] = None,
-    additional_filters=None,
-) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
-    if entity_rows is None and entity_timestamp_column is not None:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "entity_timestamp_column param "
-            "can not be specified without entity_rows param"
-        )
-    if isinstance(target, BaseStoreTarget) and not target.support_pandas:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"get_offline_features does not support targets that do not support pandas engine."
-            f" Target kind: {target.kind}"
-        )
-
-    if isinstance(feature_vector, FeatureVector):
-        update_stats = True
-
-    feature_vector = _features_to_vector_and_check_permissions(
-        feature_vector, update_stats
-    )
-
-    entity_timestamp_column = (
-        entity_timestamp_column or feature_vector.spec.timestamp_field
-    )
-
-    merger_engine = get_merger(engine)
-
-    if run_config and not run_config.local:
-        return run_merge_job(
-            feature_vector,
-            target,
-            merger_engine,
-            engine,
-            engine_args,
-            spark_service,
-            entity_rows,
-            entity_timestamp_column=entity_timestamp_column,
-            run_config=run_config,
-            drop_columns=drop_columns,
-            with_indexes=with_indexes,
-            query=query,
-            order_by=order_by,
-            start_time=start_time,
-            end_time=end_time,
-            timestamp_for_filtering=timestamp_for_filtering,
-            additional_filters=additional_filters,
-        )
-
-    merger = merger_engine(feature_vector, **(engine_args or {}))
-    return merger.start(
-        entity_rows,
-        entity_timestamp_column,
-        target=target,
-        drop_columns=drop_columns,
-        start_time=start_time,
-        end_time=end_time,
-        timestamp_for_filtering=timestamp_for_filtering,
-        with_indexes=with_indexes,
-        update_stats=update_stats,
-        query=query,
-        order_by=order_by,
-        additional_filters=additional_filters,
-    )
-
-
-def _get_online_feature_service(
-    feature_vector: Union[str, FeatureVector],
-    run_config: RunConfig = None,
-    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: Optional[dict] = None,
-    update_stats: bool = False,
-    entity_keys: Optional[list[str]] = None,
-) -> OnlineVectorService:
-    if isinstance(feature_vector, FeatureVector):
-        update_stats = True
-    feature_vector = _features_to_vector_and_check_permissions(
-        feature_vector, update_stats
-    )
-
-    # Impute policies rely on statistics in many cases, so verifying that the fvec has stats in it
-    if impute_policy and not feature_vector.status.stats:
-        update_stats = True
-
-    engine_args = {"impute_policy": impute_policy}
-    merger_engine = get_merger("storey")
-    # todo: support remote service (using remote nuclio/mlrun function if run_config)
-
-    merger = merger_engine(feature_vector, **engine_args)
-
-    return merger.init_online_vector_service(
-        entity_keys, fixed_window_type, update_stats=update_stats
-    )
-
-
 def norm_column_name(name: str) -> str:
     """
     Remove parentheses () and replace whitespaces with an underscore _.