mlrun 1.8.0rc37__py3-none-any.whl → 1.8.0rc39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +1 -8
- mlrun/artifacts/base.py +3 -3
- mlrun/artifacts/manager.py +1 -1
- mlrun/common/model_monitoring/helpers.py +0 -13
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +2 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +5 -11
- mlrun/datastore/__init__.py +57 -16
- mlrun/datastore/base.py +0 -11
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/datastore/sources.py +6 -17
- mlrun/datastore/storeytargets.py +29 -15
- mlrun/datastore/utils.py +73 -0
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +16 -0
- mlrun/db/nopdb.py +1 -0
- mlrun/feature_store/__init__.py +2 -0
- mlrun/feature_store/api.py +77 -0
- mlrun/model_monitoring/api.py +2 -20
- mlrun/model_monitoring/controller.py +18 -2
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +21 -6
- mlrun/model_monitoring/helpers.py +15 -27
- mlrun/model_monitoring/stream_processing.py +7 -34
- mlrun/projects/operations.py +3 -3
- mlrun/projects/pipelines.py +5 -0
- mlrun/projects/project.py +4 -4
- mlrun/run.py +4 -4
- mlrun/runtimes/kubejob.py +2 -2
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/function.py +1 -46
- mlrun/runtimes/pod.py +37 -145
- mlrun/serving/routers.py +80 -64
- mlrun/serving/states.py +30 -1
- mlrun/serving/v2_serving.py +24 -62
- mlrun/utils/async_http.py +1 -2
- mlrun/utils/helpers.py +1 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/METADATA +1 -1
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/RECORD +43 -43
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/top_level.txt +0 -0
mlrun/runtimes/pod.py
CHANGED
@@ -17,7 +17,6 @@ import os
 import re
 import time
 import typing
-import warnings
 from collections.abc import Iterable
 from enum import Enum
 
@@ -704,7 +703,29 @@ class KubeResourceSpec(FunctionSpec):
                 ),
                 affinity_field_name=affinity_field_name,
             )
+        # purge any affinity / anti-affinity preemption related configuration and enrich with preemptible tolerations
         elif self_preemption_mode == PreemptionModes.allow.value:
+            # remove preemptible anti-affinity
+            self._prune_affinity_node_selector_requirement(
+                generate_preemptible_node_selector_requirements(
+                    NodeSelectorOperator.node_selector_op_not_in.value
+                ),
+                affinity_field_name=affinity_field_name,
+            )
+            # remove preemptible affinity
+            self._prune_affinity_node_selector_requirement(
+                generate_preemptible_node_selector_requirements(
+                    NodeSelectorOperator.node_selector_op_in.value
+                ),
+                affinity_field_name=affinity_field_name,
+            )
+
+            # remove preemptible nodes constrain
+            self._prune_node_selector(
+                mlconf.get_preemptible_node_selector(),
+                node_selector_field_name=node_selector_field_name,
+            )
+
             # enrich with tolerations
             self._merge_tolerations(
                 generate_preemptible_tolerations(),
@@ -1180,132 +1201,6 @@ class KubeResource(BaseRuntime):
         """
         self.spec.with_requests(mem, cpu, patch=patch)
 
-    def detect_preemptible_node_selector(
-        self, node_selector: dict[str, str]
-    ) -> list[str]:
-        """
-        Checks if any provided node selector matches the preemptible node selectors.
-        Issues a warning if a selector may be pruned at runtime depending on preemption mode.
-
-        :param node_selector: The user-provided node selector dictionary.
-        """
-        preemptible_node_selector = mlconf.get_preemptible_node_selector()
-
-        return [
-            f"'{key}': '{val}'"
-            for key, val in node_selector.items()
-            if preemptible_node_selector.get(key) == val
-        ]
-
-    def detect_preemptible_tolerations(
-        self, tolerations: list[k8s_client.V1Toleration]
-    ) -> list[str]:
-        """
-        Checks if any provided toleration matches preemptible tolerations.
-        Issues a warning if a toleration may be pruned at runtime depending on preemption mode.
-
-        :param tolerations: The user-provided list of tolerations.
-        """
-        preemptible_tolerations = [
-            k8s_client.V1Toleration(
-                key=toleration.get("key"),
-                value=toleration.get("value"),
-                effect=toleration.get("effect"),
-            )
-            for toleration in mlconf.get_preemptible_tolerations()
-        ]
-
-        def _format_toleration(toleration):
-            return f"'{toleration.key}'='{toleration.value}' (effect: '{toleration.effect}')"
-
-        return [
-            _format_toleration(toleration)
-            for toleration in tolerations
-            if toleration in preemptible_tolerations
-        ]
-
-    def detect_preemptible_affinity(self, affinity: k8s_client.V1Affinity) -> list[str]:
-        """
-        Checks if any provided affinity rules match preemptible affinity configurations.
-        Issues a warning if an affinity rule may be pruned at runtime depending on preemption mode.
-
-        :param affinity: The user-provided affinity object.
-        """
-
-        preemptible_affinity_terms = generate_preemptible_nodes_affinity_terms()
-        conflicting_affinities = []
-
-        if (
-            affinity
-            and affinity.node_affinity
-            and affinity.node_affinity.required_during_scheduling_ignored_during_execution
-        ):
-            user_terms = affinity.node_affinity.required_during_scheduling_ignored_during_execution.node_selector_terms
-            for user_term in user_terms:
-                user_expressions = {
-                    (expr.key, expr.operator, tuple(expr.values or []))
-                    for expr in user_term.match_expressions or []
-                }
-
-                for preemptible_term in preemptible_affinity_terms:
-                    preemptible_expressions = {
-                        (expr.key, expr.operator, tuple(expr.values or []))
-                        for expr in preemptible_term.match_expressions or []
-                    }
-
-                    # Ensure operators match and preemptible expressions are present
-                    common_exprs = user_expressions & preemptible_expressions
-                    if common_exprs:
-                        formatted = ", ".join(
-                            f"'{key} {operator} {list(values)}'"
-                            for key, operator, values in common_exprs
-                        )
-                        conflicting_affinities.append(formatted)
-        return conflicting_affinities
-
-    def raise_preemptible_warning(
-        self,
-        node_selector: typing.Optional[dict[str, str]],
-        tolerations: typing.Optional[list[k8s_client.V1Toleration]],
-        affinity: typing.Optional[k8s_client.V1Affinity],
-    ) -> None:
-        """
-        Detects conflicts and issues a single warning if necessary.
-
-        :param node_selector: The user-provided node selector dictionary.
-        :param tolerations: The user-provided list of tolerations.
-        :param affinity: The user-provided affinity object.
-        """
-        conflict_messages = []
-
-        if node_selector:
-            ns_conflicts = ", ".join(
-                self.detect_preemptible_node_selector(node_selector)
-            )
-            if ns_conflicts:
-                conflict_messages.append(f"Node selectors: {ns_conflicts}")
-
-        if tolerations:
-            tol_conflicts = ", ".join(self.detect_preemptible_tolerations(tolerations))
-            if tol_conflicts:
-                conflict_messages.append(f"Tolerations: {tol_conflicts}")
-
-        if affinity:
-            affinity_conflicts = ", ".join(self.detect_preemptible_affinity(affinity))
-            if affinity_conflicts:
-                conflict_messages.append(f"Affinity: {affinity_conflicts}")
-
-        if conflict_messages:
-            warning_componentes = "; \n".join(conflict_messages)
-            warnings.warn(
-                f"Warning: based on the preemptible node settings configured in your MLRun configuration,\n"
-                f"{warning_componentes}\n"
-                f" may be removed or adjusted at runtime.\n"
-                "This adjustment depends on the function's preemption mode. \n"
-                "The list of potential adjusted preemptible selectors can be viewed here: "
-                "mlrun.mlconf.get_preemptible_node_selector() and mlrun.mlconf.get_preemptible_tolerations()."
-            )
-
     def with_node_selection(
         self,
         node_name: typing.Optional[str] = None,
@@ -1314,14 +1209,19 @@
         tolerations: typing.Optional[list[k8s_client.V1Toleration]] = None,
     ):
         """
-        Enables control
+        Enables to control on which k8s node the job will run
+
+        :param node_name:       The name of the k8s node
+        :param node_selector:   Label selector, only nodes with matching labels will be eligible to be picked
+        :param affinity:        Expands the types of constraints you can express - see
+                                https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
+                                for details
+        :param tolerations:     Tolerations are applied to pods, and allow (but do not require) the pods to schedule
+                                onto nodes with matching taints - see
+                                https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration
+                                for details
 
-        :param node_name: The name of the Kubernetes node.
-        :param node_selector: Label selector, only nodes with matching labels will be eligible.
-        :param affinity: Defines scheduling constraints.
-        :param tolerations: Allows scheduling onto nodes with matching taints.
         """
-        # Apply values as before
        if node_name:
             self.spec.node_name = node_name
         if node_selector is not None:
@@ -1332,12 +1232,6 @@
         if tolerations is not None:
             self.spec.tolerations = tolerations
 
-        self.raise_preemptible_warning(
-            node_selector=self.spec.node_selector,
-            tolerations=self.spec.tolerations,
-            affinity=self.spec.affinity,
-        )
-
     def with_priority_class(self, name: typing.Optional[str] = None):
         """
         Enables to control the priority of the pod
@@ -1578,15 +1472,13 @@
             f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
         )
         if watch and not ready:
-            state = self._build_watch(
+            self.status.state = self._build_watch(
                 watch=watch,
                 show_on_failure=show_on_failure,
             )
-            ready = state == "ready"
-
-
-        if watch and not ready:
-            raise mlrun.errors.MLRunRuntimeError("Deploy failed")
+            ready = self.status.state == "ready"
+            if not ready:
+                raise mlrun.errors.MLRunRuntimeError("Deploy failed")
         return ready
 
     def _build_watch(
mlrun/serving/routers.py
CHANGED
@@ -18,6 +18,7 @@ import copy
 import json
 import traceback
 import typing
+from datetime import timedelta
 from enum import Enum
 from io import BytesIO
 from typing import Union
@@ -78,6 +79,9 @@ class BaseModelRouter(RouterToDict):
         self.inputs_key = "instances" if self.protocol == "v1" else "inputs"
         self._input_path = input_path
         self._result_path = result_path
+        self._background_task_check_timestamp = None
+        self._background_task_terminate = False
+        self._background_task_current_state = None
         self.kwargs = kwargs
 
     def parse_event(self, event):
@@ -135,6 +139,7 @@
             raise ValueError(
                 f"illegal path prefix {urlpath}, must start with {self.url_prefix}"
             )
+        self._update_background_task_state(event)
         return event
 
     def do_event(self, event, *args, **kwargs):
@@ -160,6 +165,63 @@
         """run tasks after processing the event"""
         return event
 
+    def _get_background_task_status(
+        self,
+    ) -> mlrun.common.schemas.BackgroundTaskState:
+        self._background_task_check_timestamp = now_date()
+        server: mlrun.serving.GraphServer = getattr(
+            self.context, "_server", None
+        ) or getattr(self.context, "server", None)
+        if not self.context.is_mock:
+            if server.model_endpoint_creation_task_name:
+                background_task = mlrun.get_run_db().get_project_background_task(
+                    server.project, server.model_endpoint_creation_task_name
+                )
+                logger.debug(
+                    "Checking model endpoint creation task status",
+                    task_name=server.model_endpoint_creation_task_name,
+                )
+                if (
+                    background_task.status.state
+                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
+                ):
+                    logger.debug(
+                        f"Model endpoint creation task completed with state {background_task.status.state}"
+                    )
+                    self._background_task_terminate = True
+                else:  # in progress
+                    logger.debug(
+                        f"Model endpoint creation task is still in progress with the current state: "
+                        f"{background_task.status.state}. Events will not be monitored for the next 15 seconds",
+                        name=self.name,
+                        background_task_check_timestamp=self._background_task_check_timestamp.isoformat(),
+                    )
+                return background_task.status.state
+            else:
+                logger.debug(
+                    "Model endpoint creation task name not provided",
+                )
+        elif self.context.monitoring_mock:
+            self._background_task_terminate = (
+                True  # If mock monitoring we return success and terminate task check.
+            )
+            return mlrun.common.schemas.BackgroundTaskState.succeeded
+        self._background_task_terminate = True  # If mock without monitoring we return failed and terminate task check.
+        return mlrun.common.schemas.BackgroundTaskState.failed
+
+    def _update_background_task_state(self, event):
+        if not self._background_task_terminate and (
+            self._background_task_check_timestamp is None
+            or now_date() - self._background_task_check_timestamp
+            >= timedelta(seconds=15)
+        ):
+            self._background_task_current_state = self._get_background_task_status()
+        if event.body:
+            event.body["background_task_state"] = (
+                self._background_task_current_state
+                or mlrun.common.schemas.BackgroundTaskState.running
+            )
+
 
 class ModelRouter(BaseModelRouter):
     def _resolve_route(self, body, urlpath):
@@ -599,75 +661,29 @@ class VotingEnsemble(ParallelRun):
         self.log_router = True
         self.prediction_col_name = prediction_col_name or "prediction"
         self.format_response_with_col_name_flag = format_response_with_col_name_flag
-        self.model_endpoint_uid = None
-        self.model_endpoint = None
+        self.model_endpoint_uid = kwargs.get("model_endpoint_uid", None)
         self.shard_by_endpoint = shard_by_endpoint
+        self._model_logger = None
         self.initialized = False
 
     def post_init(self, mode="sync", **kwargs):
         self._update_weights(self.weights)
 
-    def _lazy_init(self,
-
-
-
-
-
-
-
-
-
-
-
-
-                    "Checking model endpoint creation task status",
-                    task_name=server.model_endpoint_creation_task_name,
-                )
-                if (
-                    background_task.status.state
-                    in mlrun.common.schemas.BackgroundTaskState.terminal_states()
-                ):
-                    logger.info(
-                        f"Model endpoint creation task completed with state {background_task.status.state}"
-                    )
-                else:  # in progress
-                    logger.debug(
-                        f"Model endpoint creation task is still in progress with the current state: "
-                        f"{background_task.status.state}. This event will not be monitored.",
-                        name=self.name,
-                        event_id=event_id,
-                    )
-                    self.initialized = False
-                    return
-            else:
-                logger.info(
-                    "Model endpoint creation task name not provided",
-                )
-            try:
-                self.model_endpoint_uid = (
-                    mlrun.get_run_db()
-                    .get_model_endpoint(
-                        project=server.project,
-                        name=self.name,
-                        function_name=server.function_name,
-                        function_tag=server.function_tag or "latest",
-                        tsdb_metrics=False,
-                    )
-                    .metadata.uid
-                )
-            except mlrun.errors.MLRunNotFoundError:
-                logger.info(
-                    "Model endpoint not found for this step; monitoring for this model will not be performed",
-                    function_name=server.function_name,
-                    name=self.name,
+    def _lazy_init(self, event):
+        if event and isinstance(event, dict):
+            background_task_state = event.get("background_task_state", None)
+            if (
+                background_task_state
+                == mlrun.common.schemas.BackgroundTaskState.succeeded
+            ):
+                self._model_logger = (
+                    _ModelLogPusher(self, self.context)
+                    if self.context
+                    and self.context.stream.enabled
+                    and self.model_endpoint_uid
+                    else None
                 )
-        self.
-        self._model_logger = (
-            _ModelLogPusher(self, self.context)
-            if self.context and self.context.stream.enabled and self.model_endpoint_uid
-            else None
-        )
-        self.initialized = True
+                self.initialized = True
 
     def _resolve_route(self, body, urlpath):
         """Resolves the appropriate model to send the event to.
@@ -872,14 +888,14 @@
         Response
             Event response after running the requested logic
         """
-        if not self.initialized:
-            self._lazy_init(event.id)
         start = now_date()
         # Handle and verify the request
         original_body = event.body
         event.body = _extract_input_data(self._input_path, event.body)
         event = self.preprocess(event)
         event = self._pre_handle_event(event)
+        if not self.initialized:
+            self._lazy_init(event.body)
 
         # Should we terminate the event?
         if hasattr(event, "terminated") and event.terminated:
mlrun/serving/states.py
CHANGED
@@ -31,6 +31,13 @@ import storey.utils
 
 import mlrun
 import mlrun.common.schemas as schemas
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileV3io,
+    datastore_profile_read,
+)
+from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
 from mlrun.utils import logger
 
 from ..config import config
@@ -1885,7 +1892,29 @@ def _init_async_objects(context, steps):
 
                 kafka_brokers = get_kafka_brokers_from_dict(options, pop=True)
 
-                if stream_path.startswith("
+                if stream_path and stream_path.startswith("ds://"):
+                    datastore_profile = datastore_profile_read(stream_path)
+                    if isinstance(
+                        datastore_profile,
+                        (DatastoreProfileKafkaTarget, DatastoreProfileKafkaSource),
+                    ):
+                        step._async_object = KafkaStoreyTarget(
+                            path=stream_path,
+                            context=context,
+                            **options,
+                        )
+                    elif isinstance(datastore_profile, DatastoreProfileV3io):
+                        step._async_object = StreamStoreyTarget(
+                            stream_path=stream_path,
+                            context=context,
+                            **options,
+                        )
+                    else:
+                        raise mlrun.errors.MLRunValueError(
+                            f"Received an unexpected stream profile type: {type(datastore_profile)}\n"
+                            "Expects `DatastoreProfileV3io` or `DatastoreProfileKafkaSource`."
+                        )
+                elif stream_path.startswith("kafka://") or kafka_brokers:
                     topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
 
                     kafka_producer_options = options.pop(
mlrun/serving/v2_serving.py
CHANGED
@@ -111,11 +111,11 @@ class V2ModelServer(StepToDict):
         if model:
             self.model = model
             self.ready = True
-        self.model_endpoint_uid = None
+        self.model_endpoint_uid = kwargs.get("model_endpoint_uid", None)
         self.shard_by_endpoint = shard_by_endpoint
         self._model_logger = None
         self.initialized = False
-        self.output_schema = []
+        self.output_schema = kwargs.get("outputs", [])
 
     def _load_and_update_state(self):
         try:
@@ -137,67 +137,29 @@
         else:
             self._load_and_update_state()
 
-
-        server: mlrun.serving.GraphServer = getattr(
-            self.context, "_server", None
-        ) or getattr(self.context, "server", None)
-        if not server:
-            logger.warn("GraphServer not initialized for VotingEnsemble instance")
-            return
-        if not self.context.is_mock and not self.model_spec:
+        if self.ready and not self.context.is_mock and not self.model_spec:
             self.get_model()
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        f"{background_task.status.state}. This event will not be monitored.",
-                        name=self.name,
-                        event_id=event_id,
-                    )
-                    self.initialized = False
-                    return
-            else:
-                logger.debug(
-                    "Model endpoint creation task name not provided",
-                )
-            try:
-                model_endpoint = mlrun.get_run_db().get_model_endpoint(
-                    project=server.project,
-                    name=self.name,
-                    function_name=server.function_name,
-                    function_tag=server.function_tag or "latest",
-                    tsdb_metrics=False,
-                )
-                self.model_endpoint_uid = model_endpoint.metadata.uid
-                self.output_schema = model_endpoint.spec.label_names
-            except mlrun.errors.MLRunNotFoundError:
-                logger.info(
-                    "Model endpoint not found for this step; monitoring for this model will not be performed",
-                    function_name=server.function_name,
-                    name=self.name,
+
+        if self.model_spec:
+            self.output_schema = self.output_schema or [
+                feature.name for feature in self.model_spec.outputs
+            ]
+
+    def _lazy_init(self, event):
+        if event and isinstance(event, dict):
+            background_task_state = event.get("background_task_state", None)
+            if (
+                background_task_state
+                == mlrun.common.schemas.BackgroundTaskState.succeeded
+            ):
+                self._model_logger = (
+                    _ModelLogPusher(self, self.context)
+                    if self.context
+                    and self.context.stream.enabled
+                    and self.model_endpoint_uid
+                    else None
                 )
-        self.
-        self._model_logger = (
-            _ModelLogPusher(self, self.context)
-            if self.context and self.context.stream.enabled and self.model_endpoint_uid
-            else None
-        )
-        self.initialized = True
+                self.initialized = True
 
     def get_param(self, key: str, default=None):
         """get param by key (specified in the model or the function)"""
@@ -276,7 +238,7 @@
     def do_event(self, event, *args, **kwargs):
         """main model event handler method"""
         if not self.initialized:
-            self._lazy_init(event.
+            self._lazy_init(event.body)
         start = now_date()
         original_body = event.body
         event_body = _extract_input_data(self._input_path, event.body)
mlrun/utils/async_http.py
CHANGED
@@ -26,8 +26,7 @@ from aiohttp_retry.client import _RequestContext
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.errors import raise_for_status as ml_raise_for_status
-
-from .helpers import logger as mlrun_logger
+from mlrun.utils.helpers import logger as mlrun_logger
 
 DEFAULT_BLACKLISTED_METHODS = [
     "POST",
mlrun/utils/helpers.py
CHANGED
@@ -146,7 +146,7 @@ def get_artifact_target(item: dict, project=None):
     return item["spec"].get("target_path")
 
 
-# TODO:
+# TODO: Remove once data migration v5 is obsolete
 def is_legacy_artifact(artifact):
     if isinstance(artifact, dict):
         return "metadata" not in artifact
@@ -498,7 +498,6 @@ def get_in(obj, keys, default=None):
     """
     if isinstance(keys, str):
         keys = keys.split(".")
-
     for key in keys:
         if not obj or key not in obj:
             return default
mlrun/utils/version/version.json
CHANGED