mlrun 1.10.0rc12__py3-none-any.whl → 1.10.0rc13__py3-none-any.whl
This diff compares the publicly released contents of the two package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/llm_prompt.py +5 -1
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +1 -1
- mlrun/common/schemas/serving.py +7 -0
- mlrun/config.py +2 -0
- mlrun/datastore/model_provider/model_provider.py +81 -3
- mlrun/datastore/model_provider/openai_provider.py +52 -28
- mlrun/datastore/remote_client.py +11 -0
- mlrun/execution.py +5 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +175 -8
- mlrun/projects/pipelines.py +40 -18
- mlrun/projects/project.py +5 -0
- mlrun/run.py +25 -2
- mlrun/serving/server.py +1 -0
- mlrun/serving/states.py +411 -21
- mlrun/serving/system_steps.py +6 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/METADATA +1 -1
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/RECORD +23 -23
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc12.dist-info → mlrun-1.10.0rc13.dist-info}/top_level.txt +0 -0
mlrun/run.py
CHANGED
@@ -895,7 +895,7 @@ def _run_pipeline(
 def retry_pipeline(
     run_id: str,
     project: str,
-) -> str:
+) -> typing.Union[str, dict[str, str]]:
     """Retry a pipeline run.

     This function retries a previously executed pipeline run using the specified run ID. If the run is not in a
@@ -914,10 +914,33 @@ def retry_pipeline(
             "Please set the dbpath URL."
         )

-    pipeline_run_id = mldb.retry_pipeline(
+    # Invoke retry pipeline run. Depending on the context, this call returns either:
+    # 1. A simple string of a workflow-id, for direct retries or non-remote workflows, or
+    # 2. A dict payload representing a WorkflowResponse when rerunning remote workflows.
+    rerun_response = mldb.retry_pipeline(
         run_id=run_id,
         project=project,
     )
+    if isinstance(rerun_response, str):
+        pipeline_run_id = rerun_response
+    else:
+        rerun_response = mlrun.common.schemas.WorkflowResponse(**rerun_response)
+
+        def _fetch_workflow_id():
+            rerun = mldb.read_run(rerun_response.run_id, project)
+            workflow_id = rerun["metadata"]["labels"].get("workflow-id")
+            if not workflow_id:
+                raise mlrun.errors.MLRunRuntimeError("workflow-id label not set yet")
+            return workflow_id
+
+        pipeline_run_id = mlrun.utils.helpers.retry_until_successful(
+            backoff=3,
+            timeout=int(mlrun.mlconf.workflows.timeouts.remote),
+            logger=logger,
+            verbose=False,
+            _function=_fetch_workflow_id,
+        )
+
     if pipeline_run_id == run_id:
         logger.info(
             f"Retried pipeline run ID={pipeline_run_id}, check UI for progress."
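For orientation, a minimal sketch of the polling pattern the new branch relies on. This is an illustrative stand-in, not mlrun.utils.helpers.retry_until_successful itself; only the keyword arguments mirror the call above:

import time

def retry_until_successful(backoff, timeout, logger, verbose, _function):
    # Keep calling _function until it stops raising; sleep `backoff` seconds
    # between attempts and give up after `timeout` seconds.
    deadline = time.monotonic() + timeout
    last_exc = None
    while time.monotonic() < deadline:
        try:
            return _function()  # e.g. _fetch_workflow_id above
        except Exception as exc:
            last_exc = exc
            if verbose and logger:
                logger.info(f"retrying after error: {exc}")
            time.sleep(backoff)
    raise TimeoutError("gave up waiting for the workflow-id label") from last_exc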
mlrun/serving/server.py
CHANGED
mlrun/serving/states.py
CHANGED
@@ -44,7 +44,6 @@ from mlrun.datastore.datastore_profile import (
     datastore_profile_read,
 )
 from mlrun.datastore.model_provider.model_provider import ModelProvider
-from mlrun.datastore.store_resources import get_store_resource
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
 from mlrun.utils import logger

@@ -518,7 +517,7 @@ class BaseStep(ModelObj):
                     "ModelRunnerStep can be added to 'Flow' topology graph only"
                 )
             step_model_endpoints_names = list(
-                step.class_args
+                step.class_args.get(schemas.ModelRunnerStepData.MODELS, {}).keys()
             )
             # Get all model_endpoints names that are in both lists
             common_endpoints_names = list(
@@ -530,8 +529,77 @@ class BaseStep(ModelObj):
                 raise GraphError(
                     f"The graph already contains the model endpoints named - {common_endpoints_names}."
                 )
+
+            # Check if shared models are defined in the graph
+            self._verify_shared_models(root, step, step_model_endpoints_names)
+            # Update model endpoints names in the root step
             root.update_model_endpoints_names(step_model_endpoints_names)

+    @staticmethod
+    def _verify_shared_models(
+        root: "RootFlowStep",
+        step: "ModelRunnerStep",
+        step_model_endpoints_names: list[str],
+    ) -> None:
+        proxy_endpoints = [
+            name
+            for name in step_model_endpoints_names
+            if step.class_args.get(
+                schemas.ModelRunnerStepData.MODEL_TO_EXECUTION_MECHANISM, {}
+            ).get(name)
+            == ParallelExecutionMechanisms.shared_executor
+        ]
+        shared_models = []
+
+        for name in proxy_endpoints:
+            shared_runnable_name = (
+                step.class_args.get(schemas.ModelRunnerStepData.MODELS, {})
+                .get(name, ["", {}])[schemas.ModelsData.MODEL_PARAMETERS.value]
+                .get("shared_runnable_name")
+            )
+            model_artifact_uri = (
+                step.class_args.get(schemas.ModelRunnerStepData.MODELS, {})
+                .get(name, ["", {}])[schemas.ModelsData.MODEL_PARAMETERS.value]
+                .get("artifact_uri")
+            )
+            prefix, _ = mlrun.datastore.parse_store_uri(model_artifact_uri)
+            # if the model artifact is a prompt, we need to get the model URI
+            # to ensure that the shared runnable name is correct
+            if prefix == mlrun.utils.StorePrefix.LLMPrompt:
+                llm_artifact, _ = mlrun.store_manager.get_store_artifact(
+                    model_artifact_uri
+                )
+                model_artifact_uri = llm_artifact.spec.parent_uri
+            actual_shared_name = root.get_shared_model_name_by_artifact_uri(
+                model_artifact_uri
+            )
+
+            if not shared_runnable_name:
+                if not actual_shared_name:
+                    raise GraphError(
+                        f"Can't find shared model for {name} model endpoint"
+                    )
+                else:
+                    step.class_args[schemas.ModelRunnerStepData.MODELS][name][
+                        schemas.ModelsData.MODEL_PARAMETERS.value
+                    ]["shared_runnable_name"] = actual_shared_name
+                    shared_models.append(actual_shared_name)
+            elif actual_shared_name != shared_runnable_name:
+                raise GraphError(
+                    f"Model endpoint {name} shared runnable name mismatch: "
+                    f"expected {actual_shared_name}, got {shared_runnable_name}"
+                )
+            else:
+                shared_models.append(actual_shared_name)
+
+        undefined_shared_models = list(
+            set(shared_models) - set(root.shared_models.keys())
+        )
+        if undefined_shared_models:
+            raise GraphError(
+                f"The following shared models are not defined in the graph: {undefined_shared_models}."
+            )
+

 class TaskStep(BaseStep):
     """task execution step, runs a class or handler"""
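To make the new validation concrete, a hypothetical sketch of the data it inspects. The literal key names and URI below are illustrative only; the real keys come from the mlrun.common.schemas enums used above:

class_args = {
    "models": {
        "chat-endpoint": (
            "Model",
            {
                "artifact_uri": "store://models/my-project/my-llm#0:latest",
                "shared_runnable_name": "shared-llm",  # may be omitted
            },
        ),
    },
    "model_to_execution_mechanism": {"chat-endpoint": "shared_executor"},
}

_verify_shared_models resolves each "shared_executor" endpoint to a shared model, either by its explicit shared_runnable_name or by matching artifact_uri against the root step's shared models (LLM prompt artifacts are first dereferenced to their parent model URI); any unresolved or mismatched name raises a GraphError.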
@@ -1008,7 +1076,13 @@ class RouterStep(TaskStep):


 class Model(storey.ParallelExecutionRunnable, ModelObj):
-    _dict_fields = [
+    _dict_fields = [
+        "name",
+        "raise_exception",
+        "artifact_uri",
+        "shared_runnable_name",
+    ]
+    kind = "model"

     def __init__(
         self,
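The now-explicit field list matters because ModelObj-style objects typically serialize only whitelisted attributes. A simplified sketch of that convention (not mlrun's actual ModelObj implementation):

class SerializableSketch:
    _dict_fields = ["name", "raise_exception", "artifact_uri", "shared_runnable_name"]

    def to_dict(self):
        # Serialize only the whitelisted fields that are set on the instance.
        return {
            field: getattr(self, field)
            for field in self._dict_fields
            if getattr(self, field, None) is not None
        }

Listing "shared_runnable_name" here is what lets a proxy Model round-trip that parameter through to_dict()/from_dict().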
@@ -1238,16 +1312,105 @@ class ModelRunnerStep(MonitoredStep):
         self.raise_exception = raise_exception
         self.shape = "folder"

+    def add_shared_model_proxy(
+        self,
+        endpoint_name: str,
+        model_artifact: Union[str, ModelArtifact, LLMPromptArtifact],
+        shared_model_name: Optional[str] = None,
+        labels: Optional[Union[list[str], dict[str, str]]] = None,
+        model_endpoint_creation_strategy: Optional[
+            schemas.ModelEndpointCreationStrategy
+        ] = schemas.ModelEndpointCreationStrategy.INPLACE,
+        inputs: Optional[list[str]] = None,
+        outputs: Optional[list[str]] = None,
+        input_path: Optional[str] = None,
+        result_path: Optional[str] = None,
+        override: bool = False,
+    ) -> None:
+        """
+        Add a proxy model to the ModelRunnerStep, i.e. a proxy for a model that is already defined as a shared
+        model within the graph.
+
+        :param endpoint_name: str, identifies the model in the ModelRunnerStep and sets the model endpoint name
+        :param model_artifact: model artifact or mlrun model artifact uri; the model endpoint is matched to the
+                               correct shared model according to this artifact.
+        :param shared_model_name: str, the name of the shared model that is already defined within the graph
+        :param labels: model endpoint labels, should be a list of str or a mapping of str:str
+        :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
+
+            * **overwrite**:
+              1. If model endpoints with the same name exist, delete the `latest` one.
+              2. Create a new model endpoint entry and set it as `latest`.
+            * **inplace** (default):
+              1. If model endpoints with the same name exist, update the `latest` entry.
+              2. Otherwise, create a new entry.
+            * **archive**:
+              1. If model endpoints with the same name exist, preserve them.
+              2. Create a new model endpoint with the same name and set it to `latest`.
+
+        :param inputs: list of the model inputs (e.g. features); if provided, overrides the inputs
+                       configured in the model artifact. Note that these inputs need to be equal in
+                       length and order to the inputs that the model_class predict method expects.
+        :param outputs: list of the model outputs (e.g. labels); if provided, overrides the outputs
+                        configured in the model artifact. Note that these outputs need to be equal to
+                        the model_class predict method outputs (length and order).
+        :param input_path: input path inside the user event; scopes are expected to be defined by dot notation
+                           (e.g. "inputs.my_model_inputs"). Expects a list or dictionary type object in path.
+        :param result_path: result path inside the user output event; scopes are expected to be defined by dot
+                            notation (e.g. "outputs.my_model_outputs"). Expects a list or dictionary type object
+                            in path.
+        :param override: bool, allow overriding an existing model on the current ModelRunnerStep.
+        """
+        model_class = Model(
+            name=endpoint_name,
+            shared_runnable_name=shared_model_name,
+        )
+        if isinstance(model_artifact, str):
+            model_artifact_uri = model_artifact
+        elif isinstance(model_artifact, ModelArtifact):
+            model_artifact_uri = model_artifact.uri
+        elif isinstance(model_artifact, LLMPromptArtifact):
+            model_artifact_uri = model_artifact.model_artifact.uri
+        else:
+            raise MLRunInvalidArgumentError(
+                "model_artifact must be a string, ModelArtifact or LLMPromptArtifact"
+            )
+        root = self._extract_root_step()
+        if isinstance(root, RootFlowStep):
+            shared_model_name = (
+                shared_model_name
+                or root.get_shared_model_name_by_artifact_uri(model_artifact_uri)
+            )
+            if not root.shared_models or (
+                root.shared_models
+                and shared_model_name
+                and shared_model_name not in root.shared_models.keys()
+            ):
+                raise GraphError(
+                    f"ModelRunnerStep can only add proxy models that were added to the root flow step, "
+                    f"model {shared_model_name} is not in the shared models."
+                )
+        self.add_model(
+            endpoint_name=endpoint_name,
+            model_class=model_class,
+            execution_mechanism=ParallelExecutionMechanisms.shared_executor,
+            model_artifact=model_artifact,
+            labels=labels,
+            model_endpoint_creation_strategy=model_endpoint_creation_strategy,
+            override=override,
+            inputs=inputs,
+            outputs=outputs,
+            input_path=input_path,
+            result_path=result_path,
+        )
+
     def add_model(
         self,
         endpoint_name: str,
         model_class: Union[str, Model],
         execution_mechanism: Union[str, ParallelExecutionMechanisms],
-        model_artifact: Optional[
-            Union[str, mlrun.artifacts.ModelArtifact, mlrun.artifacts.LLMPromptArtifact]
-        ] = None,
+        model_artifact: Optional[Union[str, ModelArtifact, LLMPromptArtifact]] = None,
         labels: Optional[Union[list[str], dict[str, str]]] = None,
-
+        model_endpoint_creation_strategy: Optional[
             schemas.ModelEndpointCreationStrategy
         ] = schemas.ModelEndpointCreationStrategy.INPLACE,
         inputs: Optional[list[str]] = None,
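A minimal usage sketch of the new method; the step wiring, endpoint name, and URI are hypothetical:

runner = ModelRunnerStep(name="runner")  # assumed constructor arguments
runner.add_shared_model_proxy(
    endpoint_name="chat-endpoint",
    model_artifact="store://models/my-project/my-llm#0:latest",
    shared_model_name="shared-llm",  # optional; resolved by artifact URI if omitted
    outputs=["answer"],
)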
@@ -1285,7 +1448,7 @@ class ModelRunnerStep(MonitoredStep):

         :param model_artifact: model artifact or mlrun model artifact uri
         :param labels: model endpoint labels, should be list of str or mapping of str:str
-        :param
+        :param model_endpoint_creation_strategy: Strategy for creating or updating the model endpoint:
             * **overwrite**:
               1. If model endpoints with the same name exist, delete the `latest` one.
               2. Create a new model endpoint entry and set it as `latest`.
@@ -1310,7 +1473,6 @@ class ModelRunnerStep(MonitoredStep):
         :param override: bool allow override existing model on the current ModelRunnerStep.
         :param model_parameters: Parameters for model instantiation
         """
-
         if isinstance(model_class, Model) and model_parameters:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
@@ -1319,10 +1481,20 @@ class ModelRunnerStep(MonitoredStep):
         model_parameters = model_parameters or (
             model_class.to_dict() if isinstance(model_class, Model) else {}
         )
-
-
+
+        if isinstance(
+            model_artifact,
+            str,
         ):
-
+            try:
+                model_artifact, _ = mlrun.store_manager.get_store_artifact(
+                    model_artifact
+                )
+            except mlrun.errors.MLRunNotFoundError:
+                raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")
+
+        outputs = outputs or self._get_model_output_schema(model_artifact)
+
         model_artifact = (
             model_artifact.uri
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
@@ -1369,7 +1541,7 @@ class ModelRunnerStep(MonitoredStep):
             schemas.MonitoringData.OUTPUTS: outputs,
             schemas.MonitoringData.INPUT_PATH: input_path,
             schemas.MonitoringData.RESULT_PATH: result_path,
-            schemas.MonitoringData.CREATION_STRATEGY:
+            schemas.MonitoringData.CREATION_STRATEGY: model_endpoint_creation_strategy,
             schemas.MonitoringData.LABELS: labels,
             schemas.MonitoringData.MODEL_PATH: model_artifact,
             schemas.MonitoringData.MODEL_CLASS: model_class,
@@ -1379,14 +1551,44 @@ class ModelRunnerStep(MonitoredStep):

     @staticmethod
     def _get_model_output_schema(
-
+        model_artifact: Union[ModelArtifact, LLMPromptArtifact],
+    ) -> Optional[list[str]]:
+        if isinstance(
+            model_artifact,
+            ModelArtifact,
+        ):
+            return [feature.name for feature in model_artifact.spec.outputs]
+        elif isinstance(
+            model_artifact,
+            LLMPromptArtifact,
+        ):
+            _model_artifact = model_artifact.model_artifact
+            return [feature.name for feature in _model_artifact.spec.outputs]
+
+    @staticmethod
+    def _get_model_endpoint_output_schema(
+        name: str,
+        project: str,
+        uid: str,
     ) -> list[str]:
         output_schema = None
-
-
-
+        try:
+            model_endpoint: mlrun.common.schemas.model_monitoring.ModelEndpoint = (
+                mlrun.db.get_run_db().get_model_endpoint(
+                    name=name,
+                    project=project,
+                    endpoint_id=uid,
+                    tsdb_metrics=False,
+                )
+            )
+            output_schema = model_endpoint.spec.label_names
+        except (
+            mlrun.errors.MLRunNotFoundError,
+            mlrun.errors.MLRunInvalidArgumentError,
+        ):
+            logger.warning(
+                f"Model endpoint not found, using default output schema for model {name}"
             )
-            output_schema = [feature.name for feature in artifact.spec.outputs]
         return output_schema

     @staticmethod
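The new fallback queries the model-endpoint API that the diff itself uses; a sketch with hypothetical values:

db = mlrun.db.get_run_db()
model_endpoint = db.get_model_endpoint(
    name="chat-endpoint",  # hypothetical endpoint name
    project="my-project",  # hypothetical project
    endpoint_id="abc123",  # hypothetical uid
    tsdb_metrics=False,
)
outputs = model_endpoint.spec.label_names  # e.g. ["answer"]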
@@ -1407,8 +1609,14 @@ class ModelRunnerStep(MonitoredStep):
         if isinstance(monitoring_data, dict):
             for model in monitoring_data:
                 monitoring_data[model][schemas.MonitoringData.OUTPUTS] = (
-                    monitoring_data
-                    or self.
+                    monitoring_data.get(model, {}).get(schemas.MonitoringData.OUTPUTS)
+                    or self._get_model_endpoint_output_schema(
+                        name=model,
+                        project=self.context.project if self.context else None,
+                        uid=monitoring_data.get(model, {}).get(
+                            mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
+                        ),
+                    )
                 )
                 # Prevent calling _get_model_output_schema for same model more than once
                 self.class_args[
@@ -1429,6 +1637,7 @@ class ModelRunnerStep(MonitoredStep):
         return monitoring_data

     def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
+        self.context = context
         if not self._is_local_function(context):
             # skip init of non local functions
             return
@@ -1773,7 +1982,7 @@ class FlowStep(BaseStep):
         self._insert_all_error_handlers()
         self.check_and_process_graph()

-        for step in self.
+        for step in self.steps.values():
             step.set_parent(self)
             step.init_object(context, namespace, mode, reset=reset)
         self._set_error_handler()
@@ -2136,6 +2345,11 @@ class RootFlowStep(FlowStep):
         "model_endpoints_names",
         "model_endpoints_routes_names",
         "track_models",
+        "shared_max_processes",
+        "shared_max_threads",
+        "shared_models",
+        "shared_models_mechanism",
+        "pool_factor",
     ]

     def __init__(
@@ -2156,6 +2370,140 @@ class RootFlowStep(FlowStep):
         self._models = set()
         self._route_models = set()
         self._track_models = False
+        self._shared_models: dict[str, tuple[str, dict]] = {}
+        self._shared_models_mechanism: dict[str, ParallelExecutionMechanisms] = {}
+        self._shared_max_processes = None
+        self._shared_max_threads = None
+        self._pool_factor = None
+
+    def add_shared_model(
+        self,
+        name: str,
+        model_class: Union[str, Model],
+        execution_mechanism: Union[str, ParallelExecutionMechanisms],
+        model_artifact: Optional[Union[str, ModelArtifact]],
+        override: bool = False,
+        **model_parameters,
+    ) -> None:
+        """
+        Add a shared model to the graph; this model will be available to all the ModelRunners in the graph.
+
+        :param name: Name of the shared model (should be unique in the graph)
+        :param model_class: Model class name
+        :param execution_mechanism: Parallel execution mechanism to be used to execute this model. Must be one of:
+
+            * "process_pool" – To run in a separate process from a process pool. This is appropriate for CPU or GPU
+              intensive tasks as they would otherwise block the main process by holding Python's Global Interpreter
+              Lock (GIL).
+            * "dedicated_process" – To run in a separate dedicated process. This is appropriate for CPU or GPU
+              intensive tasks that also require significant Runnable-specific initialization (e.g. a large model).
+            * "thread_pool" – To run in a separate thread. This is appropriate for blocking I/O tasks, as they would
+              otherwise block the main event loop thread.
+            * "asyncio" – To run in an asyncio task. This is appropriate for I/O tasks that use asyncio, allowing the
+              event loop to continue running while waiting for a response.
+            * "shared_executor" – Reuses an external executor (typically managed by the flow or context) to execute
+              the runnable. Should be used only if you have multiple `ParallelExecution` steps in the same flow, and
+              is especially useful when:
+
+              - You want to share a heavy resource like a large model loaded onto a GPU.
+              - You want to centralize task scheduling or coordination for multiple lightweight tasks.
+              - You aim to minimize overhead from creating new executors or processes/threads per runnable.
+
+              The runnable is expected to be pre-initialized and reused across events, enabling efficient use of
+              memory and hardware accelerators.
+            * "naive" – To run in the main event loop. This is appropriate only for trivial computation and/or file
+              I/O. It means that the runnable will not actually be run in parallel to anything else.
+
+        :param model_artifact: model artifact or mlrun model artifact uri
+        :param override: bool, allow overriding an existing model on the current graph.
+        :param model_parameters: Parameters for model instantiation
+        """
+        if isinstance(model_class, Model) and model_parameters:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
+            )
+
+        if execution_mechanism == ParallelExecutionMechanisms.shared_executor:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Cannot add a shared model with execution mechanism 'shared_executor'"
+            )
+        ParallelExecutionMechanisms.validate(execution_mechanism)
+
+        model_parameters = model_parameters or (
+            model_class.to_dict() if isinstance(model_class, Model) else {}
+        )
+        model_artifact = (
+            model_artifact.uri
+            if isinstance(model_artifact, mlrun.artifacts.Artifact)
+            else model_artifact
+        )
+        model_parameters["artifact_uri"] = model_parameters.get(
+            "artifact_uri", model_artifact
+        )
+
+        if model_parameters.get("name", name) != name or (
+            isinstance(model_class, Model) and model_class.name != name
+        ):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Inconsistent name for the added model."
+            )
+        model_parameters["name"] = name
+
+        if name in self.shared_models and not override:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Model with name {name} already exists in this graph."
+            )
+
+        model_class = (
+            model_class
+            if isinstance(model_class, str)
+            else model_class.__class__.__name__
+        )
+        self.shared_models[name] = (model_class, model_parameters)
+        self.shared_models_mechanism[name] = execution_mechanism
+
+    def get_shared_model_name_by_artifact_uri(self, artifact_uri: str) -> Optional[str]:
+        """
+        Get a shared model's name by its artifact URI.
+
+        :param artifact_uri: The artifact URI of the model.
+        :return: The shared model name if found, otherwise None.
+        """
+        for model_name, (model_class, model_params) in self.shared_models.items():
+            if model_params.get("artifact_uri") == artifact_uri:
+                return model_name
+        return None
+
+    def config_pool_resource(
+        self,
+        max_processes: Optional[int] = None,
+        max_threads: Optional[int] = None,
+        pool_factor: Optional[int] = None,
+    ) -> None:
+        """
+        Configure the resource limits for the shared models in the graph.
+
+        :param max_processes: Maximum number of processes to spawn (excluding dedicated processes).
+                              Defaults to the number of CPUs, or 16 if undetectable.
+        :param max_threads: Maximum number of threads to spawn. Defaults to 32.
+        :param pool_factor: Multiplier to scale the number of process/thread workers per runnable. Defaults to 1.
+        """
+        self.shared_max_processes = max_processes
+        self.shared_max_threads = max_threads
+        self.pool_factor = pool_factor
+
+    def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
+        self.context = context
+        if self.shared_models:
+            self.context.executor = storey.flow.RunnableExecutor(
+                max_processes=self.shared_max_processes,
+                max_threads=self.shared_max_threads,
+                pool_factor=self.pool_factor,
+            )
+
+            for model, model_params in self.shared_models.values():
+                model = get_class(model, namespace).from_dict(
+                    model_params, init_with_params=True
+                )
+                model._raise_exception = False
+                self.context.executor.add_runnable(
+                    model, self._shared_models_mechanism[model.name]
+                )
+        super().init_object(context, namespace, mode, reset=reset, **extra_kwargs)

     @property
     def model_endpoints_names(self) -> list[str]:
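Putting the new RootFlowStep API together, a hypothetical end-to-end sketch; the function object, model class, and URI are illustrative:

graph = fn.set_topology("flow")  # `fn` is a serving function created elsewhere
graph.add_shared_model(
    name="shared-llm",
    model_class="MyLLMModel",  # a Model subclass resolvable in the graph namespace
    execution_mechanism="dedicated_process",
    model_artifact="store://models/my-project/my-llm#0:latest",
)
# Optionally cap the executor pools that init_object will create:
graph.config_pool_resource(max_processes=4, max_threads=16, pool_factor=1)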
@@ -2184,6 +2532,48 @@ class RootFlowStep(FlowStep):
     def track_models(self, track_models: bool):
         self._track_models = track_models

+    @property
+    def shared_models(self) -> dict[str, tuple[str, dict]]:
+        return self._shared_models
+
+    @shared_models.setter
+    def shared_models(self, shared_models: dict[str, tuple[str, dict]]):
+        self._shared_models = shared_models
+
+    @property
+    def shared_models_mechanism(self) -> dict[str, ParallelExecutionMechanisms]:
+        return self._shared_models_mechanism
+
+    @shared_models_mechanism.setter
+    def shared_models_mechanism(
+        self, shared_models_mechanism: dict[str, ParallelExecutionMechanisms]
+    ):
+        self._shared_models_mechanism = shared_models_mechanism
+
+    @property
+    def shared_max_processes(self) -> Optional[int]:
+        return self._shared_max_processes
+
+    @shared_max_processes.setter
+    def shared_max_processes(self, max_processes: Optional[int]):
+        self._shared_max_processes = max_processes
+
+    @property
+    def shared_max_threads(self) -> Optional[int]:
+        return self._shared_max_threads
+
+    @shared_max_threads.setter
+    def shared_max_threads(self, max_threads: Optional[int]):
+        self._shared_max_threads = max_threads
+
+    @property
+    def pool_factor(self) -> Optional[int]:
+        return self._pool_factor
+
+    @pool_factor.setter
+    def pool_factor(self, pool_factor: Optional[int]):
+        self._pool_factor = pool_factor
+
     def update_model_endpoints_routes_names(self, model_endpoints_names: list):
         self._route_models.update(model_endpoints_names)

mlrun/serving/system_steps.py
CHANGED
@@ -150,12 +150,16 @@ class MonitoringPreProcessor(storey.MapClass):
     def do(self, event):
         monitoring_event_list = []
         model_runner_name = event._metadata.get("model_runner_name", "")
-        step = self.server.graph.steps[model_runner_name] if self.server else
+        step = self.server.graph.steps[model_runner_name] if self.server else None
+        if not step or not hasattr(step, "monitoring_data"):
+            raise mlrun.errors.MLRunRuntimeError(
+                f"ModelRunnerStep name {model_runner_name} is not found in the graph or does not have monitoring data"
+            )
         monitoring_data = step.monitoring_data
         logger.debug(
             "monitoring preprocessor started",
             event=event,
-
+            monitoring_data=monitoring_data,
             metadata=event._metadata,
         )
         if len(monitoring_data) > 1:
mlrun/utils/version/version.json
CHANGED