mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/llm_prompt.py +6 -0
- mlrun/artifacts/manager.py +0 -5
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/functions.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/common/schemas/workflow.py +2 -0
- mlrun/config.py +1 -1
- mlrun/datastore/model_provider/model_provider.py +42 -14
- mlrun/datastore/model_provider/openai_provider.py +96 -15
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +64 -9
- mlrun/db/nopdb.py +13 -0
- mlrun/launcher/local.py +13 -0
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/applications/base.py +176 -20
- mlrun/model_monitoring/db/_schedules.py +84 -24
- mlrun/model_monitoring/db/tsdb/base.py +72 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
- mlrun/model_monitoring/helpers.py +26 -4
- mlrun/projects/pipelines.py +44 -24
- mlrun/projects/project.py +26 -7
- mlrun/runtimes/daskjob.py +6 -0
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -0
- mlrun/runtimes/nuclio/serving.py +12 -11
- mlrun/runtimes/pod.py +21 -0
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +0 -2
- mlrun/serving/server.py +122 -53
- mlrun/serving/states.py +128 -44
- mlrun/serving/system_steps.py +84 -58
- mlrun/utils/helpers.py +82 -12
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA +2 -7
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/RECORD +48 -48
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py
CHANGED

@@ -15,6 +15,7 @@
 __all__ = ["GraphServer", "create_graph_server", "GraphContext", "MockEvent"]

 import asyncio
+import base64
 import copy
 import json
 import os

@@ -349,33 +350,33 @@ def add_error_raiser_step(
     monitored_steps_raisers = {}
     user_steps = list(graph.steps.values())
     for monitored_step in monitored_steps.values():
-        [old lines 352-378 not captured in this diff view]
+        error_step = graph.add_step(
+            class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
+            name=f"{monitored_step.name}_error_raise",
+            after=monitored_step.name,
+            full_event=True,
+            raise_exception=monitored_step.raise_exception,
+            models_names=list(monitored_step.class_args["models"].keys()),
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
+        if monitored_step.responder:
+            monitored_step.responder = False
+            error_step.respond()
+        monitored_steps_raisers[monitored_step.name] = error_step.name
+        error_step.on_error = monitored_step.on_error
+    if monitored_steps_raisers:
+        for step in user_steps:
+            if step.after:
+                if isinstance(step.after, list):
+                    for i in range(len(step.after)):
+                        if step.after[i] in monitored_steps_raisers:
+                            step.after[i] = monitored_steps_raisers[step.after[i]]
+                else:
+                    if (
+                        isinstance(step.after, str)
+                        and step.after in monitored_steps_raisers
+                    ):
+                        step.after = monitored_steps_raisers[step.after]
     return graph


@@ -384,6 +385,7 @@ def add_monitoring_general_steps(
     graph: RootFlowStep,
     context,
     serving_spec,
+    pause_until_background_task_completion: bool,
 ) -> tuple[RootFlowStep, FlowStep]:
     """
     Adding the monitoring flow connection steps, this steps allow the graph to reconstruct the serving event enrich it

@@ -392,18 +394,22 @@
     "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
     --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
     """
+    background_task_status_step = None
+    if pause_until_background_task_completion:
+        background_task_status_step = graph.add_step(
+            "mlrun.serving.system_steps.BackgroundTaskStatus",
+            "background_task_status_step",
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
     monitor_flow_step = graph.add_step(
-        "mlrun.serving.system_steps.BackgroundTaskStatus",
-        "background_task_status_step",
-        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
-    )
-    graph.add_step(
         "storey.Filter",
         "filter_none",
         _fn="(event is not None)",
-        after="background_task_status_step",
+        after="background_task_status_step" if background_task_status_step else None,
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
     )
+    if background_task_status_step:
+        monitor_flow_step = background_task_status_step
     graph.add_step(
         "mlrun.serving.system_steps.MonitoringPreProcessor",
         "monitoring_pre_processor_step",

@@ -466,14 +472,28 @@


 def add_system_steps_to_graph(
-    project: str,
+    project: str,
+    graph: RootFlowStep,
+    track_models: bool,
+    context,
+    serving_spec,
+    pause_until_background_task_completion: bool = True,
 ) -> RootFlowStep:
+    if not (isinstance(graph, RootFlowStep) and graph.include_monitored_step()):
+        return graph
     monitored_steps = graph.get_monitored_steps()
     graph = add_error_raiser_step(graph, monitored_steps)
     if track_models:
+        background_task_status_step = None
         graph, monitor_flow_step = add_monitoring_general_steps(
-            project,
+            project,
+            graph,
+            context,
+            serving_spec,
+            pause_until_background_task_completion,
         )
+        if background_task_status_step:
+            monitor_flow_step = background_task_status_step
         # Connect each model runner to the monitoring step:
         for step_name, step in monitored_steps.items():
             if monitor_flow_step.after:

@@ -485,6 +505,10 @@ def add_system_steps_to_graph(
                 monitor_flow_step.after = [
                     step_name,
                 ]
+    context.logger.info_with(
+        "Server graph after adding system steps",
+        graph=str(graph.steps),
+    )
     return graph


@@ -494,18 +518,13 @@ def v2_serving_init(context, namespace=None):
     context.logger.info("Initializing server from spec")
     spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
-
-    server.
-    [old lines 499-503 not captured in this diff view]
-    )
-    context.logger.info_with(
-        "Server graph after adding system steps",
-        graph=str(server.graph.steps),
-    )
+    server.graph = add_system_steps_to_graph(
+        server.project,
+        copy.deepcopy(server.graph),
+        spec.get("track_models"),
+        context,
+        spec,
+    )

     if config.log_level.lower() == "debug":
         server.verbose = True

@@ -544,17 +563,57 @@ async def async_execute_graph(
     data: DataItem,
     batching: bool,
     batch_size: Optional[int],
+    read_as_lists: bool,
+    nest_under_inputs: bool,
 ) -> list[Any]:
     spec = mlrun.utils.get_serving_spec()

-    source_filename = spec.get("filename", None)
     namespace = {}
-    [old lines 552-554 not captured in this diff view]
+    code = os.getenv("MLRUN_EXEC_CODE")
+    if code:
+        code = base64.b64decode(code).decode("utf-8")
+        exec(code, namespace)
+    else:
+        # TODO: find another way to get the local file path, or ensure that MLRUN_EXEC_CODE
+        # gets set in local flow and not just in the remote pod
+        source_filename = spec.get("filename", None)
+        if source_filename:
+            with open(source_filename) as f:
+                exec(f.read(), namespace)

     server = GraphServer.from_dict(spec)

+    if server.model_endpoint_creation_task_name:
+        context.logger.info(
+            f"Waiting for model endpoint creation task '{server.model_endpoint_creation_task_name}'..."
+        )
+        background_task = (
+            mlrun.get_run_db().wait_for_background_task_to_reach_terminal_state(
+                project=server.project,
+                name=server.model_endpoint_creation_task_name,
+            )
+        )
+        task_state = background_task.status.state
+        if task_state == mlrun.common.schemas.BackgroundTaskState.failed:
+            raise mlrun.errors.MLRunRuntimeError(
+                "Aborting job due to model endpoint creation background task failure"
+            )
+        elif task_state != mlrun.common.schemas.BackgroundTaskState.succeeded:
+            # this shouldn't happen, but we need to know if it does
+            raise mlrun.errors.MLRunRuntimeError(
+                "Aborting job because the model endpoint creation background task did not succeed "
+                f"(status='{task_state}')"
+            )
+
+    server.graph = add_system_steps_to_graph(
+        server.project,
+        copy.deepcopy(server.graph),
+        spec.get("track_models"),
+        context,
+        spec,
+        pause_until_background_task_completion=False,  # we've already awaited it
+    )
+
     if config.log_level.lower() == "debug":
         server.verbose = True
     context.logger.info_with("Initializing states", namespace=namespace)

@@ -588,7 +647,9 @@ async def async_execute_graph(

     batch = []
     for index, row in df.iterrows():
-        data = row.to_dict()
+        data = row.to_list() if read_as_lists else row.to_dict()
+        if nest_under_inputs:
+            data = {"inputs": data}
         if batching:
             batch.append(data)
             if len(batch) == batch_size:

@@ -612,6 +673,8 @@ def execute_graph(
     data: DataItem,
     batching: bool = False,
     batch_size: Optional[int] = None,
+    read_as_lists: bool = False,
+    nest_under_inputs: bool = False,
 ) -> (list[Any], Any):
     """
     Execute graph as a job, from start to finish.

@@ -621,10 +684,16 @@
     :param batching: Whether to push one or more batches into the graph rather than row by row.
     :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
         be pushed into the graph in one batch.
+    :param read_as_lists: Whether to read each row as a list instead of a dictionary.
+    :param nest_under_inputs: Whether to wrap each row with {"inputs": ...}.

     :return: A list of responses.
     """
-    return asyncio.run(
+    return asyncio.run(
+        async_execute_graph(
+            context, data, batching, batch_size, read_as_lists, nest_under_inputs
+        )
+    )


 def _set_callbacks(server, context):
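Note: the snippet below is an illustrative sketch, not part of the package diff. Assuming only standard pandas behavior, it shows how the new read_as_lists and nest_under_inputs flags in execute_graph reshape each DataFrame row before it is pushed into the graph.

import pandas as pd

df = pd.DataFrame({"a": [5], "b": [7]})
row = next(df.iterrows())[1]          # first row as a Series

as_dict = row.to_dict()               # default: {"a": 5, "b": 7}
as_list = row.to_list()               # read_as_lists=True: [5, 7]
nested = {"inputs": as_list}          # nest_under_inputs=True: {"inputs": [5, 7]}
print(as_dict, as_list, nested)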
mlrun/serving/states.py
CHANGED

@@ -35,7 +35,7 @@ from storey import ParallelExecutionMechanisms
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas as schemas
-from mlrun.artifacts.llm_prompt import LLMPromptArtifact
+from mlrun.artifacts.llm_prompt import LLMPromptArtifact, PlaceholderDefaultDict
 from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import (
     DatastoreProfileKafkaSource,

@@ -45,7 +45,7 @@ from mlrun.datastore.datastore_profile import (
 )
 from mlrun.datastore.model_provider.model_provider import ModelProvider
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
-from mlrun.utils import logger
+from mlrun.utils import get_data_from_path, logger, split_path

 from ..config import config
 from ..datastore import get_stream_pusher

@@ -501,10 +501,15 @@ class BaseStep(ModelObj):
     def verify_model_runner_step(
         self,
         step: "ModelRunnerStep",
+        step_model_endpoints_names: Optional[list[str]] = None,
+        verify_shared_models: bool = True,
     ):
         """
         Verify ModelRunnerStep, can be part of Flow graph and models can not repeat in graph.
-        :param step:
+        :param step: ModelRunnerStep to verify
+        :param step_model_endpoints_names: List of model endpoints names that are in the step.
+            if provided will ignore step models and verify only the models on list.
+        :param verify_shared_models: If True, verify that shared models are defined in the graph.
         """

         if not isinstance(step, ModelRunnerStep):

@@ -516,7 +521,7 @@ class BaseStep(ModelObj):
             raise GraphError(
                 "ModelRunnerStep can be added to 'Flow' topology graph only"
             )
-        step_model_endpoints_names = list(
+        step_model_endpoints_names = step_model_endpoints_names or list(
             step.class_args.get(schemas.ModelRunnerStepData.MODELS, {}).keys()
         )
         # Get all model_endpoints names that are in both lists

@@ -530,8 +535,9 @@ class BaseStep(ModelObj):
                 f"The graph already contains the model endpoints named - {common_endpoints_names}."
             )

-        [old lines 533-534 not captured in this diff view]
+        if verify_shared_models:
+            # Check if shared models are defined in the graph
+            self._verify_shared_models(root, step, step_model_endpoints_names)
         # Update model endpoints names in the root step
         root.update_model_endpoints_names(step_model_endpoints_names)

@@ -569,7 +575,9 @@ class BaseStep(ModelObj):
             llm_artifact, _ = mlrun.store_manager.get_store_artifact(
                 model_artifact_uri
             )
-            model_artifact_uri =
+            model_artifact_uri = mlrun.utils.remove_tag_from_artifact_uri(
+                llm_artifact.spec.parent_uri
+            )
             actual_shared_name = root.get_shared_model_name_by_artifact_uri(
                 model_artifact_uri
             )

@@ -1148,11 +1156,11 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
     def init(self):
         self.load()

-    def predict(self, body: Any) -> Any:
+    def predict(self, body: Any, **kwargs) -> Any:
         """Override to implement prediction logic. If the logic requires asyncio, override predict_async() instead."""
         return body

-    async def predict_async(self, body: Any) -> Any:
+    async def predict_async(self, body: Any, **kwargs) -> Any:
         """Override to implement prediction logic if the logic requires asyncio."""
         return body

@@ -1197,17 +1205,44 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):


 class LLModel(Model):
-    def __init__(
+    def __init__(
+        self, name: str, input_path: Optional[Union[str, list[str]]], **kwargs
+    ):
         super().__init__(name, **kwargs)
+        self._input_path = split_path(input_path)

     def predict(
-        self,
+        self,
+        body: Any,
+        messages: Optional[list[dict]] = None,
+        model_configuration: Optional[dict] = None,
+        **kwargs,
     ) -> Any:
+        if isinstance(
+            self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
+        ) and isinstance(self.model_provider, ModelProvider):
+            body["result"] = self.model_provider.invoke(
+                messages=messages,
+                as_str=True,
+                **(model_configuration or {}),
+            )
         return body

     async def predict_async(
-        self,
+        self,
+        body: Any,
+        messages: Optional[list[dict]] = None,
+        model_configuration: Optional[dict] = None,
+        **kwargs,
     ) -> Any:
+        if isinstance(
+            self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
+        ) and isinstance(self.model_provider, ModelProvider):
+            body["result"] = await self.model_provider.async_invoke(
+                messages=messages,
+                as_str=True,
+                **(model_configuration or {}),
+            )
         return body

     def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:

@@ -1246,12 +1281,34 @@ class LLModel(Model):
             return None, None
         prompt_legend = llm_prompt_artifact.spec.prompt_legend
         prompt_template = deepcopy(llm_prompt_artifact.read_prompt())
-        [old lines 1249-1254 not captured in this diff view]
+        input_data = copy(get_data_from_path(self._input_path, body))
+        if isinstance(input_data, dict):
+            kwargs = (
+                {
+                    place_holder: input_data.get(body_map["field"])
+                    for place_holder, body_map in prompt_legend.items()
+                }
+                if prompt_legend
+                else {}
+            )
+            input_data.update(kwargs)
+            default_place_holders = PlaceholderDefaultDict(lambda: None, input_data)
+            for message in prompt_template:
+                try:
+                    message["content"] = message["content"].format(**input_data)
+                except KeyError as e:
+                    logger.warning(
+                        "Input data was missing a placeholder, placeholder stay unformatted",
+                        key_error=e,
+                    )
+                    message["content"] = message["content"].format_map(
+                        default_place_holders
+                    )
+        else:
+            logger.warning(
+                f"Expected input data to be a dict, but received input data from type {type(input_data)} prompt "
+                f"template stay unformatted",
+            )
         return prompt_template, llm_prompt_artifact.spec.model_configuration

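Aside (not part of the diff): a minimal self-contained sketch of the format()/format_map() fallback pattern used in the prompt-formatting hunk above. _LenientPlaceholders is a hypothetical stand-in that only approximates what PlaceholderDefaultDict is assumed to do; its real implementation is not shown in this diff.

class _LenientPlaceholders(dict):
    def __missing__(self, key):
        return "{" + key + "}"  # leave unknown placeholders unformatted

template = "Summarize {text} in {language}"
data = {"text": "the release notes"}  # "language" is intentionally missing

try:
    rendered = template.format(**data)          # raises KeyError for "language"
except KeyError:
    rendered = template.format_map(_LenientPlaceholders(data))

print(rendered)  # Summarize the release notes in {language}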
@@ -1551,11 +1608,27 @@ class ModelRunnerStep(MonitoredStep):
         :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
                         that been configured in the model artifact, please note that those outputs need to
                         be equal to the model_class predict method outputs (length, and order)
-        :param input_path:
-        [old lines 1555-1558 not captured in this diff view]
+        :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
+                           this require that the event body will behave like a dict, expects scopes to be
+                           defined by dot notation (e.g "data.d").
+                           examples: input_path="data.b"
+                           event: {"data":{"a": 5, "b": 7}}, means monitored body will be 7.
+                           event: {"data":{"a": [5, 9], "b": [7, 8]}} means monitored body will be [7,8].
+                           event: {"data":{"a": "extra_data", "b": {"f0": [1, 2]}}} means monitored body will
+                           be {"f0": [1, 2]}.
+                           if a ``list`` or ``list of lists`` is provided, it must follow the order and
+                           size defined by the input schema.
+        :param result_path: when specified selects the key/path in the output event to use as model monitoring
+                            outputs this require that the output event body will behave like a dict,
+                            expects scopes to be defined by dot notation (e.g "data.d").
+                            examples: result_path="out.b"
+                            event: {"out":{"a": 5, "b": 7}}, means monitored body will be 7.
+                            event: {"out":{"a": [5, 9], "b": [7, 8]}} means monitored body will be [7,8]
+                            event: {"out":{"a": "extra_data", "b": {"f0": [1, 2]}}} means monitored body will
+                            be {"f0": [1, 2]}
+                            if a ``list`` or ``list of lists`` is provided, it must follow the order and
+                            size defined by the output schema.
+
         :param override: bool allow override existing model on the current ModelRunnerStep.
         :param model_parameters: Parameters for model instantiation
         """
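Aside (not part of the diff): to make the dot-notation path semantics described in the docstring above concrete, a minimal stand-alone sketch. extract() is a hypothetical helper, not mlrun's get_data_from_path implementation.

def extract(event: dict, path: str):
    value = event
    for key in path.split("."):   # walk the nested dict, one key per dot segment
        value = value[key]
    return value

event = {"data": {"a": [5, 9], "b": [7, 8]}}
print(extract(event, "data.b"))  # [7, 8] -> the monitored body from the docstring example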
@@ -1574,7 +1647,7 @@ class ModelRunnerStep(MonitoredStep):
         ):
             try:
                 model_artifact, _ = mlrun.store_manager.get_store_artifact(
-                    model_artifact
+                    mlrun.utils.remove_tag_from_artifact_uri(model_artifact)
                 )
             except mlrun.errors.MLRunNotFoundError:
                 raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")

@@ -1586,6 +1659,11 @@ class ModelRunnerStep(MonitoredStep):
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
             else model_artifact
         )
+        model_artifact = (
+            mlrun.utils.remove_tag_from_artifact_uri(model_artifact)
+            if model_artifact
+            else None
+        )
         model_parameters["artifact_uri"] = model_parameters.get(
             "artifact_uri", model_artifact
         )

@@ -1601,6 +1679,11 @@ class ModelRunnerStep(MonitoredStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Model with name {endpoint_name} already exists in this ModelRunnerStep."
             )
+        root = self._extract_root_step()
+        if isinstance(root, RootFlowStep):
+            self.verify_model_runner_step(
+                self, [endpoint_name], verify_shared_models=False
+            )
         ParallelExecutionMechanisms.validate(execution_mechanism)
         self.class_args[schemas.ModelRunnerStepData.MODEL_TO_EXECUTION_MECHANISM] = (
             self.class_args.get(

@@ -1677,15 +1760,6 @@ class ModelRunnerStep(MonitoredStep):
         )
         return output_schema

-    @staticmethod
-    def _split_path(path: str) -> Union[str, list[str], None]:
-        if path is not None:
-            parsed_path = path.split(".")
-            if len(parsed_path) == 1:
-                parsed_path = parsed_path[0]
-            return parsed_path
-        return path
-
     def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
         monitoring_data = deepcopy(
             self.class_args.get(

@@ -1710,15 +1784,11 @@ class ModelRunnerStep(MonitoredStep):
             ][model][schemas.MonitoringData.OUTPUTS] = monitoring_data[model][
                 schemas.MonitoringData.OUTPUTS
             ]
-            monitoring_data[model][schemas.MonitoringData.INPUT_PATH] = (
-                [old line 1714 not captured in this diff view]
-                monitoring_data[model][schemas.MonitoringData.INPUT_PATH]
-            )
+            monitoring_data[model][schemas.MonitoringData.INPUT_PATH] = split_path(
+                monitoring_data[model][schemas.MonitoringData.INPUT_PATH]
             )
-            monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = (
-                [old line 1719 not captured in this diff view]
-                monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
-            )
+            monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = split_path(
+                monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
             )
         return monitoring_data

@@ -1736,6 +1806,13 @@ class ModelRunnerStep(MonitoredStep):
             model_selector = get_class(model_selector, namespace)()
         model_objects = []
         for model, model_params in models.values():
+            model_params[schemas.MonitoringData.INPUT_PATH] = (
+                self.class_args.get(
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
+                )
+                .get(model_params.get("name"), {})
+                .get(schemas.MonitoringData.INPUT_PATH)
+            )
             model = get_class(model, namespace).from_dict(
                 model_params, init_with_params=True
             )

@@ -2385,7 +2462,13 @@ class FlowStep(BaseStep):
         if not step.before and not any(
             [step.name in other_step.after for other_step in self._steps.values()]
         ):
-            [old line 2388 not captured in this diff view]
+            if any(
+                [
+                    getattr(step_in_graph, "responder", False)
+                    for step_in_graph in self._steps.values()
+                ]
+            ):
+                step.responder = True
             return

         for step_name in step.before:

@@ -2468,7 +2551,7 @@ class RootFlowStep(FlowStep):
         name: str,
         model_class: Union[str, Model],
         execution_mechanism: Union[str, ParallelExecutionMechanisms],
-        model_artifact:
+        model_artifact: Union[str, ModelArtifact],
         override: bool = False,
         **model_parameters,
     ) -> None:

@@ -2520,6 +2603,7 @@ class RootFlowStep(FlowStep):
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
             else model_artifact
         )
+        model_artifact = mlrun.utils.remove_tag_from_artifact_uri(model_artifact)
         model_parameters["artifact_uri"] = model_parameters.get(
             "artifact_uri", model_artifact
         )

@@ -2907,7 +2991,7 @@ def params_to_step(
         step = QueueStep(name, **class_args)

     elif class_name and hasattr(class_name, "to_dict"):
-        struct = class_name.to_dict()
+        struct = deepcopy(class_name.to_dict())
         kind = struct.get("kind", StepKinds.task)
         name = (
             name