mlrun 1.10.0rc15__py3-none-any.whl → 1.10.0rc17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/llm_prompt.py +6 -0
- mlrun/common/constants.py +3 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +19 -0
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -0
- mlrun/config.py +1 -5
- mlrun/db/base.py +7 -0
- mlrun/db/httpdb.py +26 -0
- mlrun/db/nopdb.py +5 -0
- mlrun/launcher/local.py +13 -0
- mlrun/model_monitoring/controller.py +175 -121
- mlrun/model_monitoring/stream_processing.py +29 -2
- mlrun/projects/pipelines.py +44 -24
- mlrun/projects/project.py +7 -3
- mlrun/runtimes/utils.py +0 -2
- mlrun/serving/server.py +125 -38
- mlrun/serving/states.py +119 -62
- mlrun/serving/system_steps.py +100 -64
- mlrun/utils/helpers.py +46 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc15.dist-info → mlrun-1.10.0rc17.dist-info}/METADATA +1 -1
- {mlrun-1.10.0rc15.dist-info → mlrun-1.10.0rc17.dist-info}/RECORD +28 -28
- {mlrun-1.10.0rc15.dist-info → mlrun-1.10.0rc17.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc15.dist-info → mlrun-1.10.0rc17.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc15.dist-info → mlrun-1.10.0rc17.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc15.dist-info → mlrun-1.10.0rc17.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py
CHANGED
@@ -3901,6 +3901,7 @@ class MlrunProject(ModelObj):
         start: Optional[datetime.datetime] = None,
         end: Optional[datetime.datetime] = None,
         top_level: bool = False,
+        mode: Optional[mlrun.common.schemas.EndpointMode] = None,
         uids: Optional[list[str]] = None,
         latest_only: bool = False,
         tsdb_metrics: bool = False,
@@ -3916,8 +3917,9 @@ class MlrunProject(ModelObj):
         5) function_tag
         6) labels
         7) top level
-        8)
-        9)
+        8) mode
+        9) uids
+        10) start and end time, corresponding to the `created` field.
         By default, when no filters are applied, all available endpoints for the given project will be listed.

         In addition, this functions provides a facade for listing endpoint related metrics. This facade is time-based
@@ -3937,6 +3939,8 @@ class MlrunProject(ModelObj):
         :param start: The start time to filter by.Corresponding to the `created` field.
         :param end: The end time to filter by. Corresponding to the `created` field.
         :param top_level: If true will return only routers and endpoint that are NOT children of any router.
+        :param mode: Specifies the mode of the model endpoint. Can be "real-time", "batch", or both if set
+            to None.
         :param uids: If passed will return a list `ModelEndpoint` object with uid in uids.
         :param tsdb_metrics: When True, the time series metrics will be added to the output
             of the resulting.
@@ -3958,6 +3962,7 @@ class MlrunProject(ModelObj):
             start=start,
             end=end,
             top_level=top_level,
+            mode=mode,
             uids=uids,
             latest_only=latest_only,
             tsdb_metrics=tsdb_metrics,
@@ -5073,7 +5078,6 @@ class MlrunProject(ModelObj):
         :param states: List only runs whose state is one of the provided states.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
             returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used (will be removed in 1.10.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
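A minimal sketch of the new `mode` filter in use. The `EndpointMode` enum is newly exported from `mlrun.common.schemas` in this release, but its members are not shown in this diff, so the member name below is an assumption:

    import mlrun
    import mlrun.common.schemas as schemas

    project = mlrun.get_or_create_project("my-project")
    # mode=None (the default) returns both real-time and batch endpoints.
    batch_endpoints = project.list_model_endpoints(
        mode=schemas.EndpointMode.BATCH,  # assumed member name
        top_level=True,
    )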
mlrun/runtimes/utils.py
CHANGED
@@ -445,8 +445,6 @@ def enrich_run_labels(
     labels_enrichment = {
         mlrun_constants.MLRunInternalLabels.owner: os.environ.get("V3IO_USERNAME")
         or getpass.getuser(),
-        # TODO: remove this in 1.10.0
-        mlrun_constants.MLRunInternalLabels.v3io_user: os.environ.get("V3IO_USERNAME"),
     }

     # Resolve which label keys to enrich
mlrun/serving/server.py
CHANGED
@@ -22,8 +22,10 @@ import os
 import socket
 import traceback
 import uuid
+from datetime import datetime, timezone
 from typing import Any, Optional, Union

+import pandas as pd
 import storey
 from nuclio import Context as NuclioContext
 from nuclio.request import Logger as NuclioLogger
@@ -40,6 +42,7 @@ from mlrun.secrets import SecretsStore

 from ..common.helpers import parse_versioned_object_uri
 from ..common.schemas.model_monitoring.constants import FileTargetKind
+from ..common.schemas.serving import MAX_BATCH_JOB_DURATION
 from ..datastore import DataItem, get_stream_pusher
 from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
@@ -350,33 +353,33 @@ add_error_raiser_step(
     monitored_steps_raisers = {}
     user_steps = list(graph.steps.values())
     for monitored_step in monitored_steps.values():
-        [27 replaced lines; their content is not rendered in this diff view]
+        error_step = graph.add_step(
+            class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
+            name=f"{monitored_step.name}_error_raise",
+            after=monitored_step.name,
+            full_event=True,
+            raise_exception=monitored_step.raise_exception,
+            models_names=list(monitored_step.class_args["models"].keys()),
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
+        if monitored_step.responder:
+            monitored_step.responder = False
+            error_step.respond()
+        monitored_steps_raisers[monitored_step.name] = error_step.name
+        error_step.on_error = monitored_step.on_error
+    if monitored_steps_raisers:
+        for step in user_steps:
+            if step.after:
+                if isinstance(step.after, list):
+                    for i in range(len(step.after)):
+                        if step.after[i] in monitored_steps_raisers:
+                            step.after[i] = monitored_steps_raisers[step.after[i]]
+                else:
+                    if (
+                        isinstance(step.after, str)
+                        and step.after in monitored_steps_raisers
+                    ):
+                        step.after = monitored_steps_raisers[step.after]
     return graph


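The effect on graph wiring, sketched with hypothetical step names: any step declared with `after="my_runner"` is repointed to the raiser, so downstream steps only run once per-model errors have been inspected (and re-raised when `raise_exception` is set):

    # before add_error_raiser_step():
    #   my_runner -> postprocess
    # after:
    #   my_runner -> my_runner_error_raise -> postprocess
    # If my_runner was the responder, my_runner_error_raise responds instead.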
@@ -561,6 +564,7 @@ def v2_serving_init(context, namespace=None):
 async def async_execute_graph(
     context: MLClientCtx,
     data: DataItem,
+    timestamp_column: Optional[str],
     batching: bool,
     batch_size: Optional[int],
     read_as_lists: bool,
@@ -605,10 +609,43 @@ async def async_execute_graph(
             f"(status='{task_state}')"
         )

+    df = data.as_df()
+
+    if df.empty:
+        context.logger.warn("Job terminated due to empty inputs (0 rows)")
+        return []
+
+    track_models = spec.get("track_models")
+
+    if track_models and timestamp_column:
+        context.logger.info(f"Sorting dataframe by {timestamp_column}")
+        df[timestamp_column] = pd.to_datetime(  # in case it's a string
+            df[timestamp_column]
+        )
+        df.sort_values(by=timestamp_column, inplace=True)
+        if len(df) > 1:
+            start_time = df[timestamp_column].iloc[0]
+            end_time = df[timestamp_column].iloc[-1]
+            time_range = end_time - start_time
+            start_time = start_time.isoformat()
+            end_time = end_time.isoformat()
+            # TODO: tie this to the controller's base period
+            if time_range > pd.Timedelta(MAX_BATCH_JOB_DURATION):
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"Dataframe time range is too long: {time_range}. "
+                    "Please disable tracking or reduce the input dataset's time range below the defined limit "
+                    f"of {MAX_BATCH_JOB_DURATION}."
+                )
+        else:
+            start_time = end_time = df["timestamp"].iloc[0].isoformat()
+    else:
+        # end time will be set from clock time when the batch completes
+        start_time = datetime.now(tz=timezone.utc).isoformat()
+
     server.graph = add_system_steps_to_graph(
         server.project,
         copy.deepcopy(server.graph),
-        [removed line; content not rendered in this diff view]
+        track_models,
         context,
         spec,
         pause_until_background_task_completion=False,  # we've already awaited it
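A standalone sketch of the new time-range guard, assuming `MAX_BATCH_JOB_DURATION` is a pandas-parsable duration (its actual value lives in the newly added `mlrun/common/schemas/serving.py` and is not shown in this diff):

    import pandas as pd

    ts = pd.to_datetime(["2025-01-01T00:00:00", "2025-01-01T06:00:00"])
    time_range = ts[-1] - ts[0]        # Timedelta('0 days 06:00:00')
    time_range > pd.Timedelta("1D")    # False, i.e. within a hypothetical 1-day limit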
@@ -633,19 +670,28 @@ async def async_execute_graph(
     if server.verbose:
         context.logger.info(server.to_yaml())

-    df = data.as_df()
-
-    responses = []
-
     async def run(body):
         event = storey.Event(id=index, body=body)
-
-
+        if timestamp_column:
+            if batching:
+                # we use the first row in the batch to determine the timestamp for the whole batch
+                body = body[0]
+            if not isinstance(body, dict):
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"When timestamp_column=True, event body must be a dict – got {type(body).__name__} instead"
+                )
+            if timestamp_column not in body:
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"Event body '{body}' did not contain timestamp column '{timestamp_column}'"
+                )
+            event._original_timestamp = body[timestamp_column]
         return await server.run(event, context)

     if batching and not batch_size:
         batch_size = len(df)

     batch = []
+    tasks = []
     for index, row in df.iterrows():
         data = row.to_list() if read_as_lists else row.to_dict()
         if nest_under_inputs:
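Rows (or batches) are now submitted as concurrent tasks and gathered at the end; `asyncio.gather` returns results in submission order, so `responses` still lines up with the input. A standalone illustration of that pattern:

    import asyncio

    async def run(body):
        await asyncio.sleep(0)  # stand-in for awaiting server.run(event, context)
        return body * 2

    async def main():
        tasks = [asyncio.create_task(run(i)) for i in range(3)]
        return await asyncio.gather(*tasks)  # submission order kept: [0, 2, 4]

    print(asyncio.run(main()))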
@@ -653,24 +699,56 @@ async def async_execute_graph(
         if batching:
             batch.append(data)
             if len(batch) == batch_size:
-                [removed line; content not rendered in this diff view]
+                tasks.append(asyncio.create_task(run(batch)))
                 batch = []
         else:
-            [removed line; content not rendered in this diff view]
+            tasks.append(asyncio.create_task(run(data)))

     if batch:
-        [removed line; content not rendered in this diff view]
+        tasks.append(asyncio.create_task(run(batch)))
+
+    responses = await asyncio.gather(*tasks)

     termination_result = server.wait_for_completion()
     if asyncio.iscoroutine(termination_result):
         await termination_result

+    model_endpoint_uids = spec.get("model_endpoint_uids", [])
+
+    # needed for output_stream to be created
+    server = GraphServer.from_dict(spec)
+    server.init_states(None, namespace)
+
+    batch_completion_time = datetime.now(tz=timezone.utc).isoformat()
+
+    if not timestamp_column:
+        end_time = batch_completion_time
+
+    mm_stream_record = dict(
+        kind="batch_complete",
+        project=context.project,
+        first_timestamp=start_time,
+        last_timestamp=end_time,
+        batch_completion_time=batch_completion_time,
+    )
+    output_stream = server.context.stream.output_stream
+    for mep_uid in spec.get("model_endpoint_uids", []):
+        mm_stream_record["endpoint_id"] = mep_uid
+        output_stream.push(mm_stream_record, partition_key=mep_uid)
+
+    context.logger.info(
+        f"Job completed processing {len(df)} rows",
+        timestamp_column=timestamp_column,
+        model_endpoint_uids=model_endpoint_uids,
+    )
+
     return responses


 def execute_graph(
     context: MLClientCtx,
     data: DataItem,
+    timestamp_column: Optional[str] = None,
     batching: bool = False,
     batch_size: Optional[int] = None,
     read_as_lists: bool = False,
@@ -681,6 +759,9 @@ def execute_graph(

     :param context: The job's execution client context.
     :param data: The input data to the job, to be pushed into the graph row by row, or in batches.
+    :param timestamp_column: The name of the column that will be used as the timestamp for model monitoring purposes.
+        when timestamp_column is used in conjunction with batching, the first timestamp will be used for the entire
+        batch.
     :param batching: Whether to push one or more batches into the graph rather than row by row.
     :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
         be pushed into the graph in one batch.
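A hypothetical invocation from a batch job handler (`context` and `data` are supplied by the MLRun job runtime; the column name is illustrative):

    responses = execute_graph(
        context,
        data,
        timestamp_column="timestamp",  # sorts rows and stamps events for monitoring
        batching=True,
        batch_size=256,
    )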
@@ -691,7 +772,13 @@ def execute_graph(
     """
     return asyncio.run(
         async_execute_graph(
-            context,
+            context,
+            data,
+            timestamp_column,
+            batching,
+            batch_size,
+            read_as_lists,
+            nest_under_inputs,
         )
     )

mlrun/serving/states.py
CHANGED
@@ -35,7 +35,7 @@ from storey import ParallelExecutionMechanisms
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas as schemas
-from mlrun.artifacts.llm_prompt import LLMPromptArtifact
+from mlrun.artifacts.llm_prompt import LLMPromptArtifact, PlaceholderDefaultDict
 from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import (
     DatastoreProfileKafkaSource,
@@ -45,7 +45,7 @@ from mlrun.datastore.datastore_profile import (
 )
 from mlrun.datastore.model_provider.model_provider import ModelProvider
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
-from mlrun.utils import logger
+from mlrun.utils import get_data_from_path, logger, split_path

 from ..config import config
 from ..datastore import get_stream_pusher
@@ -501,10 +501,15 @@ class BaseStep(ModelObj):
     def verify_model_runner_step(
         self,
         step: "ModelRunnerStep",
+        step_model_endpoints_names: Optional[list[str]] = None,
+        verify_shared_models: bool = True,
     ):
         """
         Verify ModelRunnerStep, can be part of Flow graph and models can not repeat in graph.
-        :param step:
+        :param step: ModelRunnerStep to verify
+        :param step_model_endpoints_names: List of model endpoints names that are in the step.
+            if provided will ignore step models and verify only the models on list.
+        :param verify_shared_models: If True, verify that shared models are defined in the graph.
         """

         if not isinstance(step, ModelRunnerStep):
@@ -516,7 +521,7 @@ class BaseStep(ModelObj):
             raise GraphError(
                 "ModelRunnerStep can be added to 'Flow' topology graph only"
             )
-        step_model_endpoints_names = list(
+        step_model_endpoints_names = step_model_endpoints_names or list(
             step.class_args.get(schemas.ModelRunnerStepData.MODELS, {}).keys()
         )
         # Get all model_endpoints names that are in both lists
@@ -530,8 +535,9 @@ class BaseStep(ModelObj):
                 f"The graph already contains the model endpoints named - {common_endpoints_names}."
             )

-        [2 removed lines; content not rendered in this diff view]
+        if verify_shared_models:
+            # Check if shared models are defined in the graph
+            self._verify_shared_models(root, step, step_model_endpoints_names)
         # Update model endpoints names in the root step
         root.update_model_endpoints_names(step_model_endpoints_names)

@@ -569,7 +575,9 @@ class BaseStep(ModelObj):
                 llm_artifact, _ = mlrun.store_manager.get_store_artifact(
                     model_artifact_uri
                 )
-                model_artifact_uri =
+                model_artifact_uri = mlrun.utils.remove_tag_from_artifact_uri(
+                    llm_artifact.spec.parent_uri
+                )
                 actual_shared_name = root.get_shared_model_name_by_artifact_uri(
                     model_artifact_uri
                 )
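`remove_tag_from_artifact_uri` is among the helpers added to `mlrun/utils/helpers.py` in this release; its implementation is not shown in this diff. The assumed effect, judging by its name and call sites, is stripping a trailing `:tag` qualifier so URIs compare equal regardless of tag:

    uri = "store://models/my-project/my-model:champion"  # illustrative URI
    mlrun.utils.remove_tag_from_artifact_uri(uri)
    # assumed result: "store://models/my-project/my-model"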
@@ -1148,11 +1156,11 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):
     def init(self):
         self.load()

-    def predict(self, body: Any) -> Any:
+    def predict(self, body: Any, **kwargs) -> Any:
         """Override to implement prediction logic. If the logic requires asyncio, override predict_async() instead."""
         return body

-    async def predict_async(self, body: Any) -> Any:
+    async def predict_async(self, body: Any, **kwargs) -> Any:
         """Override to implement prediction logic if the logic requires asyncio."""
         return body

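Custom model classes should follow suit and accept extra keyword arguments (such as the `messages` and `model_configuration` that `LLModel` now receives). A minimal subclass sketch:

    class MyModel(Model):
        def predict(self, body, **kwargs):
            # tolerate extra keyword arguments rather than failing on new callers
            return {"sum": sum(body["inputs"])}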
@@ -1197,11 +1205,18 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):


 class LLModel(Model):
-    def __init__(
+    def __init__(
+        self, name: str, input_path: Optional[Union[str, list[str]]], **kwargs
+    ):
         super().__init__(name, **kwargs)
+        self._input_path = split_path(input_path)

     def predict(
-        self,
+        self,
+        body: Any,
+        messages: Optional[list[dict]] = None,
+        model_configuration: Optional[dict] = None,
+        **kwargs,
     ) -> Any:
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
@@ -1214,7 +1229,11 @@ class LLModel(Model):
         return body

     async def predict_async(
-        self,
+        self,
+        body: Any,
+        messages: Optional[list[dict]] = None,
+        model_configuration: Optional[dict] = None,
+        **kwargs,
     ) -> Any:
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
@@ -1262,12 +1281,34 @@ class LLModel(Model):
             return None, None
         prompt_legend = llm_prompt_artifact.spec.prompt_legend
         prompt_template = deepcopy(llm_prompt_artifact.read_prompt())
-        [6 removed lines; content not rendered in this diff view]
+        input_data = copy(get_data_from_path(self._input_path, body))
+        if isinstance(input_data, dict):
+            kwargs = (
+                {
+                    place_holder: input_data.get(body_map["field"])
+                    for place_holder, body_map in prompt_legend.items()
+                }
+                if prompt_legend
+                else {}
+            )
+            input_data.update(kwargs)
+            default_place_holders = PlaceholderDefaultDict(lambda: None, input_data)
+            for message in prompt_template:
+                try:
+                    message["content"] = message["content"].format(**input_data)
+                except KeyError as e:
+                    logger.warning(
+                        "Input data was missing a placeholder, placeholder stay unformatted",
+                        key_error=e,
+                    )
+                    message["content"] = message["content"].format_map(
+                        default_place_holders
+                    )
+        else:
+            logger.warning(
+                f"Expected input data to be a dict, but received input data from type {type(input_data)} prompt "
+                f"template stay unformatted",
+            )
         return prompt_template, llm_prompt_artifact.spec.model_configuration

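`PlaceholderDefaultDict` is defined in `mlrun/artifacts/llm_prompt.py` and is not shown here. The fallback path relies on standard `str.format_map` behavior, where a mapping's `__missing__` hook can keep unknown placeholders unformatted; a stand-in illustration of that mechanism:

    class KeepMissing(dict):
        def __missing__(self, key):
            return "{" + key + "}"  # echo the placeholder back verbatim

    "Hello {user}, score is {score}".format_map(KeepMissing(user="Dana"))
    # -> 'Hello Dana, score is {score}'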
@@ -1567,11 +1608,27 @@ class ModelRunnerStep(MonitoredStep):
        :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
                        that been configured in the model artifact, please note that those outputs need to
                        be equal to the model_class predict method outputs (length, and order)
-       :param input_path:
-       [4 removed continuation lines; content not rendered in this diff view]
+       :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
+                          this require that the event body will behave like a dict, expects scopes to be
+                          defined by dot notation (e.g "data.d").
+                          examples: input_path="data.b"
+                          event: {"data":{"a": 5, "b": 7}}, means monitored body will be 7.
+                          event: {"data":{"a": [5, 9], "b": [7, 8]}} means monitored body will be [7,8].
+                          event: {"data":{"a": "extra_data", "b": {"f0": [1, 2]}}} means monitored body will
+                          be {"f0": [1, 2]}.
+                          if a ``list`` or ``list of lists`` is provided, it must follow the order and
+                          size defined by the input schema.
+       :param result_path: when specified selects the key/path in the output event to use as model monitoring
+                           outputs this require that the output event body will behave like a dict,
+                           expects scopes to be defined by dot notation (e.g "data.d").
+                           examples: result_path="out.b"
+                           event: {"out":{"a": 5, "b": 7}}, means monitored body will be 7.
+                           event: {"out":{"a": [5, 9], "b": [7, 8]}} means monitored body will be [7,8]
+                           event: {"out":{"a": "extra_data", "b": {"f0": [1, 2]}}} means monitored body will
+                           be {"f0": [1, 2]}
+                           if a ``list`` or ``list of lists`` is provided, it must follow the order and
+                           size defined by the output schema.
+
        :param override: bool allow override existing model on the current ModelRunnerStep.
        :param model_parameters: Parameters for model instantiation
        """
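The dot-notation selection described above can be pictured in a few lines (a sketch mirroring what `mlrun.utils.get_data_from_path` is expected to do, not the helper itself):

    event = {"data": {"a": 5, "b": {"f0": [1, 2]}}}
    path = ["data", "b"]  # i.e. split_path("data.b")
    value = event
    for key in path:
        value = value[key]
    # value == {"f0": [1, 2]}, which becomes the monitored body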
@@ -1590,7 +1647,7 @@ class ModelRunnerStep(MonitoredStep):
     ):
         try:
             model_artifact, _ = mlrun.store_manager.get_store_artifact(
-                model_artifact
+                mlrun.utils.remove_tag_from_artifact_uri(model_artifact)
             )
         except mlrun.errors.MLRunNotFoundError:
             raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")
@@ -1602,6 +1659,11 @@ class ModelRunnerStep(MonitoredStep):
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
             else model_artifact
         )
+        model_artifact = (
+            mlrun.utils.remove_tag_from_artifact_uri(model_artifact)
+            if model_artifact
+            else None
+        )
         model_parameters["artifact_uri"] = model_parameters.get(
             "artifact_uri", model_artifact
         )
@@ -1617,6 +1679,11 @@ class ModelRunnerStep(MonitoredStep):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Model with name {endpoint_name} already exists in this ModelRunnerStep."
             )
+        root = self._extract_root_step()
+        if isinstance(root, RootFlowStep):
+            self.verify_model_runner_step(
+                self, [endpoint_name], verify_shared_models=False
+            )
         ParallelExecutionMechanisms.validate(execution_mechanism)
         self.class_args[schemas.ModelRunnerStepData.MODEL_TO_EXECUTION_MECHANISM] = (
             self.class_args.get(
@@ -1687,21 +1754,13 @@ class ModelRunnerStep(MonitoredStep):
         except (
             mlrun.errors.MLRunNotFoundError,
             mlrun.errors.MLRunInvalidArgumentError,
-        ):
+        ) as ex:
             logger.warning(
-                f"Model endpoint not found, using default output schema for model {name}"
+                f"Model endpoint not found, using default output schema for model {name}",
+                error=f"{type(ex).__name__}: {ex}",
             )
         return output_schema

-    @staticmethod
-    def _split_path(path: str) -> Union[str, list[str], None]:
-        if path is not None:
-            parsed_path = path.split(".")
-            if len(parsed_path) == 1:
-                parsed_path = parsed_path[0]
-            return parsed_path
-        return path
-
     def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
         monitoring_data = deepcopy(
             self.class_args.get(
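`split_path` now lives in `mlrun.utils`; judging by the removed `_split_path` above, its behavior is:

    from mlrun.utils import split_path

    split_path("data.b")  # -> ["data", "b"]
    split_path("b")       # -> "b" (a single segment stays a plain string)
    split_path(None)      # -> None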
@@ -1710,33 +1769,17 @@ class ModelRunnerStep(MonitoredStep):
         )
         if isinstance(monitoring_data, dict):
             for model in monitoring_data:
-                monitoring_data[model][schemas.MonitoringData.
-                    monitoring_data
-                    or self._get_model_endpoint_output_schema(
-                        name=model,
-                        project=self.context.project if self.context else None,
-                        uid=monitoring_data.get(model, {}).get(
-                            mlrun.common.schemas.MonitoringData.MODEL_ENDPOINT_UID
-                        ),
-                    )
+                monitoring_data[model][schemas.MonitoringData.INPUT_PATH] = split_path(
+                    monitoring_data[model][schemas.MonitoringData.INPUT_PATH]
                 )
-
-
-                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
-                ][model][schemas.MonitoringData.OUTPUTS] = monitoring_data[model][
-                    schemas.MonitoringData.OUTPUTS
-                ]
-                monitoring_data[model][schemas.MonitoringData.INPUT_PATH] = (
-                    self._split_path(
-                        monitoring_data[model][schemas.MonitoringData.INPUT_PATH]
-                    )
-                )
-                monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = (
-                    self._split_path(
-                        monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
-                    )
+                monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = split_path(
+                    monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
                 )
             return monitoring_data
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Monitoring data must be a dictionary."
+            )

     def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
         self.context = context
@@ -1752,6 +1795,13 @@ class ModelRunnerStep(MonitoredStep):
             model_selector = get_class(model_selector, namespace)()
         model_objects = []
         for model, model_params in models.values():
+            model_params[schemas.MonitoringData.INPUT_PATH] = (
+                self.class_args.get(
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
+                )
+                .get(model_params.get("name"), {})
+                .get(schemas.MonitoringData.INPUT_PATH)
+            )
             model = get_class(model, namespace).from_dict(
                 model_params, init_with_params=True
             )
@@ -2401,7 +2451,13 @@ class FlowStep(BaseStep):
         if not step.before and not any(
             [step.name in other_step.after for other_step in self._steps.values()]
         ):
-            [removed line; content not rendered in this diff view]
+            if any(
+                [
+                    getattr(step_in_graph, "responder", False)
+                    for step_in_graph in self._steps.values()
+                ]
+            ):
+                step.responder = True
             return

         for step_name in step.before:
@@ -2484,7 +2540,7 @@ class RootFlowStep(FlowStep):
         name: str,
         model_class: Union[str, Model],
         execution_mechanism: Union[str, ParallelExecutionMechanisms],
-        model_artifact:
+        model_artifact: Union[str, ModelArtifact],
         override: bool = False,
         **model_parameters,
     ) -> None:
@@ -2536,6 +2592,7 @@ class RootFlowStep(FlowStep):
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
             else model_artifact
         )
+        model_artifact = mlrun.utils.remove_tag_from_artifact_uri(model_artifact)
         model_parameters["artifact_uri"] = model_parameters.get(
             "artifact_uri", model_artifact
         )
@@ -2923,7 +2980,7 @@ def params_to_step(
         step = QueueStep(name, **class_args)

     elif class_name and hasattr(class_name, "to_dict"):
-        struct = class_name.to_dict()
+        struct = deepcopy(class_name.to_dict())
         kind = struct.get("kind", StepKinds.task)
         name = (
             name