mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +21 -15
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +9 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +13 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +34 -21
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/base.py +265 -7
- mlrun/datastore/datastore.py +10 -5
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +367 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +211 -74
- mlrun/datastore/model_provider/openai_provider.py +243 -71
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +27 -19
- mlrun/db/httpdb.py +57 -48
- mlrun/db/nopdb.py +25 -10
- mlrun/execution.py +55 -13
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +2 -0
- mlrun/model.py +9 -3
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +388 -138
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +36 -13
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +29 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
- mlrun/model_monitoring/helpers.py +28 -5
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +16 -11
- mlrun/projects/pipelines.py +2 -2
- mlrun/projects/project.py +157 -69
- mlrun/run.py +97 -20
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +147 -17
- mlrun/runtimes/nuclio/function.py +72 -27
- mlrun/runtimes/nuclio/serving.py +102 -20
- mlrun/runtimes/pod.py +213 -21
- mlrun/runtimes/utils.py +49 -9
- mlrun/secrets.py +54 -13
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +230 -40
- mlrun/serving/states.py +605 -232
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +136 -81
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +215 -83
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/mail.py +38 -15
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py
CHANGED
@@ -17,21 +17,26 @@ __all__ = ["GraphServer", "create_graph_server", "GraphContext", "MockEvent"]
 import asyncio
 import base64
 import copy
+import importlib
 import json
 import os
 import socket
 import traceback
 import uuid
+from collections import defaultdict
+from datetime import datetime, timezone
 from typing import Any, Optional, Union
 
+import pandas as pd
 import storey
 from nuclio import Context as NuclioContext
 from nuclio.request import Logger as NuclioLogger
 
 import mlrun
-import mlrun.common.constants
 import mlrun.common.helpers
 import mlrun.common.schemas
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.datastore.datastore_profile as ds_profile
 import mlrun.model_monitoring
 import mlrun.utils
 from mlrun.config import config
@@ -40,12 +45,13 @@ from mlrun.secrets import SecretsStore
 
 from ..common.helpers import parse_versioned_object_uri
 from ..common.schemas.model_monitoring.constants import FileTargetKind
+from ..common.schemas.serving import MAX_BATCH_JOB_DURATION
 from ..datastore import DataItem, get_stream_pusher
 from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
 from ..execution import MLClientCtx
 from ..model import ModelObj
-from ..utils import get_caller_globals
+from ..utils import get_caller_globals, get_relative_module_name_from_path
 from .states import (
     FlowStep,
     MonitoredStep,
@@ -77,7 +83,6 @@ class _StreamContext:
         self.hostname = socket.gethostname()
         self.function_uri = function_uri
         self.output_stream = None
-        stream_uri = None
         log_stream = parameters.get(FileTargetKind.LOG_STREAM, "")
 
         if (enabled or log_stream) and function_uri:
@@ -88,20 +93,16 @@ class _StreamContext:
 
             stream_args = parameters.get("stream_args", {})
 
-            if log_stream == DUMMY_STREAM:
-                # Dummy stream used for testing, see tests/serving/test_serving.py
-                stream_uri = DUMMY_STREAM
-            elif not stream_args.get("mock"):  # if not a mock: `context.is_mock = True`
-                stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
-
             if log_stream:
-                #
-
-                self.output_stream = get_stream_pusher(
+                # Get the output stream from the log stream path
+                stream_path = log_stream.format(project=project)
+                self.output_stream = get_stream_pusher(stream_path, **stream_args)
             else:
                 # Get the output stream from the profile
                 self.output_stream = mlrun.model_monitoring.helpers.get_output_stream(
-                    project=project,
+                    project=project,
+                    profile=parameters.get("stream_profile"),
+                    mock=stream_args.get("mock", False),
                 )
 
 
@@ -179,11 +180,12 @@ class GraphServer(ModelObj):
         self,
         context,
         namespace,
-        resource_cache: ResourceCache = None,
+        resource_cache: Optional[ResourceCache] = None,
         logger=None,
         is_mock=False,
         monitoring_mock=False,
-
+        stream_profile: Optional[ds_profile.DatastoreProfile] = None,
+    ) -> None:
         """for internal use, initialize all steps (recursively)"""
 
         if self.secret_sources:
@@ -198,6 +200,20 @@ class GraphServer(ModelObj):
         context.monitoring_mock = monitoring_mock
         context.root = self.graph
 
+        if is_mock and monitoring_mock:
+            if stream_profile:
+                # Add the user-defined stream profile to the parameters
+                self.parameters["stream_profile"] = stream_profile
+            elif not (
+                self.parameters.get(FileTargetKind.LOG_STREAM)
+                or mlrun.get_secret_or_env(
+                    mm_constants.ProjectSecretKeys.STREAM_PROFILE_NAME
+                )
+            ):
+                # Set a dummy log stream for mocking purposes if there is no direct
+                # user-defined stream profile and no information in the environment
+                self.parameters[FileTargetKind.LOG_STREAM] = DUMMY_STREAM
+
         context.stream = _StreamContext(
             self.track_models, self.parameters, self.function_uri
         )
@@ -358,6 +374,7 @@ def add_error_raiser_step(
         raise_exception=monitored_step.raise_exception,
         models_names=list(monitored_step.class_args["models"].keys()),
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        function=monitored_step.function,
     )
     if monitored_step.responder:
         monitored_step.responder = False
@@ -400,6 +417,7 @@ def add_monitoring_general_steps(
         "mlrun.serving.system_steps.BackgroundTaskStatus",
         "background_task_status_step",
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        full_event=True,
    )
     monitor_flow_step = graph.add_step(
         "storey.Filter",
@@ -505,10 +523,6 @@ def add_system_steps_to_graph(
         monitor_flow_step.after = [
             step_name,
         ]
-    context.logger.info_with(
-        "Server graph after adding system steps",
-        graph=str(graph.steps),
-    )
     return graph
 
 
@@ -561,25 +575,51 @@ def v2_serving_init(context, namespace=None):
 async def async_execute_graph(
     context: MLClientCtx,
     data: DataItem,
+    timestamp_column: Optional[str],
     batching: bool,
     batch_size: Optional[int],
     read_as_lists: bool,
     nest_under_inputs: bool,
-) ->
+) -> None:
+    # Validate that data parameter is a DataItem and not passed via params
+    if not isinstance(data, DataItem):
+        raise MLRunInvalidArgumentError(
+            f"Parameter 'data' has type hint 'DataItem' but got {type(data).__name__} instead. "
+            f"Data files and artifacts must be passed via the 'inputs' parameter, not 'params'. "
+            f"The 'params' parameter is for simple configuration values (strings, numbers, booleans), "
+            f"while 'inputs' is for data files that need to be loaded. "
+            f"Example: run_function(..., inputs={{'data': 'path/to/data.csv'}}, params={{other_config: value}})"
+        )
+    run_call_count = 0
     spec = mlrun.utils.get_serving_spec()
-
-    namespace = {}
+    modname = None
     code = os.getenv("MLRUN_EXEC_CODE")
     if code:
         code = base64.b64decode(code).decode("utf-8")
-
+        with open("user_code.py", "w") as fp:
+            fp.write(code)
+        modname = "user_code"
     else:
         # TODO: find another way to get the local file path, or ensure that MLRUN_EXEC_CODE
         # gets set in local flow and not just in the remote pod
-
-        if
-
-
+        source_file_path = spec.get("filename", None)
+        if source_file_path:
+            source_file_path_object, working_dir_path_object = (
+                mlrun.utils.helpers.get_source_and_working_dir_paths(source_file_path)
+            )
+            if not source_file_path_object.is_relative_to(working_dir_path_object):
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"Source file path '{source_file_path}' is not under the current working directory "
+                    f"(which is required when running with local=True)"
+                )
+            modname = get_relative_module_name_from_path(
+                source_file_path_object, working_dir_path_object
+            )
+
+    namespace = {}
+    if modname:
+        mod = importlib.import_module(modname)
+        namespace = mod.__dict__
 
     server = GraphServer.from_dict(spec)
 
@@ -605,10 +645,43 @@ async def async_execute_graph(
             f"(status='{task_state}')"
         )
 
+    df = data.as_df()
+
+    if df.empty:
+        context.logger.warn("Job terminated due to empty inputs (0 rows)")
+        return
+
+    track_models = spec.get("track_models")
+
+    if track_models and timestamp_column:
+        context.logger.info(f"Sorting dataframe by {timestamp_column}")
+        df[timestamp_column] = pd.to_datetime(  # in case it's a string
+            df[timestamp_column]
+        )
+        df.sort_values(by=timestamp_column, inplace=True)
+        if len(df) > 1:
+            start_time = df[timestamp_column].iloc[0]
+            end_time = df[timestamp_column].iloc[-1]
+            time_range = end_time - start_time
+            start_time = start_time.isoformat()
+            end_time = end_time.isoformat()
+            # TODO: tie this to the controller's base period
+            if time_range > pd.Timedelta(MAX_BATCH_JOB_DURATION):
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"Dataframe time range is too long: {time_range}. "
+                    "Please disable tracking or reduce the input dataset's time range below the defined limit "
+                    f"of {MAX_BATCH_JOB_DURATION}."
+                )
+        else:
+            start_time = end_time = df["timestamp"].iloc[0].isoformat()
+    else:
+        # end time will be set from clock time when the batch completes
+        start_time = datetime.now(tz=timezone.utc).isoformat()
+
     server.graph = add_system_steps_to_graph(
         server.project,
         copy.deepcopy(server.graph),
-
+        track_models,
         context,
         spec,
         pause_until_background_task_completion=False,  # we've already awaited it
@@ -616,7 +689,6 @@ async def async_execute_graph(
 
     if config.log_level.lower() == "debug":
         server.verbose = True
-    context.logger.info_with("Initializing states", namespace=namespace)
     kwargs = {}
     if hasattr(context, "is_mock"):
         kwargs["is_mock"] = context.is_mock
@@ -633,19 +705,30 @@ async def async_execute_graph(
     if server.verbose:
         context.logger.info(server.to_yaml())
 
-    df = data.as_df()
-
-    responses = []
-
     async def run(body):
+        nonlocal run_call_count
         event = storey.Event(id=index, body=body)
-
-
+        if timestamp_column:
+            if batching:
+                # we use the first row in the batch to determine the timestamp for the whole batch
+                body = body[0]
+            if not isinstance(body, dict):
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"When timestamp_column=True, event body must be a dict – got {type(body).__name__} instead"
+                )
+            if timestamp_column not in body:
+                raise mlrun.errors.MLRunRuntimeError(
+                    f"Event body '{body}' did not contain timestamp column '{timestamp_column}'"
+                )
+            event._original_timestamp = body[timestamp_column]
+        run_call_count += 1
+        return await server.run(event, context)
 
     if batching and not batch_size:
         batch_size = len(df)
 
     batch = []
+    tasks = []
     for index, row in df.iterrows():
         data = row.to_list() if read_as_lists else row.to_dict()
         if nest_under_inputs:
@@ -653,24 +736,119 @@ async def async_execute_graph(
         if batching:
             batch.append(data)
             if len(batch) == batch_size:
-
+                tasks.append(asyncio.create_task(run(batch)))
                 batch = []
         else:
-
+            tasks.append(asyncio.create_task(run(data)))
 
     if batch:
-
+        tasks.append(asyncio.create_task(run(batch)))
+
+    responses = await asyncio.gather(*tasks)
 
     termination_result = server.wait_for_completion()
     if asyncio.iscoroutine(termination_result):
         await termination_result
 
-
+    model_endpoint_uids = spec.get("model_endpoint_uids", [])
+
+    # needed for output_stream to be created
+    server = GraphServer.from_dict(spec)
+    server.init_states(None, namespace)
+
+    batch_completion_time = datetime.now(tz=timezone.utc).isoformat()
+
+    if not timestamp_column:
+        end_time = batch_completion_time
+
+    mm_stream_record = dict(
+        kind="batch_complete",
+        project=context.project,
+        first_timestamp=start_time,
+        last_timestamp=end_time,
+        batch_completion_time=batch_completion_time,
+    )
+    output_stream = server.context.stream.output_stream
+    for mep_uid in spec.get("model_endpoint_uids", []):
+        mm_stream_record["endpoint_id"] = mep_uid
+        output_stream.push(mm_stream_record, partition_key=mep_uid)
+
+    context.logger.info(
+        f"Job completed processing {len(df)} rows",
+        timestamp_column=timestamp_column,
+        model_endpoint_uids=model_endpoint_uids,
+    )
+
+    has_responder = False
+    for step in server.graph.steps.values():
+        if getattr(step, "responder", False):
+            has_responder = True
+            break
+
+    if has_responder:
+        # log the results as a dataset artifact
+        artifact_path = None
+        if (
+            "{{run.uid}}" not in context.artifact_path
+        ):  # TODO: delete when IG-22841 is resolved
+            artifact_path = "+/{{run.uid}}"  # will be concatenated to the context's path in extend_artifact_path
+        context.log_dataset(
+            "prediction", df=pd.DataFrame(responses), artifact_path=artifact_path
+        )
+
+        # if we got responses that appear to be in the right format, try to log per-model datasets too
+        if (
+            responses
+            and responses[0]
+            and isinstance(responses[0], dict)
+            and isinstance(next(iter(responses[0].values())), (dict, list))
+        ):
+            try:
+                # turn this list of samples into a dict of lists, one per model endpoint
+                grouped = defaultdict(list)
+                for sample in responses:
+                    for model_name, features in sample.items():
+                        grouped[model_name].append(features)
+                # create a dataframe per model endpoint and log it
+                for model_name, features in grouped.items():
+                    context.log_dataset(
+                        f"prediction_{model_name}",
+                        df=pd.DataFrame(features),
+                        artifact_path=artifact_path,
+                    )
+            except Exception as e:
+                context.logger.warning(
+                    "Failed to log per-model prediction datasets",
+                    error=err_to_str(e),
+                )
+
+    context.log_result("num_rows", run_call_count)
+
+
+def _is_inside_asyncio_loop():
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+# Workaround for running with local=True in Jupyter (ML-10620)
+def _workaround_asyncio_nesting():
+    try:
+        import nest_asyncio
+    except ImportError:
+        raise mlrun.errors.MLRunRuntimeError(
+            "Cannot execute graph from within an already running asyncio loop. "
+            "Attempt to import nest_asyncio as a workaround failed as well."
+        )
+    nest_asyncio.apply()
 
 
 def execute_graph(
     context: MLClientCtx,
     data: DataItem,
+    timestamp_column: Optional[str] = None,
     batching: bool = False,
     batch_size: Optional[int] = None,
     read_as_lists: bool = False,
@@ -681,6 +859,9 @@ def execute_graph(
 
     :param context: The job's execution client context.
    :param data: The input data to the job, to be pushed into the graph row by row, or in batches.
+    :param timestamp_column: The name of the column that will be used as the timestamp for model monitoring purposes.
+        when timestamp_column is used in conjunction with batching, the first timestamp will be used for the entire
+        batch.
     :param batching: Whether to push one or more batches into the graph rather than row by row.
     :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
         be pushed into the graph in one batch.
@@ -689,9 +870,18 @@ def execute_graph(
 
     :return: A list of responses.
     """
+    if _is_inside_asyncio_loop():
+        _workaround_asyncio_nesting()
+
     return asyncio.run(
         async_execute_graph(
-            context,
+            context,
+            data,
+            timestamp_column,
+            batching,
+            batch_size,
+            read_as_lists,
+            nest_under_inputs,
        )
     )
 
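
Usage note (not part of the package diff): the new timestamp_column, batching, and batch_size arguments of execute_graph travel through the job's params, while the dataset itself must be passed through inputs so the handler receives it as a DataItem (the added validation in async_execute_graph raises otherwise). Below is a minimal sketch of that calling convention under stated assumptions: the project name, function name, and CSV path are placeholders, and the wiring that exposes execute_graph as the job handler is not shown in this diff.

import mlrun

# Hypothetical project and function names; the function is assumed to expose
# mlrun.serving.server.execute_graph as its handler and to carry the serving graph spec.
project = mlrun.get_or_create_project("graph-batch-demo", context="./")

run = project.run_function(
    "batch-graph",
    handler="execute_graph",
    # data files go through `inputs` so they arrive as a DataItem
    inputs={"data": "path/to/data.csv"},
    # simple configuration values go through `params`
    params={
        "timestamp_column": "timestamp",  # column used to order rows for model monitoring
        "batching": True,  # push rows into the graph in batches instead of one by one
        "batch_size": 1000,
    },
)

When batching and timestamp_column are combined, the first row of each batch supplies the timestamp for the whole batch, and, when model tracking is enabled, the run is rejected if the dataset's time range exceeds MAX_BATCH_JOB_DURATION.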