mlrun 1.10.0rc6__py3-none-any.whl → 1.10.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +3 -1
- mlrun/__main__.py +47 -4
- mlrun/artifacts/base.py +0 -27
- mlrun/artifacts/dataset.py +0 -8
- mlrun/artifacts/model.py +0 -7
- mlrun/artifacts/plots.py +0 -13
- mlrun/common/schemas/background_task.py +5 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +16 -0
- mlrun/common/schemas/project.py +4 -0
- mlrun/common/schemas/serving.py +2 -0
- mlrun/config.py +11 -22
- mlrun/datastore/utils.py +3 -1
- mlrun/db/base.py +0 -19
- mlrun/db/httpdb.py +73 -65
- mlrun/db/nopdb.py +0 -12
- mlrun/frameworks/tf_keras/__init__.py +4 -4
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
- mlrun/frameworks/tf_keras/model_handler.py +69 -9
- mlrun/frameworks/tf_keras/utils.py +12 -1
- mlrun/launcher/base.py +7 -0
- mlrun/launcher/client.py +2 -21
- mlrun/launcher/local.py +4 -0
- mlrun/model_monitoring/applications/_application_steps.py +23 -39
- mlrun/model_monitoring/applications/base.py +167 -32
- mlrun/model_monitoring/helpers.py +0 -3
- mlrun/projects/operations.py +11 -24
- mlrun/projects/pipelines.py +33 -3
- mlrun/projects/project.py +45 -89
- mlrun/run.py +37 -5
- mlrun/runtimes/daskjob.py +2 -0
- mlrun/runtimes/kubejob.py +5 -8
- mlrun/runtimes/mpijob/abstract.py +2 -0
- mlrun/runtimes/mpijob/v1.py +2 -0
- mlrun/runtimes/nuclio/function.py +2 -0
- mlrun/runtimes/nuclio/serving.py +60 -5
- mlrun/runtimes/pod.py +3 -0
- mlrun/runtimes/remotesparkjob.py +2 -0
- mlrun/runtimes/sparkjob/spark3job.py +2 -0
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +253 -29
- mlrun/serving/states.py +215 -18
- mlrun/serving/system_steps.py +391 -0
- mlrun/serving/v2_serving.py +9 -8
- mlrun/utils/helpers.py +18 -4
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/METADATA +9 -9
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/RECORD +52 -51
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py
CHANGED
@@ -21,14 +21,16 @@ import os
 import socket
 import traceback
 import uuid
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
+import storey
 from nuclio import Context as NuclioContext
 from nuclio.request import Logger as NuclioLogger
 
 import mlrun
 import mlrun.common.constants
 import mlrun.common.helpers
+import mlrun.common.schemas
 import mlrun.model_monitoring
 import mlrun.utils
 from mlrun.config import config
@@ -37,12 +39,20 @@ from mlrun.secrets import SecretsStore
 
 from ..common.helpers import parse_versioned_object_uri
 from ..common.schemas.model_monitoring.constants import FileTargetKind
-from ..datastore import get_stream_pusher
+from ..datastore import DataItem, get_stream_pusher
 from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
+from ..execution import MLClientCtx
 from ..model import ModelObj
 from ..utils import get_caller_globals
-from .states import
+from .states import (
+    FlowStep,
+    MonitoredStep,
+    RootFlowStep,
+    RouterStep,
+    get_function,
+    graph_root_setter,
+)
 from .utils import event_id_key, event_path_key
 
 DUMMY_STREAM = "dummy://"
@@ -314,7 +324,11 @@ class GraphServer(ModelObj):
 
     def _process_response(self, context, response, get_body):
         body = response.body
-        if
+        if (
+            isinstance(context, MLClientCtx)
+            or isinstance(body, context.Response)
+            or get_body
+        ):
             return body
 
         if body and not isinstance(body, (str, bytes)):
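
The widened early-return means that when the graph is driven by an MLClientCtx (the job-execution path added further down in this file), responses come back as native Python objects; only the Nuclio path falls through to JSON serialization. A toy sketch of the dispatch, using stand-in classes rather than the real mlrun and nuclio types:

import json

class ToyJobCtx: ...          # stand-in for MLClientCtx (graph executed as a job)

class ToyNuclioCtx:
    class Response: ...       # stand-in for nuclio's context.Response

def process_response(context, body, get_body=False):
    # Mirrors the new condition: a job context, an already-wrapped response,
    # or an explicit get_body request returns the body untouched; otherwise
    # a non-string body is serialized to JSON, as on the Nuclio path.
    if (
        isinstance(context, ToyJobCtx)
        or isinstance(body, getattr(type(context), "Response", ()))
        or get_body
    ):
        return body
    if body and not isinstance(body, (str, bytes)):
        return json.dumps(body)
    return body

assert process_response(ToyJobCtx(), {"y": 1}) == {"y": 1}       # job run: raw object
assert process_response(ToyNuclioCtx(), {"y": 1}) == '{"y": 1}'  # nuclio: JSON string
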
@@ -329,34 +343,150 @@ class GraphServer(ModelObj):
         return self.graph.wait_for_completion()
 
 
-def
-
-
-
-
-
-
-):
+def add_error_raiser_step(
+    graph: RootFlowStep, monitored_steps: dict[str, MonitoredStep]
+) -> RootFlowStep:
+    monitored_steps_raisers = {}
+    user_steps = list(graph.steps.values())
+    for monitored_step in monitored_steps.values():
+        if monitored_step.raise_exception:
             error_step = graph.add_step(
                 class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
-                name=f"{
-                after=
+                name=f"{monitored_step.name}_error_raise",
+                after=monitored_step.name,
                 full_event=True,
-                raise_exception=
-                models_names=list(
+                raise_exception=monitored_step.raise_exception,
+                models_names=list(monitored_step.class_args["models"].keys()),
+                model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
             )
-            if
-
+            if monitored_step.responder:
+                monitored_step.responder = False
                 error_step.respond()
-
-            error_step.on_error =
-
-
-
-
-
-
+            monitored_steps_raisers[monitored_step.name] = error_step.name
+            error_step.on_error = monitored_step.on_error
+    for step in user_steps:
+        if step.after:
+            if isinstance(step.after, list):
+                for i in range(len(step.after)):
+                    if step.after[i] in monitored_steps_raisers:
+                        step.after[i] = monitored_steps_raisers[step.after[i]]
+            else:
+                if (
+                    isinstance(step.after, str)
+                    and step.after in monitored_steps_raisers
+                ):
+                    step.after = monitored_steps_raisers[step.after]
+    return graph
+
+
+def add_monitoring_general_steps(
+    project: str,
+    graph: RootFlowStep,
+    context,
+    serving_spec,
+) -> tuple[RootFlowStep, FlowStep]:
+    """
+    Adding the monitoring flow connection steps, this steps allow the graph to reconstruct the serving event enrich it
+    and push it to the model monitoring stream
+    system_steps structure -
+    "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
+    --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
+    """
+    monitor_flow_step = graph.add_step(
+        "mlrun.serving.system_steps.BackgroundTaskStatus",
+        "background_task_status_step",
+        context=context,
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "storey.Filter",
+        "filter_none",
+        _fn="(event is not None)",
+        after="background_task_status_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "mlrun.serving.system_steps.MonitoringPreProcessor",
+        "monitoring_pre_processor_step",
+        after="filter_none",
+        full_event=True,
+        context=context,
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    # flatten the events
+    graph.add_step(
+        "storey.FlatMap",
+        "flatten_events",
+        _fn="(event)",
+        after="monitoring_pre_processor_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "mlrun.serving.system_steps.SamplingStep",
+        "sampling_step",
+        after="flatten_events",
+        sampling_percentage=float(
+            serving_spec.get("parameters", {}).get("sampling_percentage", 100.0)
+            if isinstance(serving_spec, dict)
+            else getattr(serving_spec, "parameters", {}).get(
+                "sampling_percentage", 100.0
+            ),
+        ),
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "storey.Filter",
+        "filter_none_sampling",
+        _fn="(event is not None)",
+        after="sampling_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+
+    if getattr(context, "is_mock", False):
+        graph.add_step(
+            "mlrun.serving.system_steps.MockStreamPusher",
+            "model_monitoring_stream",
+            after="filter_none_sampling",
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
+    else:
+        stream_uri = mlrun.model_monitoring.get_stream_path(
+            project=project,
+            function_name=mlrun.common.schemas.MonitoringFunctionNames.STREAM,
+        )
+        context.logger.info_with(
+            "Creating Model Monitoring stream target using uri:", uri=stream_uri
+        )
+        graph.add_step(
+            ">>",
+            "model_monitoring_stream",
+            path=stream_uri,
+            sharding_func=mlrun.common.schemas.model_monitoring.constants.StreamProcessingEvent.ENDPOINT_ID,
+            after="filter_none_sampling",
+        )
+    return graph, monitor_flow_step
+
+
+def add_system_steps_to_graph(
+    project: str, graph: RootFlowStep, track_models: bool, context, serving_spec
+) -> RootFlowStep:
+    monitored_steps = graph.get_monitored_steps()
+    graph = add_error_raiser_step(graph, monitored_steps)
+    if track_models:
+        graph, monitor_flow_step = add_monitoring_general_steps(
+            project, graph, context, serving_spec
+        )
+        # Connect each model runner to the monitoring step:
+        for step_name, step in monitored_steps.items():
+            if monitor_flow_step.after:
+                if isinstance(monitor_flow_step.after, list):
+                    monitor_flow_step.after.append(step_name)
+                elif isinstance(monitor_flow_step.after, str):
+                    monitor_flow_step.after = [monitor_flow_step.after, step_name]
+            else:
+                monitor_flow_step.after = [
+                    step_name,
+                ]
     return graph
 
 
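
These system steps are injected at initialization time rather than authored by the user; what the user controls is whether tracking is on and the sampling percentage. A hedged sketch of the enabling side, using the existing serving-function API (the project, file, and function names are placeholders, and routing sampling_percentage through the serving spec's parameters is inferred from the SamplingStep wiring above):

import mlrun

project = mlrun.get_or_create_project("demo", context="./")
serving_fn = project.set_function(
    "serving.py", name="serving", kind="serving", image="mlrun/mlrun"
)
# Existing API: turns on track_models, which is what makes v2_serving_init
# call add_system_steps_to_graph on deploy
serving_fn.set_tracking()
# SamplingStep reads sampling_percentage from the serving spec's parameters
serving_fn.spec.parameters = {"sampling_percentage": 50.0}
serving_fn.deploy()
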
@@ -366,8 +496,14 @@ def v2_serving_init(context, namespace=None):
     context.logger.info("Initializing server from spec")
     spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
-    if isinstance(server.graph, RootFlowStep):
-        server.graph = add_system_steps_to_graph(
+    if isinstance(server.graph, RootFlowStep) and server.graph.include_monitored_step():
+        server.graph = add_system_steps_to_graph(
+            server.project,
+            copy.deepcopy(server.graph),
+            spec.get("track_models"),
+            context,
+            spec,
+        )
         context.logger.info_with(
             "Server graph after adding system steps",
             graph=str(server.graph.steps),
@@ -405,6 +541,94 @@ def v2_serving_init(context, namespace=None):
     _set_callbacks(server, context)
 
 
+async def async_execute_graph(
+    context: MLClientCtx,
+    data: DataItem,
+    batching: bool,
+    batch_size: Optional[int],
+) -> list[Any]:
+    spec = mlrun.utils.get_serving_spec()
+
+    source_filename = spec.get("filename", None)
+    namespace = {}
+    if source_filename:
+        with open(source_filename) as f:
+            exec(f.read(), namespace)
+
+    server = GraphServer.from_dict(spec)
+
+    if config.log_level.lower() == "debug":
+        server.verbose = True
+    context.logger.info_with("Initializing states", namespace=namespace)
+    kwargs = {}
+    if hasattr(context, "is_mock"):
+        kwargs["is_mock"] = context.is_mock
+    server.init_states(
+        context=None,  # this context is expected to be a nuclio context, which we don't have in this flow
+        namespace=namespace,
+        **kwargs,
+    )
+    context.logger.info("Initializing graph steps")
+    server.init_object(namespace)
+
+    context.logger.info_with("Graph was initialized", verbose=server.verbose)
+
+    if server.verbose:
+        context.logger.info(server.to_yaml())
+
+    df = data.as_df()
+
+    responses = []
+
+    async def run(body):
+        event = storey.Event(id=index, body=body)
+        response = await server.run(event, context)
+        responses.append(response)
+
+    if batching and not batch_size:
+        batch_size = len(df)
+
+    batch = []
+    for index, row in df.iterrows():
+        data = row.to_dict()
+        if batching:
+            batch.append(data)
+            if len(batch) == batch_size:
+                await run(batch)
+                batch = []
+        else:
+            await run(data)
+
+    if batch:
+        await run(batch)
+
+    termination_result = server.wait_for_completion()
+    if asyncio.iscoroutine(termination_result):
+        await termination_result
+
+    return responses
+
+
+def execute_graph(
+    context: MLClientCtx,
+    data: DataItem,
+    batching: bool = False,
+    batch_size: Optional[int] = None,
+) -> (list[Any], Any):
+    """
+    Execute graph as a job, from start to finish.
+
+    :param context: The job's execution client context.
+    :param data: The input data to the job, to be pushed into the graph row by row, or in batches.
+    :param batching: Whether to push one or more batches into the graph rather than row by row.
+    :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
+        be pushed into the graph in one batch.
+
+    :return: A list of responses.
+    """
+    return asyncio.run(async_execute_graph(context, data, batching, batch_size))
+
+
 def _set_callbacks(server, context):
     if not server.graph.supports_termination() or not hasattr(context, "platform"):
         return
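
The new execute_graph/async_execute_graph pair runs a serving graph to completion inside a job: the input DataItem is read into a DataFrame and pushed through the graph row by row, or in batches when batching=True. A hedged invocation sketch; it assumes the serving spec is already available in the environment (which is what mlrun.utils.get_serving_spec() expects), and the dataset URI is a placeholder:

import mlrun
from mlrun.serving.server import execute_graph

# A client context and a DataItem, matching the handler's signature
context = mlrun.get_or_create_ctx("graph-run")
data = mlrun.get_dataitem("store://datasets/demo/inputs")  # placeholder URI

# Push the whole dataset through the graph in batches of 100 rows;
# each graph response is collected and returned in a list
responses = execute_graph(context, data, batching=True, batch_size=100)
print(len(responses))
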