mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__init__.py +3 -1
- mlrun/common/db/dialects.py +25 -0
- mlrun/common/schemas/background_task.py +5 -0
- mlrun/common/schemas/function.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/constants.py +16 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
- mlrun/common/schemas/partition.py +13 -3
- mlrun/common/schemas/project.py +4 -0
- mlrun/common/schemas/serving.py +2 -0
- mlrun/config.py +11 -22
- mlrun/datastore/utils.py +3 -2
- mlrun/db/__init__.py +1 -0
- mlrun/db/base.py +11 -10
- mlrun/db/httpdb.py +97 -25
- mlrun/db/nopdb.py +5 -4
- mlrun/db/sql_types.py +160 -0
- mlrun/frameworks/tf_keras/__init__.py +4 -4
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
- mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
- mlrun/frameworks/tf_keras/model_handler.py +80 -9
- mlrun/frameworks/tf_keras/utils.py +12 -1
- mlrun/launcher/base.py +6 -1
- mlrun/launcher/client.py +1 -22
- mlrun/launcher/local.py +0 -4
- mlrun/model_monitoring/applications/base.py +21 -1
- mlrun/model_monitoring/applications/context.py +2 -1
- mlrun/projects/pipelines.py +35 -3
- mlrun/projects/project.py +13 -29
- mlrun/run.py +37 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/kubejob.py +0 -4
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/nuclio/function.py +0 -2
- mlrun/runtimes/nuclio/serving.py +14 -51
- mlrun/runtimes/pod.py +0 -3
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/server.py +159 -123
- mlrun/serving/states.py +215 -18
- mlrun/serving/system_steps.py +391 -0
- mlrun/serving/v2_serving.py +9 -8
- mlrun/utils/helpers.py +19 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/METADATA +22 -18
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/RECORD +52 -50
- mlrun/common/db/sql_session.py +0 -79
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py
CHANGED
@@ -21,15 +21,15 @@ import os
 import socket
 import traceback
 import uuid
-from typing import
+from typing import Optional, Union

-import storey
 from nuclio import Context as NuclioContext
 from nuclio.request import Logger as NuclioLogger

 import mlrun
 import mlrun.common.constants
 import mlrun.common.helpers
+import mlrun.common.schemas
 import mlrun.model_monitoring
 import mlrun.utils
 from mlrun.config import config
@@ -38,13 +38,19 @@ from mlrun.secrets import SecretsStore

 from ..common.helpers import parse_versioned_object_uri
 from ..common.schemas.model_monitoring.constants import FileTargetKind
-from ..datastore import
+from ..datastore import get_stream_pusher
 from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
-from ..execution import MLClientCtx
 from ..model import ModelObj
 from ..utils import get_caller_globals
-from .states import
+from .states import (
+    FlowStep,
+    MonitoredStep,
+    RootFlowStep,
+    RouterStep,
+    get_function,
+    graph_root_setter,
+)
 from .utils import event_id_key, event_path_key

 DUMMY_STREAM = "dummy://"
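Taken together, these two hunks drop the module-level `storey` and `MLClientCtx` imports (their only consumer, the graph-as-job path, is deleted in the last hunk) and pull in the step types the new system-step wiring below relies on. As a rough illustration of what the expanded `.states` surface enables, the gate applied later in `v2_serving_init` boils down to something like this (the helper name is ours, not mlrun's):

```python
from mlrun.serving.states import RootFlowStep

def needs_system_steps(graph) -> bool:
    # Only flow graphs that actually contain monitored steps
    # get the monitoring system steps grafted on.
    return isinstance(graph, RootFlowStep) and graph.include_monitored_step()
```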
@@ -316,11 +322,7 @@ class GraphServer(ModelObj):

     def _process_response(self, context, response, get_body):
         body = response.body
-        if (
-            isinstance(context, MLClientCtx)
-            or isinstance(body, context.Response)
-            or get_body
-        ):
+        if isinstance(body, context.Response) or get_body:
             return body

         if body and not isinstance(body, (str, bytes)):
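The guard loses its `MLClientCtx` branch, which only mattered when a graph ran as a job inside this module. A minimal sketch of the resulting behavior, assuming the fall-through JSON-encodes non-string bodies as the `(str, bytes)` check suggests:

```python
import json

def process_response(context, body, get_body: bool):
    # Return untouched when the step already built a framework Response,
    # or when the caller explicitly asked for the raw body.
    if isinstance(body, context.Response) or get_body:
        return body
    # Assumed fall-through: non-string payloads are JSON-encoded.
    if body and not isinstance(body, (str, bytes)):
        body = json.dumps(body)
    return body
```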
@@ -335,34 +337,150 @@ class GraphServer(ModelObj):
         return self.graph.wait_for_completion()


-def
-
-
-
-
-
-
-):
+def add_error_raiser_step(
+    graph: RootFlowStep, monitored_steps: dict[str, MonitoredStep]
+) -> RootFlowStep:
+    monitored_steps_raisers = {}
+    user_steps = list(graph.steps.values())
+    for monitored_step in monitored_steps.values():
+        if monitored_step.raise_exception:
             error_step = graph.add_step(
                 class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
-                name=f"{
-                after=
+                name=f"{monitored_step.name}_error_raise",
+                after=monitored_step.name,
                 full_event=True,
-                raise_exception=
-                models_names=list(
+                raise_exception=monitored_step.raise_exception,
+                models_names=list(monitored_step.class_args["models"].keys()),
+                model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
             )
-            if
-
+            if monitored_step.responder:
+                monitored_step.responder = False
                 error_step.respond()
-
-            error_step.on_error =
-
-
-
-
-
-
-
+            monitored_steps_raisers[monitored_step.name] = error_step.name
+            error_step.on_error = monitored_step.on_error
+    for step in user_steps:
+        if step.after:
+            if isinstance(step.after, list):
+                for i in range(len(step.after)):
+                    if step.after[i] in monitored_steps_raisers:
+                        step.after[i] = monitored_steps_raisers[step.after[i]]
+            else:
+                if (
+                    isinstance(step.after, str)
+                    and step.after in monitored_steps_raisers
+                ):
+                    step.after = monitored_steps_raisers[step.after]
+    return graph
+
+
+def add_monitoring_general_steps(
+    project: str,
+    graph: RootFlowStep,
+    context,
+    serving_spec,
+) -> tuple[RootFlowStep, FlowStep]:
+    """
+    Add the monitoring flow connection steps. These steps let the graph reconstruct the serving event,
+    enrich it, and push it to the model monitoring stream.
+    system_steps structure -
+    "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
+    --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
+    """
+    monitor_flow_step = graph.add_step(
+        "mlrun.serving.system_steps.BackgroundTaskStatus",
+        "background_task_status_step",
+        context=context,
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "storey.Filter",
+        "filter_none",
+        _fn="(event is not None)",
+        after="background_task_status_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "mlrun.serving.system_steps.MonitoringPreProcessor",
+        "monitoring_pre_processor_step",
+        after="filter_none",
+        full_event=True,
+        context=context,
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    # flatten the events
+    graph.add_step(
+        "storey.FlatMap",
+        "flatten_events",
+        _fn="(event)",
+        after="monitoring_pre_processor_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "mlrun.serving.system_steps.SamplingStep",
+        "sampling_step",
+        after="flatten_events",
+        sampling_percentage=float(
+            serving_spec.get("parameters", {}).get("sampling_percentage", 100.0)
+            if isinstance(serving_spec, dict)
+            else getattr(serving_spec, "parameters", {}).get(
+                "sampling_percentage", 100.0
+            ),
+        ),
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "storey.Filter",
+        "filter_none_sampling",
+        _fn="(event is not None)",
+        after="sampling_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+
+    if getattr(context, "is_mock", False):
+        graph.add_step(
+            "mlrun.serving.system_steps.MockStreamPusher",
+            "model_monitoring_stream",
+            after="filter_none_sampling",
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
+    else:
+        stream_uri = mlrun.model_monitoring.get_stream_path(
+            project=project,
+            function_name=mlrun.common.schemas.MonitoringFunctionNames.STREAM,
+        )
+        context.logger.info_with(
+            "Creating Model Monitoring stream target using uri:", uri=stream_uri
+        )
+        graph.add_step(
+            ">>",
+            "model_monitoring_stream",
+            path=stream_uri,
+            sharding_func=mlrun.common.schemas.model_monitoring.constants.StreamProcessingEvent.ENDPOINT_ID,
+            after="filter_none_sampling",
+        )
+    return graph, monitor_flow_step
+
+
+def add_system_steps_to_graph(
+    project: str, graph: RootFlowStep, track_models: bool, context, serving_spec
+) -> RootFlowStep:
+    monitored_steps = graph.get_monitored_steps()
+    graph = add_error_raiser_step(graph, monitored_steps)
+    if track_models:
+        graph, monitor_flow_step = add_monitoring_general_steps(
+            project, graph, context, serving_spec
+        )
+        # Connect each model runner to the monitoring step:
+        for step_name, step in monitored_steps.items():
+            if monitor_flow_step.after:
+                if isinstance(monitor_flow_step.after, list):
+                    monitor_flow_step.after.append(step_name)
+                elif isinstance(monitor_flow_step.after, str):
+                    monitor_flow_step.after = [monitor_flow_step.after, step_name]
+            else:
+                monitor_flow_step.after = [
+                    step_name,
+                ]
     return graph


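The second loop in `add_error_raiser_step` splices each raiser into the main path: any user step that previously ran `after` a monitored step is re-pointed at that step's error raiser, and a responder monitored step hands its `respond()` role to the raiser so errors surface on the synchronous reply path. A self-contained miniature of the rewiring, with plain dicts standing in for mlrun step objects:

```python
def rewire_after(steps: dict[str, dict], raisers: dict[str, str]) -> None:
    # Re-point every `after` reference from a monitored step to its raiser,
    # covering both the list form and the single-string form.
    for step in steps.values():
        after = step.get("after")
        if isinstance(after, list):
            step["after"] = [raisers.get(name, name) for name in after]
        elif isinstance(after, str):
            step["after"] = raisers.get(after, after)

steps = {"post_process": {"after": "model_runner"}}
rewire_after(steps, {"model_runner": "model_runner_error_raise"})
assert steps["post_process"]["after"] == "model_runner_error_raise"
```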
@@ -372,8 +490,14 @@ def v2_serving_init(context, namespace=None):
     context.logger.info("Initializing server from spec")
     spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
-    if isinstance(server.graph, RootFlowStep):
-        server.graph = add_system_steps_to_graph(
+    if isinstance(server.graph, RootFlowStep) and server.graph.include_monitored_step():
+        server.graph = add_system_steps_to_graph(
+            server.project,
+            copy.deepcopy(server.graph),
+            spec.get("track_models"),
+            context,
+            spec,
+        )
     context.logger.info_with(
         "Server graph after adding system steps",
         graph=str(server.graph.steps),
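Note that the graph is deep-copied before system steps are grafted on, presumably so the serialized spec keeps the user's original topology, and that `track_models` is read off the same spec dict. The `SamplingStep` wiring in the previous hunk also tolerates the spec arriving either as a dict or as a parsed object; that dict-or-attribute fallback amounts to the following (the helper name is ours):

```python
def get_spec_parameter(serving_spec, key: str, default):
    # `parameters` is assumed to be a dict in both cases,
    # as in the SamplingStep wiring above.
    if isinstance(serving_spec, dict):
        return serving_spec.get("parameters", {}).get(key, default)
    return getattr(serving_spec, "parameters", {}).get(key, default)

assert get_spec_parameter({"parameters": {"sampling_percentage": 25.0}},
                          "sampling_percentage", 100.0) == 25.0
assert get_spec_parameter({}, "sampling_percentage", 100.0) == 100.0
```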
@@ -411,94 +535,6 @@ def v2_serving_init(context, namespace=None):
     _set_callbacks(server, context)


-async def async_execute_graph(
-    context: MLClientCtx,
-    data: DataItem,
-    batching: bool,
-    batch_size: Optional[int],
-) -> list[Any]:
-    spec = mlrun.utils.get_serving_spec()
-
-    source_filename = spec.get("filename", None)
-    namespace = {}
-    if source_filename:
-        with open(source_filename) as f:
-            exec(f.read(), namespace)
-
-    server = GraphServer.from_dict(spec)
-
-    if config.log_level.lower() == "debug":
-        server.verbose = True
-    context.logger.info_with("Initializing states", namespace=namespace)
-    kwargs = {}
-    if hasattr(context, "is_mock"):
-        kwargs["is_mock"] = context.is_mock
-    server.init_states(
-        context=None,  # this context is expected to be a nuclio context, which we don't have in this flow
-        namespace=namespace,
-        **kwargs,
-    )
-    context.logger.info("Initializing graph steps")
-    server.init_object(namespace)
-
-    context.logger.info_with("Graph was initialized", verbose=server.verbose)
-
-    if server.verbose:
-        context.logger.info(server.to_yaml())
-
-    df = data.as_df()
-
-    responses = []
-
-    async def run(body):
-        event = storey.Event(id=index, body=body)
-        response = await server.run(event, context)
-        responses.append(response)
-
-    if batching and not batch_size:
-        batch_size = len(df)
-
-    batch = []
-    for index, row in df.iterrows():
-        data = row.to_dict()
-        if batching:
-            batch.append(data)
-            if len(batch) == batch_size:
-                await run(batch)
-                batch = []
-        else:
-            await run(data)
-
-    if batch:
-        await run(batch)
-
-    termination_result = server.wait_for_completion()
-    if asyncio.iscoroutine(termination_result):
-        await termination_result
-
-    return responses
-
-
-def execute_graph(
-    context: MLClientCtx,
-    data: DataItem,
-    batching: bool = False,
-    batch_size: Optional[int] = None,
-) -> (list[Any], Any):
-    """
-    Execute graph as a job, from start to finish.
-
-    :param context:    The job's execution client context.
-    :param data:       The input data to the job, to be pushed into the graph row by row, or in batches.
-    :param batching:   Whether to push one or more batches into the graph rather than row by row.
-    :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
-                       be pushed into the graph in one batch.
-
-    :return: A list of responses.
-    """
-    return asyncio.run(async_execute_graph(context, data, batching, batch_size))
-
-
 def _set_callbacks(server, context):
     if not server.graph.supports_termination() or not hasattr(context, "platform"):
         return