mlrun 1.10.0rc6__py3-none-any.whl → 1.10.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (52)
  1. mlrun/__init__.py +3 -1
  2. mlrun/__main__.py +47 -4
  3. mlrun/artifacts/base.py +0 -27
  4. mlrun/artifacts/dataset.py +0 -8
  5. mlrun/artifacts/model.py +0 -7
  6. mlrun/artifacts/plots.py +0 -13
  7. mlrun/common/schemas/background_task.py +5 -0
  8. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +16 -0
  10. mlrun/common/schemas/project.py +4 -0
  11. mlrun/common/schemas/serving.py +2 -0
  12. mlrun/config.py +11 -22
  13. mlrun/datastore/utils.py +3 -1
  14. mlrun/db/base.py +0 -19
  15. mlrun/db/httpdb.py +73 -65
  16. mlrun/db/nopdb.py +0 -12
  17. mlrun/frameworks/tf_keras/__init__.py +4 -4
  18. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
  19. mlrun/frameworks/tf_keras/model_handler.py +69 -9
  20. mlrun/frameworks/tf_keras/utils.py +12 -1
  21. mlrun/launcher/base.py +7 -0
  22. mlrun/launcher/client.py +2 -21
  23. mlrun/launcher/local.py +4 -0
  24. mlrun/model_monitoring/applications/_application_steps.py +23 -39
  25. mlrun/model_monitoring/applications/base.py +167 -32
  26. mlrun/model_monitoring/helpers.py +0 -3
  27. mlrun/projects/operations.py +11 -24
  28. mlrun/projects/pipelines.py +33 -3
  29. mlrun/projects/project.py +45 -89
  30. mlrun/run.py +37 -5
  31. mlrun/runtimes/daskjob.py +2 -0
  32. mlrun/runtimes/kubejob.py +5 -8
  33. mlrun/runtimes/mpijob/abstract.py +2 -0
  34. mlrun/runtimes/mpijob/v1.py +2 -0
  35. mlrun/runtimes/nuclio/function.py +2 -0
  36. mlrun/runtimes/nuclio/serving.py +60 -5
  37. mlrun/runtimes/pod.py +3 -0
  38. mlrun/runtimes/remotesparkjob.py +2 -0
  39. mlrun/runtimes/sparkjob/spark3job.py +2 -0
  40. mlrun/serving/__init__.py +2 -0
  41. mlrun/serving/server.py +253 -29
  42. mlrun/serving/states.py +215 -18
  43. mlrun/serving/system_steps.py +391 -0
  44. mlrun/serving/v2_serving.py +9 -8
  45. mlrun/utils/helpers.py +18 -4
  46. mlrun/utils/version/version.json +2 -2
  47. {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/METADATA +9 -9
  48. {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/RECORD +52 -51
  49. {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/WHEEL +0 -0
  50. {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/entry_points.txt +0 -0
  51. {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/licenses/LICENSE +0 -0
  52. {mlrun-1.10.0rc6.dist-info → mlrun-1.10.0rc8.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py CHANGED
@@ -21,14 +21,16 @@ import os
 import socket
 import traceback
 import uuid
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
+import storey
 from nuclio import Context as NuclioContext
 from nuclio.request import Logger as NuclioLogger
 
 import mlrun
 import mlrun.common.constants
 import mlrun.common.helpers
+import mlrun.common.schemas
 import mlrun.model_monitoring
 import mlrun.utils
 from mlrun.config import config
@@ -37,12 +39,20 @@ from mlrun.secrets import SecretsStore
 
 from ..common.helpers import parse_versioned_object_uri
 from ..common.schemas.model_monitoring.constants import FileTargetKind
-from ..datastore import get_stream_pusher
+from ..datastore import DataItem, get_stream_pusher
 from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
+from ..execution import MLClientCtx
 from ..model import ModelObj
 from ..utils import get_caller_globals
-from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
+from .states import (
+    FlowStep,
+    MonitoredStep,
+    RootFlowStep,
+    RouterStep,
+    get_function,
+    graph_root_setter,
+)
 from .utils import event_id_key, event_path_key
 
 DUMMY_STREAM = "dummy://"
@@ -314,7 +324,11 @@ class GraphServer(ModelObj):
 
     def _process_response(self, context, response, get_body):
         body = response.body
-        if isinstance(body, context.Response) or get_body:
+        if (
+            isinstance(context, MLClientCtx)
+            or isinstance(body, context.Response)
+            or get_body
+        ):
             return body
 
         if body and not isinstance(body, (str, bytes)):
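
Note: the new MLClientCtx branch exists because a graph can now also be executed as a job (see execute_graph below), in which case _process_response receives an MLRun client context rather than a Nuclio context. A minimal sketch of the difference, with a purely illustrative body value:

    # Hedged sketch; the body value is hypothetical.
    body = {"outputs": [0.27]}
    # Nuclio context: a non-str/bytes body falls through to the serialization
    # logic below before being returned to the HTTP caller.
    # MLClientCtx (job execution): the body is returned unchanged, since the
    # client context has no Response type to wrap it in.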
@@ -329,34 +343,150 @@ class GraphServer(ModelObj):
         return self.graph.wait_for_completion()
 
 
-def add_system_steps_to_graph(graph: RootFlowStep):
-    model_runner_raisers = {}
-    steps = list(graph.steps.values())
-    for step in steps:
-        if (
-            isinstance(step, mlrun.serving.states.ModelRunnerStep)
-            and step.raise_exception
-        ):
+def add_error_raiser_step(
+    graph: RootFlowStep, monitored_steps: dict[str, MonitoredStep]
+) -> RootFlowStep:
+    monitored_steps_raisers = {}
+    user_steps = list(graph.steps.values())
+    for monitored_step in monitored_steps.values():
+        if monitored_step.raise_exception:
             error_step = graph.add_step(
                 class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
-                name=f"{step.name}_error_raise",
-                after=step.name,
+                name=f"{monitored_step.name}_error_raise",
+                after=monitored_step.name,
                 full_event=True,
-                raise_exception=step.raise_exception,
-                models_names=list(step.class_args["models"].keys()),
+                raise_exception=monitored_step.raise_exception,
+                models_names=list(monitored_step.class_args["models"].keys()),
+                model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
             )
-            if step.responder:
-                step.responder = False
+            if monitored_step.responder:
+                monitored_step.responder = False
                 error_step.respond()
-            model_runner_raisers[step.name] = error_step.name
-            error_step.on_error = step.on_error
-        if isinstance(step.after, list):
-            for i in range(len(step.after)):
-                if step.after[i] in model_runner_raisers:
-                    step.after[i] = model_runner_raisers[step.after[i]]
-        else:
-            if step.after in model_runner_raisers:
-                step.after = model_runner_raisers[step.after]
+            monitored_steps_raisers[monitored_step.name] = error_step.name
+            error_step.on_error = monitored_step.on_error
+    for step in user_steps:
+        if step.after:
+            if isinstance(step.after, list):
+                for i in range(len(step.after)):
+                    if step.after[i] in monitored_steps_raisers:
+                        step.after[i] = monitored_steps_raisers[step.after[i]]
+            else:
+                if (
+                    isinstance(step.after, str)
+                    and step.after in monitored_steps_raisers
+                ):
+                    step.after = monitored_steps_raisers[step.after]
+    return graph
+
+
+def add_monitoring_general_steps(
+    project: str,
+    graph: RootFlowStep,
+    context,
+    serving_spec,
+) -> tuple[RootFlowStep, FlowStep]:
+    """
+    Adding the monitoring flow connection steps, this steps allow the graph to reconstruct the serving event enrich it
+    and push it to the model monitoring stream
+    system_steps structure -
+    "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
+    --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
+    """
+    monitor_flow_step = graph.add_step(
+        "mlrun.serving.system_steps.BackgroundTaskStatus",
+        "background_task_status_step",
+        context=context,
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "storey.Filter",
+        "filter_none",
+        _fn="(event is not None)",
+        after="background_task_status_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "mlrun.serving.system_steps.MonitoringPreProcessor",
+        "monitoring_pre_processor_step",
+        after="filter_none",
+        full_event=True,
+        context=context,
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    # flatten the events
+    graph.add_step(
+        "storey.FlatMap",
+        "flatten_events",
+        _fn="(event)",
+        after="monitoring_pre_processor_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "mlrun.serving.system_steps.SamplingStep",
+        "sampling_step",
+        after="flatten_events",
+        sampling_percentage=float(
+            serving_spec.get("parameters", {}).get("sampling_percentage", 100.0)
+            if isinstance(serving_spec, dict)
+            else getattr(serving_spec, "parameters", {}).get(
+                "sampling_percentage", 100.0
+            ),
+        ),
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+    graph.add_step(
+        "storey.Filter",
+        "filter_none_sampling",
+        _fn="(event is not None)",
+        after="sampling_step",
+        model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+    )
+
+    if getattr(context, "is_mock", False):
+        graph.add_step(
+            "mlrun.serving.system_steps.MockStreamPusher",
+            "model_monitoring_stream",
+            after="filter_none_sampling",
+            model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        )
+    else:
+        stream_uri = mlrun.model_monitoring.get_stream_path(
+            project=project,
+            function_name=mlrun.common.schemas.MonitoringFunctionNames.STREAM,
+        )
+        context.logger.info_with(
+            "Creating Model Monitoring stream target using uri:", uri=stream_uri
+        )
+        graph.add_step(
+            ">>",
+            "model_monitoring_stream",
+            path=stream_uri,
+            sharding_func=mlrun.common.schemas.model_monitoring.constants.StreamProcessingEvent.ENDPOINT_ID,
+            after="filter_none_sampling",
+        )
+    return graph, monitor_flow_step
+
+
+def add_system_steps_to_graph(
+    project: str, graph: RootFlowStep, track_models: bool, context, serving_spec
+) -> RootFlowStep:
+    monitored_steps = graph.get_monitored_steps()
+    graph = add_error_raiser_step(graph, monitored_steps)
+    if track_models:
+        graph, monitor_flow_step = add_monitoring_general_steps(
+            project, graph, context, serving_spec
+        )
+        # Connect each model runner to the monitoring step:
+        for step_name, step in monitored_steps.items():
+            if monitor_flow_step.after:
+                if isinstance(monitor_flow_step.after, list):
+                    monitor_flow_step.after.append(step_name)
+                elif isinstance(monitor_flow_step.after, str):
+                    monitor_flow_step.after = [monitor_flow_step.after, step_name]
+            else:
+                monitor_flow_step.after = [
+                    step_name,
+                ]
     return graph
 
 
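
Taken together, these helpers rewire the user graph at init time. A minimal sketch of the new entry point, assuming a deployed server whose graph contains monitored steps (all argument values here are illustrative, not part of the diff):

    # Hedged sketch based on the signature above; "my-project" is hypothetical.
    graph = add_system_steps_to_graph(
        project="my-project",
        graph=copy.deepcopy(server.graph),
        track_models=True,    # append the monitoring chain
        context=context,      # nuclio or mock context
        serving_spec=spec,    # dict (or spec object) carrying "parameters"
    )
    # With track_models=True, the chain from the docstring is appended:
    # background_task_status_step -> filter_none -> monitoring_pre_processor_step
    # -> flatten_events -> sampling_step -> filter_none_sampling -> model_monitoring_stream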
@@ -366,8 +496,14 @@ def v2_serving_init(context, namespace=None):
     context.logger.info("Initializing server from spec")
     spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
-    if isinstance(server.graph, RootFlowStep):
-        server.graph = add_system_steps_to_graph(copy.deepcopy(server.graph))
+    if isinstance(server.graph, RootFlowStep) and server.graph.include_monitored_step():
+        server.graph = add_system_steps_to_graph(
+            server.project,
+            copy.deepcopy(server.graph),
+            spec.get("track_models"),
+            context,
+            spec,
+        )
     context.logger.info_with(
         "Server graph after adding system steps",
         graph=str(server.graph.steps),
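
The injected steps are driven by a few serving-spec fields; a hypothetical minimal spec showing only the keys this diff reads:

    # Hedged sketch: a real serving spec produced by mlrun has many more fields.
    spec = {
        "track_models": True,                         # enables the monitoring chain
        "parameters": {"sampling_percentage": 20.0},  # SamplingStep rate, defaults to 100.0
        "filename": "graph_code.py",                  # hypothetical; exec'd into the namespace by async_execute_graph
    }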
@@ -405,6 +541,94 @@ def v2_serving_init(context, namespace=None):
     _set_callbacks(server, context)
 
 
+async def async_execute_graph(
+    context: MLClientCtx,
+    data: DataItem,
+    batching: bool,
+    batch_size: Optional[int],
+) -> list[Any]:
+    spec = mlrun.utils.get_serving_spec()
+
+    source_filename = spec.get("filename", None)
+    namespace = {}
+    if source_filename:
+        with open(source_filename) as f:
+            exec(f.read(), namespace)
+
+    server = GraphServer.from_dict(spec)
+
+    if config.log_level.lower() == "debug":
+        server.verbose = True
+    context.logger.info_with("Initializing states", namespace=namespace)
+    kwargs = {}
+    if hasattr(context, "is_mock"):
+        kwargs["is_mock"] = context.is_mock
+    server.init_states(
+        context=None,  # this context is expected to be a nuclio context, which we don't have in this flow
+        namespace=namespace,
+        **kwargs,
+    )
+    context.logger.info("Initializing graph steps")
+    server.init_object(namespace)
+
+    context.logger.info_with("Graph was initialized", verbose=server.verbose)
+
+    if server.verbose:
+        context.logger.info(server.to_yaml())
+
+    df = data.as_df()
+
+    responses = []
+
+    async def run(body):
+        event = storey.Event(id=index, body=body)
+        response = await server.run(event, context)
+        responses.append(response)
+
+    if batching and not batch_size:
+        batch_size = len(df)
+
+    batch = []
+    for index, row in df.iterrows():
+        data = row.to_dict()
+        if batching:
+            batch.append(data)
+            if len(batch) == batch_size:
+                await run(batch)
+                batch = []
+        else:
+            await run(data)
+
+    if batch:
+        await run(batch)
+
+    termination_result = server.wait_for_completion()
+    if asyncio.iscoroutine(termination_result):
+        await termination_result
+
+    return responses
+
+
+def execute_graph(
+    context: MLClientCtx,
+    data: DataItem,
+    batching: bool = False,
+    batch_size: Optional[int] = None,
+) -> (list[Any], Any):
+    """
+    Execute graph as a job, from start to finish.
+
+    :param context: The job's execution client context.
+    :param data: The input data to the job, to be pushed into the graph row by row, or in batches.
+    :param batching: Whether to push one or more batches into the graph rather than row by row.
+    :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
+        be pushed into the graph in one batch.
+
+    :return: A list of responses.
+    """
+    return asyncio.run(async_execute_graph(context, data, batching, batch_size))
+
+
 def _set_callbacks(server, context):
     if not server.graph.supports_termination() or not hasattr(context, "platform"):
         return
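
For reference, a minimal usage sketch of the new execute_graph entry point inside a job handler (the handler and input names are illustrative, not part of this diff):

    # Hedged sketch: push a dataset through a serving graph as a batch job.
    from mlrun.datastore import DataItem
    from mlrun.execution import MLClientCtx
    from mlrun.serving.server import execute_graph

    def handler(context: MLClientCtx, data: DataItem):
        # Process the dataset in batches of 64 rows; omit batch_size to push
        # the entire dataset as a single batch when batching=True.
        responses = execute_graph(context, data, batching=True, batch_size=64)
        context.log_result("num_responses", len(responses))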