mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.
Files changed (53)
  1. mlrun/__init__.py +3 -1
  2. mlrun/common/db/dialects.py +25 -0
  3. mlrun/common/schemas/background_task.py +5 -0
  4. mlrun/common/schemas/function.py +1 -0
  5. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  6. mlrun/common/schemas/model_monitoring/constants.py +16 -0
  7. mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
  8. mlrun/common/schemas/partition.py +13 -3
  9. mlrun/common/schemas/project.py +4 -0
  10. mlrun/common/schemas/serving.py +2 -0
  11. mlrun/config.py +11 -22
  12. mlrun/datastore/utils.py +3 -2
  13. mlrun/db/__init__.py +1 -0
  14. mlrun/db/base.py +11 -10
  15. mlrun/db/httpdb.py +97 -25
  16. mlrun/db/nopdb.py +5 -4
  17. mlrun/db/sql_types.py +160 -0
  18. mlrun/frameworks/tf_keras/__init__.py +4 -4
  19. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
  20. mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
  21. mlrun/frameworks/tf_keras/model_handler.py +80 -9
  22. mlrun/frameworks/tf_keras/utils.py +12 -1
  23. mlrun/launcher/base.py +6 -1
  24. mlrun/launcher/client.py +1 -22
  25. mlrun/launcher/local.py +0 -4
  26. mlrun/model_monitoring/applications/base.py +21 -1
  27. mlrun/model_monitoring/applications/context.py +2 -1
  28. mlrun/projects/pipelines.py +35 -3
  29. mlrun/projects/project.py +13 -29
  30. mlrun/run.py +37 -5
  31. mlrun/runtimes/daskjob.py +0 -2
  32. mlrun/runtimes/kubejob.py +0 -4
  33. mlrun/runtimes/mpijob/abstract.py +0 -2
  34. mlrun/runtimes/mpijob/v1.py +0 -2
  35. mlrun/runtimes/nuclio/function.py +0 -2
  36. mlrun/runtimes/nuclio/serving.py +14 -51
  37. mlrun/runtimes/pod.py +0 -3
  38. mlrun/runtimes/remotesparkjob.py +0 -2
  39. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  40. mlrun/serving/__init__.py +2 -0
  41. mlrun/serving/server.py +159 -123
  42. mlrun/serving/states.py +215 -18
  43. mlrun/serving/system_steps.py +391 -0
  44. mlrun/serving/v2_serving.py +9 -8
  45. mlrun/utils/helpers.py +19 -1
  46. mlrun/utils/version/version.json +2 -2
  47. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/METADATA +22 -18
  48. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/RECORD +52 -50
  49. mlrun/common/db/sql_session.py +0 -79
  50. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/WHEEL +0 -0
  51. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/entry_points.txt +0 -0
  52. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/licenses/LICENSE +0 -0
  53. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py CHANGED
@@ -21,15 +21,15 @@ import os
  import socket
  import traceback
  import uuid
- from typing import Any, Optional, Union
+ from typing import Optional, Union

- import storey
  from nuclio import Context as NuclioContext
  from nuclio.request import Logger as NuclioLogger

  import mlrun
  import mlrun.common.constants
  import mlrun.common.helpers
+ import mlrun.common.schemas
  import mlrun.model_monitoring
  import mlrun.utils
  from mlrun.config import config
@@ -38,13 +38,19 @@ from mlrun.secrets import SecretsStore

  from ..common.helpers import parse_versioned_object_uri
  from ..common.schemas.model_monitoring.constants import FileTargetKind
- from ..datastore import DataItem, get_stream_pusher
+ from ..datastore import get_stream_pusher
  from ..datastore.store_resources import ResourceCache
  from ..errors import MLRunInvalidArgumentError
- from ..execution import MLClientCtx
  from ..model import ModelObj
  from ..utils import get_caller_globals
- from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
+ from .states import (
+     FlowStep,
+     MonitoredStep,
+     RootFlowStep,
+     RouterStep,
+     get_function,
+     graph_root_setter,
+ )
  from .utils import event_id_key, event_path_key

  DUMMY_STREAM = "dummy://"
@@ -316,11 +322,7 @@ class GraphServer(ModelObj):

      def _process_response(self, context, response, get_body):
          body = response.body
-         if (
-             isinstance(context, MLClientCtx)
-             or isinstance(body, context.Response)
-             or get_body
-         ):
+         if isinstance(body, context.Response) or get_body:
              return body

          if body and not isinstance(body, (str, bytes)):
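
With the MLClientCtx branch removed, whether the raw body is returned now depends only on the body type and the get_body flag. A minimal sketch of how this surfaces when testing a serving function locally; the function name, source file, and model details are placeholders, and it assumes to_mock_server() and test() keep their current signatures:

import mlrun

# hypothetical serving function; "serving.py" and the model details are illustrative only
fn = mlrun.code_to_function(
    "my-serving", filename="serving.py", kind="serving", image="mlrun/mlrun"
)
fn.add_model("my-model", model_path="store://models/demo/my-model", class_name="MyModelClass")

server = fn.to_mock_server()
# get_body=True returns the response body object as-is; otherwise a non-str/bytes
# body is JSON-encoded before being returned
raw = server.test("/v2/models/my-model/infer", body={"inputs": [[1, 2, 3]]}, get_body=True)
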
@@ -335,34 +337,150 @@ class GraphServer(ModelObj):
          return self.graph.wait_for_completion()


- def add_system_steps_to_graph(graph: RootFlowStep):
-     model_runner_raisers = {}
-     steps = list(graph.steps.values())
-     for step in steps:
-         if (
-             isinstance(step, mlrun.serving.states.ModelRunnerStep)
-             and step.raise_exception
-         ):
+ def add_error_raiser_step(
+     graph: RootFlowStep, monitored_steps: dict[str, MonitoredStep]
+ ) -> RootFlowStep:
+     monitored_steps_raisers = {}
+     user_steps = list(graph.steps.values())
+     for monitored_step in monitored_steps.values():
+         if monitored_step.raise_exception:
              error_step = graph.add_step(
                  class_name="mlrun.serving.states.ModelRunnerErrorRaiser",
-                 name=f"{step.name}_error_raise",
-                 after=step.name,
+                 name=f"{monitored_step.name}_error_raise",
+                 after=monitored_step.name,
                  full_event=True,
-                 raise_exception=step.raise_exception,
-                 models_names=list(step.class_args["models"].keys()),
+                 raise_exception=monitored_step.raise_exception,
+                 models_names=list(monitored_step.class_args["models"].keys()),
+                 model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
              )
-             if step.responder:
-                 step.responder = False
+             if monitored_step.responder:
+                 monitored_step.responder = False
                  error_step.respond()
-             model_runner_raisers[step.name] = error_step.name
-             error_step.on_error = step.on_error
-         if isinstance(step.after, list):
-             for i in range(len(step.after)):
-                 if step.after[i] in model_runner_raisers:
-                     step.after[i] = model_runner_raisers[step.after[i]]
-         else:
-             if step.after in model_runner_raisers:
-                 step.after = model_runner_raisers[step.after]
+             monitored_steps_raisers[monitored_step.name] = error_step.name
+             error_step.on_error = monitored_step.on_error
+     for step in user_steps:
+         if step.after:
+             if isinstance(step.after, list):
+                 for i in range(len(step.after)):
+                     if step.after[i] in monitored_steps_raisers:
+                         step.after[i] = monitored_steps_raisers[step.after[i]]
+             else:
+                 if (
+                     isinstance(step.after, str)
+                     and step.after in monitored_steps_raisers
+                 ):
+                     step.after = monitored_steps_raisers[step.after]
+     return graph
+
+
+ def add_monitoring_general_steps(
+     project: str,
+     graph: RootFlowStep,
+     context,
+     serving_spec,
+ ) -> tuple[RootFlowStep, FlowStep]:
+     """
+     Adding the monitoring flow connection steps, this steps allow the graph to reconstruct the serving event enrich it
+     and push it to the model monitoring stream
+     system_steps structure -
+     "background_task_status_step" --> "filter_none" --> "monitoring_pre_processor_step" --> "flatten_events"
+     --> "sampling_step" --> "filter_none_sampling" --> "model_monitoring_stream"
+     """
+     monitor_flow_step = graph.add_step(
+         "mlrun.serving.system_steps.BackgroundTaskStatus",
+         "background_task_status_step",
+         context=context,
+         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+     )
+     graph.add_step(
+         "storey.Filter",
+         "filter_none",
+         _fn="(event is not None)",
+         after="background_task_status_step",
+         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+     )
+     graph.add_step(
+         "mlrun.serving.system_steps.MonitoringPreProcessor",
+         "monitoring_pre_processor_step",
+         after="filter_none",
+         full_event=True,
+         context=context,
+         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+     )
+     # flatten the events
+     graph.add_step(
+         "storey.FlatMap",
+         "flatten_events",
+         _fn="(event)",
+         after="monitoring_pre_processor_step",
+         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+     )
+     graph.add_step(
+         "mlrun.serving.system_steps.SamplingStep",
+         "sampling_step",
+         after="flatten_events",
+         sampling_percentage=float(
+             serving_spec.get("parameters", {}).get("sampling_percentage", 100.0)
+             if isinstance(serving_spec, dict)
+             else getattr(serving_spec, "parameters", {}).get(
+                 "sampling_percentage", 100.0
+             ),
+         ),
+         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+     )
+     graph.add_step(
+         "storey.Filter",
+         "filter_none_sampling",
+         _fn="(event is not None)",
+         after="sampling_step",
+         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+     )
+
+     if getattr(context, "is_mock", False):
+         graph.add_step(
+             "mlrun.serving.system_steps.MockStreamPusher",
+             "model_monitoring_stream",
+             after="filter_none_sampling",
+             model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+         )
+     else:
+         stream_uri = mlrun.model_monitoring.get_stream_path(
+             project=project,
+             function_name=mlrun.common.schemas.MonitoringFunctionNames.STREAM,
+         )
+         context.logger.info_with(
+             "Creating Model Monitoring stream target using uri:", uri=stream_uri
+         )
+         graph.add_step(
+             ">>",
+             "model_monitoring_stream",
+             path=stream_uri,
+             sharding_func=mlrun.common.schemas.model_monitoring.constants.StreamProcessingEvent.ENDPOINT_ID,
+             after="filter_none_sampling",
+         )
+     return graph, monitor_flow_step
+
+
+ def add_system_steps_to_graph(
+     project: str, graph: RootFlowStep, track_models: bool, context, serving_spec
+ ) -> RootFlowStep:
+     monitored_steps = graph.get_monitored_steps()
+     graph = add_error_raiser_step(graph, monitored_steps)
+     if track_models:
+         graph, monitor_flow_step = add_monitoring_general_steps(
+             project, graph, context, serving_spec
+         )
+         # Connect each model runner to the monitoring step:
+         for step_name, step in monitored_steps.items():
+             if monitor_flow_step.after:
+                 if isinstance(monitor_flow_step.after, list):
+                     monitor_flow_step.after.append(step_name)
+                 elif isinstance(monitor_flow_step.after, str):
+                     monitor_flow_step.after = [monitor_flow_step.after, step_name]
+             else:
+                 monitor_flow_step.after = [
+                     step_name,
+                 ]
      return graph


@@ -372,8 +490,14 @@ def v2_serving_init(context, namespace=None):
      context.logger.info("Initializing server from spec")
      spec = mlrun.utils.get_serving_spec()
      server = GraphServer.from_dict(spec)
-     if isinstance(server.graph, RootFlowStep):
-         server.graph = add_system_steps_to_graph(copy.deepcopy(server.graph))
+     if isinstance(server.graph, RootFlowStep) and server.graph.include_monitored_step():
+         server.graph = add_system_steps_to_graph(
+             server.project,
+             copy.deepcopy(server.graph),
+             spec.get("track_models"),
+             context,
+             spec,
+         )
      context.logger.info_with(
          "Server graph after adding system steps",
          graph=str(server.graph.steps),
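
For orientation, a hedged sketch of the user-facing flow that leads to this wiring: the monitoring chain is appended only when the graph contains monitored steps and track_models is set on the serving spec. The project, file, and step names below are placeholders, and it assumes set_tracking() and spec.parameters behave as in earlier releases:

import mlrun

project = mlrun.get_or_create_project("demo", context="./")
fn = project.set_function("serving.py", name="serve", kind="serving", image="mlrun/mlrun")

graph = fn.set_topology("flow", engine="async")
graph.add_step(name="preprocess", handler="preprocess")  # user step, placeholder handler
# ... a monitored step (e.g. a ModelRunnerStep) would be added here ...

fn.set_tracking()  # enables track_models, so v2_serving_init appends the monitoring chain
fn.spec.parameters = {"sampling_percentage": 20.0}  # read by SamplingStep; defaults to 100.0

project.deploy_function(fn)
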
@@ -411,94 +535,6 @@ def v2_serving_init(context, namespace=None):
      _set_callbacks(server, context)


- async def async_execute_graph(
-     context: MLClientCtx,
-     data: DataItem,
-     batching: bool,
-     batch_size: Optional[int],
- ) -> list[Any]:
-     spec = mlrun.utils.get_serving_spec()
-
-     source_filename = spec.get("filename", None)
-     namespace = {}
-     if source_filename:
-         with open(source_filename) as f:
-             exec(f.read(), namespace)
-
-     server = GraphServer.from_dict(spec)
-
-     if config.log_level.lower() == "debug":
-         server.verbose = True
-     context.logger.info_with("Initializing states", namespace=namespace)
-     kwargs = {}
-     if hasattr(context, "is_mock"):
-         kwargs["is_mock"] = context.is_mock
-     server.init_states(
-         context=None,  # this context is expected to be a nuclio context, which we don't have in this flow
-         namespace=namespace,
-         **kwargs,
-     )
-     context.logger.info("Initializing graph steps")
-     server.init_object(namespace)
-
-     context.logger.info_with("Graph was initialized", verbose=server.verbose)
-
-     if server.verbose:
-         context.logger.info(server.to_yaml())
-
-     df = data.as_df()
-
-     responses = []
-
-     async def run(body):
-         event = storey.Event(id=index, body=body)
-         response = await server.run(event, context)
-         responses.append(response)
-
-     if batching and not batch_size:
-         batch_size = len(df)
-
-     batch = []
-     for index, row in df.iterrows():
-         data = row.to_dict()
-         if batching:
-             batch.append(data)
-             if len(batch) == batch_size:
-                 await run(batch)
-                 batch = []
-         else:
-             await run(data)
-
-     if batch:
-         await run(batch)
-
-     termination_result = server.wait_for_completion()
-     if asyncio.iscoroutine(termination_result):
-         await termination_result
-
-     return responses
-
-
- def execute_graph(
-     context: MLClientCtx,
-     data: DataItem,
-     batching: bool = False,
-     batch_size: Optional[int] = None,
- ) -> (list[Any], Any):
-     """
-     Execute graph as a job, from start to finish.
-
-     :param context: The job's execution client context.
-     :param data: The input data to the job, to be pushed into the graph row by row, or in batches.
-     :param batching: Whether to push one or more batches into the graph rather than row by row.
-     :param batch_size: The number of rows to push per batch. If not set, and batching=True, the entire dataset will
-         be pushed into the graph in one batch.
-
-     :return: A list of responses.
-     """
-     return asyncio.run(async_execute_graph(context, data, batching, batch_size))
-
-
  def _set_callbacks(server, context):
      if not server.graph.supports_termination() or not hasattr(context, "platform"):
          return
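
The execute_graph and async_execute_graph entry points (together with the MLClientCtx, DataItem, and storey imports that served them) are removed from this module by this diff. For reference, a minimal sketch of how the removed API was invoked, based only on the signature and docstring above; the handler and logged result names are illustrative:

from mlrun.execution import MLClientCtx
from mlrun.serving.server import execute_graph  # removed by this diff

def handler(context: MLClientCtx, data):  # "data" arrives as an mlrun.DataItem input
    # push the dataset through the serving graph in batches of 100 rows
    responses = execute_graph(context, data, batching=True, batch_size=100)
    context.log_result("num_responses", len(responses))  # illustrative result logging
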