mlrun 1.10.0rc7__py3-none-any.whl → 1.10.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (53)
  1. mlrun/__init__.py +3 -1
  2. mlrun/common/db/dialects.py +25 -0
  3. mlrun/common/schemas/background_task.py +5 -0
  4. mlrun/common/schemas/function.py +1 -0
  5. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  6. mlrun/common/schemas/model_monitoring/constants.py +16 -0
  7. mlrun/common/schemas/model_monitoring/model_endpoints.py +8 -0
  8. mlrun/common/schemas/partition.py +13 -3
  9. mlrun/common/schemas/project.py +4 -0
  10. mlrun/common/schemas/serving.py +2 -0
  11. mlrun/config.py +11 -22
  12. mlrun/datastore/utils.py +3 -2
  13. mlrun/db/__init__.py +1 -0
  14. mlrun/db/base.py +11 -10
  15. mlrun/db/httpdb.py +97 -25
  16. mlrun/db/nopdb.py +5 -4
  17. mlrun/db/sql_types.py +160 -0
  18. mlrun/frameworks/tf_keras/__init__.py +4 -4
  19. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +23 -20
  20. mlrun/frameworks/tf_keras/mlrun_interface.py +4 -1
  21. mlrun/frameworks/tf_keras/model_handler.py +80 -9
  22. mlrun/frameworks/tf_keras/utils.py +12 -1
  23. mlrun/launcher/base.py +6 -1
  24. mlrun/launcher/client.py +1 -22
  25. mlrun/launcher/local.py +0 -4
  26. mlrun/model_monitoring/applications/base.py +21 -1
  27. mlrun/model_monitoring/applications/context.py +2 -1
  28. mlrun/projects/pipelines.py +35 -3
  29. mlrun/projects/project.py +13 -29
  30. mlrun/run.py +37 -5
  31. mlrun/runtimes/daskjob.py +0 -2
  32. mlrun/runtimes/kubejob.py +0 -4
  33. mlrun/runtimes/mpijob/abstract.py +0 -2
  34. mlrun/runtimes/mpijob/v1.py +0 -2
  35. mlrun/runtimes/nuclio/function.py +0 -2
  36. mlrun/runtimes/nuclio/serving.py +14 -51
  37. mlrun/runtimes/pod.py +0 -3
  38. mlrun/runtimes/remotesparkjob.py +0 -2
  39. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  40. mlrun/serving/__init__.py +2 -0
  41. mlrun/serving/server.py +159 -123
  42. mlrun/serving/states.py +215 -18
  43. mlrun/serving/system_steps.py +391 -0
  44. mlrun/serving/v2_serving.py +9 -8
  45. mlrun/utils/helpers.py +19 -1
  46. mlrun/utils/version/version.json +2 -2
  47. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/METADATA +22 -18
  48. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/RECORD +52 -50
  49. mlrun/common/db/sql_session.py +0 -79
  50. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/WHEEL +0 -0
  51. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/entry_points.txt +0 -0
  52. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/licenses/LICENSE +0 -0
  53. {mlrun-1.10.0rc7.dist-info → mlrun-1.10.0rc9.dist-info}/top_level.txt +0 -0
mlrun/serving/states.py CHANGED
@@ -23,6 +23,7 @@ __all__ = [
23
23
  import os
24
24
  import pathlib
25
25
  import traceback
26
+ from abc import ABC
26
27
  from copy import copy, deepcopy
27
28
  from inspect import getfullargspec, signature
28
29
  from typing import Any, Optional, Union, cast
@@ -392,7 +393,8 @@ class BaseStep(ModelObj):
392
393
  raise GraphError(
393
394
  f"step {self.name} parent is not set or it's not part of a graph"
394
395
  )
395
-
396
+ if not name and isinstance(class_name, BaseStep):
397
+ name = class_name.name
396
398
  name, step = params_to_step(
397
399
  class_name,
398
400
  name,
@@ -405,7 +407,8 @@ class BaseStep(ModelObj):
405
407
  class_args=class_args,
406
408
  model_endpoint_creation_strategy=model_endpoint_creation_strategy,
407
409
  )
408
-
410
+ # Make sure the model endpoint was not already introduced by a ModelRunnerStep
411
+ self.check_model_endpoint_existence(step, model_endpoint_creation_strategy)
409
412
  self.verify_model_runner_step(step)
410
413
 
411
414
  step = parent._steps.update(name, step)
@@ -452,17 +455,58 @@ class BaseStep(ModelObj):
452
455
  def supports_termination(self):
453
456
  return False
454
457
 
455
- def verify_model_runner_step(self, step: "ModelRunnerStep"):
458
+ def check_model_endpoint_existence(self, step, model_endpoint_creation_strategy):
456
459
  """
457
- Verify ModelRunnerStep, can be part of Flow graph and models can not repeat in graph.
460
+ Verify that the model endpoint name is not duplicated in the flow graph.
458
461
  :param step: ModelRunnerStep to verify
462
+ :param model_endpoint_creation_strategy: Strategy for creating or updating
462
+ the model endpoint.
459
464
  """
460
- if not isinstance(step, ModelRunnerStep):
465
+ if (
466
+ isinstance(step, TaskStep)
467
+ and not isinstance(step, ModelRunnerStep)
468
+ and model_endpoint_creation_strategy
469
+ != schemas.ModelEndpointCreationStrategy.SKIP
470
+ ):
471
+ root = self._extract_root_step()
472
+ if not isinstance(root, RootFlowStep):
473
+ return
474
+ models = []
475
+ if isinstance(step, RouterStep):
476
+ for route in step.routes.values():
477
+ if route.name in root.model_endpoints_names:
478
+ raise GraphError(
479
+ f"The graph already contains the model endpoints named - {route.name}."
480
+ )
481
+ models.append(route.name)
482
+ else:
483
+ if step.name in root.model_endpoints_names:
484
+ raise GraphError(
485
+ f"The graph already contains the model endpoints named - {step.name}."
486
+ )
487
+ models.append(step.name)
488
+ root.update_model_endpoints_routes_names(models)
461
489
  return
462
490
 
491
+ def _extract_root_step(self):
463
492
  root = self
464
493
  while root.parent is not None:
465
494
  root = root.parent
495
+ return root
496
+
497
+ def verify_model_runner_step(
498
+ self,
499
+ step: "ModelRunnerStep",
500
+ ):
501
+ """
502
+ Verify ModelRunnerStep, can be part of Flow graph and models can not repeat in graph.
503
+ :param step: ModelRunnerStep to verify
504
+ """
505
+
506
+ if not isinstance(step, ModelRunnerStep):
507
+ return
508
+
509
+ root = self._extract_root_step()
466
510
 
467
511
  if not isinstance(root, RootFlowStep):
468
512
  raise GraphError(
@@ -474,13 +518,14 @@ class BaseStep(ModelObj):
474
518
  # Get all model_endpoints names that are in both lists
475
519
  common_endpoints_names = list(
476
520
  set(root.model_endpoints_names) & set(step_model_endpoints_names)
521
+ ) or list(
522
+ set(root.model_endpoints_routes_names) & set(step_model_endpoints_names)
477
523
  )
478
524
  if common_endpoints_names:
479
525
  raise GraphError(
480
526
  f"The graph already contains the model endpoints named - {common_endpoints_names}."
481
527
  )
482
- else:
483
- root.extend_model_endpoints_names(step_model_endpoints_names)
528
+ root.update_model_endpoints_names(step_model_endpoints_names)
484
529
 
485
530
 
486
531
  class TaskStep(BaseStep):
@@ -663,7 +708,7 @@ class TaskStep(BaseStep):
663
708
  # todo invoke remote via REST call
664
709
  return event
665
710
 
666
- if self.context.verbose:
711
+ if self.context and self.context.verbose:
667
712
  self.context.logger.info(f"step {self.name} got event {event.body}")
668
713
 
669
714
  # inject context parameter if it is expected by the handler
@@ -873,7 +918,6 @@ class RouterStep(TaskStep):
873
918
  2. Create a new model endpoint with the same name and set it to `latest`.
874
919
 
875
920
  """
876
-
877
921
  if len(self.routes.keys()) >= MAX_MODELS_PER_ROUTER and key not in self.routes:
878
922
  raise mlrun.errors.MLRunModelLimitExceededError(
879
923
  f"Router cannot support more than {MAX_MODELS_PER_ROUTER} model endpoints. "
@@ -887,14 +931,16 @@ class RouterStep(TaskStep):
887
931
  route = TaskStep(
888
932
  class_name,
889
933
  class_args,
934
+ name=key,
890
935
  handler=handler,
891
936
  model_endpoint_creation_strategy=creation_strategy,
892
937
  endpoint_type=schemas.EndpointType.LEAF_EP
893
938
  if self.class_name and "serving.VotingEnsemble" in self.class_name
894
939
  else schemas.EndpointType.NODE_EP,
895
940
  )
896
- route.function = function or route.function
897
941
 
942
+ route.function = function or route.function
943
+ self.check_model_endpoint_existence(route, creation_strategy)
898
944
  route = self._routes.update(key, route)
899
945
  route.set_parent(self)
900
946
  return route
@@ -1057,16 +1103,67 @@ class ModelRunner(storey.ParallelExecution):
1057
1103
  event. Optional. If not passed, all models will be run.
1058
1104
  """
1059
1105
 
1060
- def __init__(self, *args, model_selector: Optional[ModelSelector] = None, **kwargs):
1106
+ def __init__(
1107
+ self, *args, context, model_selector: Optional[ModelSelector] = None, **kwargs
1108
+ ):
1061
1109
  super().__init__(*args, **kwargs)
1062
1110
  self.model_selector = model_selector or ModelSelector()
1111
+ self.context = context
1112
+
1113
+ def preprocess_event(self, event):
1114
+ if not hasattr(event, "_metadata"):
1115
+ event._metadata = {}
1116
+
1117
+ event._metadata["model_runner_name"] = self.name
1118
+ event._metadata["inputs"] = deepcopy(event.body)
1119
+
1120
+ return event
1063
1121
 
1064
1122
  def select_runnables(self, event):
1065
1123
  models = cast(list[Model], self.runnables)
1066
1124
  return self.model_selector.select(event, models)
1067
1125
 
1068
1126
 
1069
- class ModelRunnerStep(TaskStep, StepToDict):
1127
+ class MonitoredStep(ABC, TaskStep, StepToDict):
1128
+ kind = "monitored"
1129
+ _dict_fields = TaskStep._dict_fields + ["raise_exception"]
1130
+
1131
+ def __init__(self, *args, name: str, raise_exception=True, **kwargs):
1132
+ super().__init__(*args, name=name, **kwargs)
1133
+ self.raise_exception = raise_exception
1134
+ self._monitoring_data = None
1135
+
1136
+ def _calculate_monitoring_data(self) -> dict[str, Any]:
1137
+ """
1138
+ Child class must override `_calculate_monitoring_data()` method and provide meaningful data-structure
1139
+ to the pre-process step in the monitoring flow.
1140
+
1141
+ Monitoring data structure should support the following schema:
1142
+
1143
+ ::
1144
+
1145
+ {
1146
+ "inputs": inputs features,
1147
+ "outputs": output schema expected,
1148
+ "input_path": the path where inputs are,
1149
+ "result_path": the path where results are,
1150
+ "creation_strategy": model endpoint creation strategy,
1151
+ "labels": model endpoint labels,
1152
+ "model_endpoint_uid": model endpoint uid (added in deployment),
1153
+ "model_class": the model class
1154
+ }
1155
+
1156
+ """
1157
+
1158
+ raise NotImplementedError
1159
+
1160
+ @property
1161
+ def monitoring_data(self) -> dict[str, Any]:
1162
+ self._monitoring_data = self._calculate_monitoring_data()
1163
+ return self._monitoring_data
1164
+
1165
+
1166
+ class ModelRunnerStep(MonitoredStep):
1070
1167
  """
1071
1168
  Runs multiple Models on each event.
1072
1169
 
@@ -1080,20 +1177,27 @@ class ModelRunnerStep(TaskStep, StepToDict):
1080
1177
  event. Optional. If not passed, all models will be run.
1081
1178
  :param raise_exception: If True, an error will be raised when model selection fails or if one of the models raised
1082
1179
  an error. If False, the error will appear in the output event.
1180
+
1181
+ :raise ModelRunnerError: when a model raises an error, the ModelRunnerStep handles it and collects the errors and
1182
+ outputs from the added models. If raise_exception is True, ModelRunnerError is raised; otherwise
1183
+ the error message is added to the event body, mapped by model name if more than one model was
1184
+ added to the ModelRunnerStep.
1083
1185
  """
1084
1186
 
1085
1187
  kind = "model_runner"
1086
- _dict_fields = TaskStep._dict_fields + ["raise_exception"]
1087
1188
 
1088
1189
  def __init__(
1089
1190
  self,
1090
1191
  *args,
1192
+ name: Optional[str] = None,
1091
1193
  model_selector: Optional[Union[str, ModelSelector]] = None,
1092
1194
  raise_exception: bool = True,
1093
1195
  **kwargs,
1094
1196
  ):
1095
1197
  super().__init__(
1096
1198
  *args,
1199
+ name=name,
1200
+ raise_exception=raise_exception,
1097
1201
  class_name="mlrun.serving.ModelRunner",
1098
1202
  class_args=dict(model_selector=model_selector),
1099
1203
  **kwargs,
@@ -1112,6 +1216,7 @@ class ModelRunnerStep(TaskStep, StepToDict):
1112
1216
  inputs: Optional[list[str]] = None,
1113
1217
  outputs: Optional[list[str]] = None,
1114
1218
  input_path: Optional[str] = None,
1219
+ result_path: Optional[str] = None,
1115
1220
  override: bool = False,
1116
1221
  **model_parameters,
1117
1222
  ) -> None:
@@ -1140,11 +1245,18 @@ class ModelRunnerStep(TaskStep, StepToDict):
1140
1245
  equal to the model_class predict method outputs (length, and order)
1141
1246
  :param input_path: input path inside the user event, expect scopes to be defined by dot notation
1142
1247
  (e.g "inputs.my_model_inputs"). expects list or dictionary type object in path.
1248
+ :param result_path: result path inside the user output event, expect scopes to be defined by dot
1249
+ notation (e.g "outputs.my_model_outputs") expects list or dictionary type object in
1250
+ path.
1143
1251
  :param override: bool allow override existing model on the current ModelRunnerStep.
1144
1252
  :param model_parameters: Parameters for model instantiation
1145
1253
  """
1146
1254
  # TODO allow model_class as Model object as part of ML-9924
1147
1255
  model_parameters = model_parameters or {}
1256
+ if outputs is None and isinstance(
1257
+ model_artifact, mlrun.artifacts.ModelArtifact
1258
+ ):
1259
+ outputs = [feature.name for feature in model_artifact.spec.outputs]
1148
1260
  model_artifact = (
1149
1261
  model_artifact.uri
1150
1262
  if isinstance(model_artifact, mlrun.artifacts.Artifact)
@@ -1173,13 +1285,66 @@ class ModelRunnerStep(TaskStep, StepToDict):
1173
1285
  schemas.MonitoringData.INPUTS: inputs,
1174
1286
  schemas.MonitoringData.OUTPUTS: outputs,
1175
1287
  schemas.MonitoringData.INPUT_PATH: input_path,
1288
+ schemas.MonitoringData.RESULT_PATH: result_path,
1176
1289
  schemas.MonitoringData.CREATION_STRATEGY: creation_strategy,
1177
1290
  schemas.MonitoringData.LABELS: labels,
1178
1291
  schemas.MonitoringData.MODEL_PATH: model_artifact,
1292
+ schemas.MonitoringData.MODEL_CLASS: model_class,
1179
1293
  }
1180
1294
  self.class_args[schemas.ModelRunnerStepData.MODELS] = models
1181
1295
  self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data
1182
1296
 
1297
+ @staticmethod
1298
+ def _get_model_output_schema(
1299
+ model: str, monitoring_data: dict[str, dict[str, str]]
1300
+ ) -> list[str]:
1301
+ output_schema = None
1302
+ if monitoring_data[model].get(schemas.MonitoringData.MODEL_PATH) is not None:
1303
+ artifact = get_store_resource(
1304
+ monitoring_data[model].get(schemas.MonitoringData.MODEL_PATH)
1305
+ )
1306
+ output_schema = [feature.name for feature in artifact.spec.outputs]
1307
+ return output_schema
1308
+
1309
+ @staticmethod
1310
+ def _split_path(path: str) -> Union[str, list[str], None]:
1311
+ if path is not None:
1312
+ parsed_path = path.split(".")
1313
+ if len(parsed_path) == 1:
1314
+ parsed_path = parsed_path[0]
1315
+ return parsed_path
1316
+ return path
1317
+
1318
+ def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
1319
+ monitoring_data = deepcopy(
1320
+ self.class_args.get(
1321
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
1322
+ )
1323
+ )
1324
+ if isinstance(monitoring_data, dict):
1325
+ for model in monitoring_data:
1326
+ monitoring_data[model][schemas.MonitoringData.OUTPUTS] = (
1327
+ monitoring_data[model][schemas.MonitoringData.OUTPUTS]
1328
+ or self._get_model_output_schema(model, monitoring_data)
1329
+ )
1330
+ # Prevent calling _get_model_output_schema for same model more than once
1331
+ self.class_args[
1332
+ mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
1333
+ ][model][schemas.MonitoringData.OUTPUTS] = monitoring_data[model][
1334
+ schemas.MonitoringData.OUTPUTS
1335
+ ]
1336
+ monitoring_data[model][schemas.MonitoringData.INPUT_PATH] = (
1337
+ self._split_path(
1338
+ monitoring_data[model][schemas.MonitoringData.INPUT_PATH]
1339
+ )
1340
+ )
1341
+ monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = (
1342
+ self._split_path(
1343
+ monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
1344
+ )
1345
+ )
1346
+ return monitoring_data
1347
+
1183
1348
  def init_object(self, context, namespace, mode="sync", reset=False, **extra_kwargs):
1184
1349
  model_selector = self.class_args.get("model_selector")
1185
1350
  models = self.class_args.get(schemas.ModelRunnerStepData.MODELS, {})
@@ -1198,6 +1363,8 @@ class ModelRunnerStep(TaskStep, StepToDict):
1198
1363
  self._async_object = ModelRunner(
1199
1364
  model_selector=model_selector,
1200
1365
  runnables=model_objects,
1366
+ name=self.name,
1367
+ context=context,
1201
1368
  )
1202
1369
 
1203
1370
 
@@ -1435,6 +1602,8 @@ class FlowStep(BaseStep):
1435
1602
  :param class_args: class init arguments
1436
1603
  """
1437
1604
 
1605
+ if not name and isinstance(class_name, BaseStep):
1606
+ name = class_name.name
1438
1607
  name, step = params_to_step(
1439
1608
  class_name,
1440
1609
  name,
@@ -1448,6 +1617,8 @@ class FlowStep(BaseStep):
1448
1617
  class_args=class_args,
1449
1618
  )
1450
1619
 
1620
+ # Make sure the model endpoint was not already introduced by a ModelRunnerStep
1621
+ self.check_model_endpoint_existence(step, model_endpoint_creation_strategy)
1451
1622
  self.verify_model_runner_step(step)
1452
1623
 
1453
1624
  after_list = after if isinstance(after, list) else [after]
@@ -1876,6 +2047,7 @@ class RootFlowStep(FlowStep):
1876
2047
  "final_step",
1877
2048
  "on_error",
1878
2049
  "model_endpoints_names",
2050
+ "model_endpoints_routes_names",
1879
2051
  ]
1880
2052
 
1881
2053
  def __init__(
@@ -1893,18 +2065,43 @@ class RootFlowStep(FlowStep):
1893
2065
  engine,
1894
2066
  final_step,
1895
2067
  )
1896
- self._models = []
2068
+ self._models = set()
2069
+ self._route_models = set()
1897
2070
 
1898
2071
  @property
1899
2072
  def model_endpoints_names(self) -> list[str]:
1900
- return self._models
2073
+ return list(self._models)
1901
2074
 
1902
2075
  @model_endpoints_names.setter
1903
2076
  def model_endpoints_names(self, models: list[str]):
1904
- self._models = models
2077
+ self._models = set(models)
2078
+
2079
+ def update_model_endpoints_names(self, model_endpoints_names: list):
2080
+ self._models.update(model_endpoints_names)
2081
+
2082
+ @property
2083
+ def model_endpoints_routes_names(self) -> list[str]:
2084
+ return list(self._route_models)
2085
+
2086
+ @model_endpoints_routes_names.setter
2087
+ def model_endpoints_routes_names(self, models: list[str]):
2088
+ self._route_models = set(models)
1905
2089
 
1906
- def extend_model_endpoints_names(self, model_endpoints_names: list):
1907
- self._models.extend(model_endpoints_names)
2090
+ def update_model_endpoints_routes_names(self, model_endpoints_names: list):
2091
+ self._route_models.update(model_endpoints_names)
2092
+
2093
+ def include_monitored_step(self) -> bool:
2094
+ for step in self.steps.values():
2095
+ if isinstance(step, mlrun.serving.MonitoredStep):
2096
+ return True
2097
+ return False
2098
+
2099
+ def get_monitored_steps(self) -> dict[str, "MonitoredStep"]:
2100
+ return {
2101
+ step.name: step
2102
+ for step in self.steps.values()
2103
+ if isinstance(step, mlrun.serving.MonitoredStep)
2104
+ }
1908
2105
 
1909
2106
 
1910
2107
  classes_map = {