mlrun 1.10.0rc18__py3-none-any.whl → 1.10.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (38)
  1. mlrun/__init__.py +21 -2
  2. mlrun/common/constants.py +1 -0
  3. mlrun/common/schemas/function.py +10 -0
  4. mlrun/common/schemas/model_monitoring/constants.py +4 -11
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
  6. mlrun/datastore/__init__.py +9 -1
  7. mlrun/datastore/model_provider/huggingface_provider.py +114 -26
  8. mlrun/datastore/model_provider/model_provider.py +144 -70
  9. mlrun/datastore/model_provider/openai_provider.py +95 -37
  10. mlrun/db/base.py +0 -19
  11. mlrun/db/httpdb.py +10 -46
  12. mlrun/db/nopdb.py +0 -10
  13. mlrun/launcher/base.py +13 -6
  14. mlrun/model_monitoring/api.py +43 -22
  15. mlrun/model_monitoring/applications/base.py +1 -1
  16. mlrun/model_monitoring/controller.py +112 -38
  17. mlrun/model_monitoring/db/_schedules.py +13 -9
  18. mlrun/model_monitoring/stream_processing.py +16 -12
  19. mlrun/platforms/__init__.py +3 -2
  20. mlrun/projects/project.py +2 -2
  21. mlrun/run.py +1 -1
  22. mlrun/runtimes/base.py +5 -2
  23. mlrun/runtimes/daskjob.py +1 -0
  24. mlrun/runtimes/nuclio/application/application.py +84 -5
  25. mlrun/runtimes/nuclio/function.py +3 -1
  26. mlrun/serving/server.py +24 -0
  27. mlrun/serving/states.py +80 -30
  28. mlrun/serving/system_steps.py +60 -36
  29. mlrun/utils/helpers.py +37 -13
  30. mlrun/utils/notifications/notification_pusher.py +1 -1
  31. mlrun/utils/version/version.json +2 -2
  32. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/METADATA +4 -4
  33. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/RECORD +37 -38
  34. mlrun/api/schemas/__init__.py +0 -259
  35. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/WHEEL +0 -0
  36. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/entry_points.txt +0 -0
  37. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/licenses/LICENSE +0 -0
  38. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/top_level.txt +0 -0
mlrun/runtimes/nuclio/application/application.py CHANGED
@@ -29,12 +29,13 @@ from mlrun.runtimes.nuclio.api_gateway import (
     APIGatewaySpec,
 )
 from mlrun.runtimes.nuclio.function import NuclioSpec, NuclioStatus
-from mlrun.utils import logger, update_in
+from mlrun.utils import is_valid_port, logger, update_in


 class ApplicationSpec(NuclioSpec):
     _dict_fields = NuclioSpec._dict_fields + [
         "internal_application_port",
+        "application_ports",
     ]

     def __init__(
@@ -79,6 +80,7 @@ class ApplicationSpec(NuclioSpec):
         state_thresholds=None,
         disable_default_http_trigger=None,
         internal_application_port=None,
+        application_ports=None,
     ):
         super().__init__(
             command=command,
@@ -126,11 +128,54 @@ class ApplicationSpec(NuclioSpec):
         self.min_replicas = min_replicas or 1
         self.max_replicas = max_replicas or 1

+        # initializing internal application port and application ports
+        self._internal_application_port = None
+        self._application_ports = []
+
+        application_ports = application_ports or []
+
+        # if internal_application_port is not provided, use the first application port
+        if not internal_application_port and len(application_ports) > 0:
+            internal_application_port = application_ports[0]
+
+        # the port of application sidecar to which traffic will be routed from a nuclio function
         self.internal_application_port = (
             internal_application_port
             or mlrun.mlconf.function.application.default_sidecar_internal_port
         )

+        # all exposed ports by the application sidecar
+        self.application_ports = application_ports
+
+    @property
+    def application_ports(self):
+        return self._application_ports
+
+    @application_ports.setter
+    def application_ports(self, ports):
+        """
+        Set the application ports for the application sidecar.
+        The internal application port is always included and always first.
+        """
+        # Handle None / single int
+        if ports is None:
+            ports = []
+        elif isinstance(ports, int):
+            ports = [ports]
+        elif not isinstance(ports, list):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Application ports must be a list of integers"
+            )
+
+        # Validate and normalize
+        cleaned_ports = []
+        for port in ports:
+            is_valid_port(port, raise_on_error=True)
+            if port != self.internal_application_port:
+                cleaned_ports.append(port)
+
+        self._application_ports = [self.internal_application_port] + cleaned_ports
+
     @property
     def internal_application_port(self):
         return self._internal_application_port
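
Note on the new application_ports semantics: the setter guarantees the internal application port is always present and always first, accepts a bare int, and drops duplicates of the internal port. A minimal standalone sketch of that rule (illustrative names, not the mlrun implementation itself):

    def normalize_application_ports(internal_port, ports):
        # mirror of the setter's normalization: None -> [], int -> [int],
        # anything else must be a list; the internal port leads the result
        if ports is None:
            ports = []
        elif isinstance(ports, int):
            ports = [ports]
        elif not isinstance(ports, list):
            raise ValueError("Application ports must be a list of integers")
        cleaned = [p for p in ports if p != internal_port]
        return [internal_port] + cleaned

    assert normalize_application_ports(8080, [8080, 9090]) == [8080, 9090]
    assert normalize_application_ports(8080, 9090) == [8080, 9090]
    assert normalize_application_ports(8080, None) == [8080]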
@@ -138,10 +183,13 @@ class ApplicationSpec(NuclioSpec):
     @internal_application_port.setter
     def internal_application_port(self, port):
         port = int(port)
-        if port < 0 or port > 65535:
-            raise ValueError("Port must be in the range 0-65535")
+        is_valid_port(port, raise_on_error=True)
         self._internal_application_port = port

+        # when setting new internal application port, ensure that it is included in the application ports
+        # it just triggers setter logic, so setting to the same value is a no-op
+        self.application_ports = self._application_ports
+

 class ApplicationStatus(NuclioStatus):
     def __init__(
@@ -222,6 +270,32 @@ class ApplicationRuntime(RemoteRuntime):
     def set_internal_application_port(self, port: int):
         self.spec.internal_application_port = port

+    def with_sidecar(
+        self,
+        name: typing.Optional[str] = None,
+        image: typing.Optional[str] = None,
+        ports: typing.Optional[typing.Union[int, list[int]]] = None,
+        command: typing.Optional[str] = None,
+        args: typing.Optional[list[str]] = None,
+    ):
+        # wraps with_sidecar just to set the application ports
+        super().with_sidecar(
+            name=name,
+            image=image,
+            ports=ports,
+            command=command,
+            args=args,
+        )
+
+        if ports:
+            if self.spec.internal_application_port != ports[0]:
+                logger.info(
+                    f"Setting internal application port to the first port from the sidecar: {ports[0]}. "
+                    f"If this is not intended, please set the internal_application_port explicitly."
+                )
+                self.spec.internal_application_port = ports[0]
+            self.spec.application_ports = ports
+
     def pre_deploy_validation(self):
         super().pre_deploy_validation()
         if not self.spec.config.get("spec.sidecars"):
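
The overridden with_sidecar makes the first sidecar port win: if it differs from the current internal application port, the internal port is reassigned (with an info log). A standalone sketch of that defaulting rule, under the assumption that ports has already been normalized to a list:

    def resolve_internal_port(current_internal_port, sidecar_ports):
        # the first sidecar port becomes the internal application port
        # unless it already matches
        if sidecar_ports and current_internal_port != sidecar_ports[0]:
            print(f"Setting internal application port to {sidecar_ports[0]}")
            return sidecar_ports[0]
        return current_internal_port

    assert resolve_internal_port(8080, [9000, 9001]) == 9000
    assert resolve_internal_port(8080, [8080]) == 8080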
@@ -431,6 +505,7 @@ class ApplicationRuntime(RemoteRuntime):
         ssl_redirect: typing.Optional[bool] = None,
         set_as_default: bool = False,
         gateway_timeout: typing.Optional[int] = None,
+        port: typing.Optional[int] = None,
     ):
         """
         Create the application API gateway. Once the application is deployed, the API gateway can be created.
@@ -447,6 +522,8 @@
         :param set_as_default:   Set the API gateway as the default for the application (`status.api_gateway`)
         :param gateway_timeout:  nginx ingress timeout in sec (request timeout, when will the gateway return an
                                  error)
+        :param port:             The API gateway port, used only when direct_port_access=True
+
         :return: The API gateway URL
         """
         if not name:
@@ -467,7 +544,9 @@
                 "Authentication credentials not provided"
             )

-        ports = self.spec.internal_application_port if direct_port_access else []
+        ports = (
+            port or self.spec.internal_application_port if direct_port_access else []
+        )

         api_gateway = APIGateway(
             APIGatewayMetadata(
@@ -728,7 +807,7 @@ class ApplicationRuntime(RemoteRuntime):
         self.with_sidecar(
             name=self.status.sidecar_name,
             image=self.status.application_image,
-            ports=self.spec.internal_application_port,
+            ports=self.spec.application_ports,
             command=self.spec.command,
             args=self.spec.args,
         )
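
One subtlety in the new ports expression for the API gateway: Python's conditional expression binds looser than "or", so it parses as "(port or internal) if direct_port_access else []". A quick standalone check of that precedence:

    def gateway_ports(port, internal_port, direct_port_access):
        # explicit port wins when given; otherwise fall back to the
        # internal application port; no ports at all without direct access
        return port or internal_port if direct_port_access else []

    assert gateway_ports(9000, 8080, True) == 9000
    assert gateway_ports(None, 8080, True) == 8080
    assert gateway_ports(9000, 8080, False) == []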
mlrun/runtimes/nuclio/function.py CHANGED
@@ -29,6 +29,7 @@ from kubernetes import client
 from nuclio.deploy import find_dashboard_url, get_deploy_status
 from nuclio.triggers import V3IOStreamTrigger

+import mlrun.common.constants
 import mlrun.db
 import mlrun.errors
 import mlrun.k8s_utils
@@ -830,7 +831,8 @@ class RemoteRuntime(KubeResource):
     def _get_runtime_env(self):
         # for runtime specific env var enrichment (before deploy)
         runtime_env = {
-            "MLRUN_ACTIVE_PROJECT": self.metadata.project or mlconf.active_project,
+            mlrun.common.constants.MLRUN_ACTIVE_PROJECT: self.metadata.project
+            or mlconf.active_project,
         }
         if mlconf.httpdb.api_url:
             runtime_env["MLRUN_DBPATH"] = mlconf.httpdb.api_url
mlrun/serving/server.py CHANGED
@@ -361,6 +361,7 @@ def add_error_raiser_step(
         raise_exception=monitored_step.raise_exception,
         models_names=list(monitored_step.class_args["models"].keys()),
         model_endpoint_creation_strategy=mlrun.common.schemas.ModelEndpointCreationStrategy.SKIP,
+        function=monitored_step.function,
     )
     if monitored_step.responder:
         monitored_step.responder = False
@@ -745,6 +746,26 @@
     return responses


+def _is_inside_asyncio_loop():
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+# Workaround for running with local=True in Jupyter (ML-10620)
+def _workaround_asyncio_nesting():
+    try:
+        import nest_asyncio
+    except ImportError:
+        raise mlrun.errors.MLRunRuntimeError(
+            "Cannot execute graph from within an already running asyncio loop. "
+            "Attempt to import nest_asyncio as a workaround failed as well."
+        )
+    nest_asyncio.apply()
+
+
 def execute_graph(
     context: MLClientCtx,
     data: DataItem,
@@ -770,6 +791,9 @@

     :return: A list of responses.
     """
+    if _is_inside_asyncio_loop():
+        _workaround_asyncio_nesting()
+
     return asyncio.run(
         async_execute_graph(
             context,
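
The workaround matters because asyncio.run() refuses to start inside an already-running event loop, which is exactly the situation in a Jupyter kernel. A minimal sketch of the detect-and-patch pattern; nest_asyncio is a third-party package and must be installed for the patch branch to run:

    import asyncio

    def is_inside_asyncio_loop():
        # get_running_loop() raises RuntimeError when no loop is running
        try:
            asyncio.get_running_loop()
            return True
        except RuntimeError:
            return False

    async def main():
        return sum(range(10))

    if is_inside_asyncio_loop():
        # e.g. in Jupyter: patch asyncio so asyncio.run() can nest
        import nest_asyncio
        nest_asyncio.apply()

    print(asyncio.run(main()))  # 45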
mlrun/serving/states.py CHANGED
@@ -24,6 +24,7 @@ import inspect
 import os
 import pathlib
 import traceback
+import warnings
 from abc import ABC
 from copy import copy, deepcopy
 from inspect import getfullargspec, signature
@@ -43,12 +44,16 @@ from mlrun.datastore.datastore_profile import (
     DatastoreProfileV3io,
     datastore_profile_read,
 )
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 from mlrun.datastore.storeytargets import KafkaStoreyTarget, StreamStoreyTarget
-from mlrun.utils import get_data_from_path, logger, split_path
+from mlrun.utils import get_data_from_path, logger, set_data_by_path, split_path

 from ..config import config
-from ..datastore import get_stream_pusher
+from ..datastore import _DummyStream, get_stream_pusher
 from ..datastore.utils import (
     get_kafka_brokers_from_dict,
     parse_kafka_url,
@@ -1206,10 +1211,15 @@ class Model(storey.ParallelExecutionRunnable, ModelObj):

 class LLModel(Model):
     def __init__(
-        self, name: str, input_path: Optional[Union[str, list[str]]] = None, **kwargs
+        self,
+        name: str,
+        input_path: Optional[Union[str, list[str]]] = None,
+        result_path: Optional[Union[str, list[str]]] = None,
+        **kwargs,
     ):
         super().__init__(name, **kwargs)
         self._input_path = split_path(input_path)
+        self._result_path = split_path(result_path)

     def predict(
         self,
@@ -1221,11 +1231,14 @@ class LLModel(Model):
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
         ) and isinstance(self.model_provider, ModelProvider):
-            body["result"] = self.model_provider.invoke(
+            response_with_stats = self.model_provider.invoke(
                 messages=messages,
-                as_str=True,
+                invoke_response_format=InvokeResponseFormat.USAGE,
                 **(model_configuration or {}),
             )
+            set_data_by_path(
+                path=self._result_path, data=body, value=response_with_stats
+            )
         return body

     async def predict_async(
@@ -1238,11 +1251,14 @@ class LLModel(Model):
         if isinstance(
             self.invocation_artifact, mlrun.artifacts.LLMPromptArtifact
         ) and isinstance(self.model_provider, ModelProvider):
-            body["result"] = await self.model_provider.async_invoke(
+            response_with_stats = await self.model_provider.async_invoke(
                 messages=messages,
-                as_str=True,
+                invoke_response_format=InvokeResponseFormat.USAGE,
                 **(model_configuration or {}),
             )
+            set_data_by_path(
+                path=self._result_path, data=body, value=response_with_stats
+            )
         return body

     def run(self, body: Any, path: str, origin_name: Optional[str] = None) -> Any:
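
With the new result_path, the provider response is written into the event body at a nested location instead of always landing in body["result"]. A standalone sketch of the nested write, assuming dotted paths are split into key lists (as split_path appears to do):

    def set_by_path(path, data, value):
        # walk/create intermediate dicts, then assign at the leaf key
        keys = path.split(".")
        current = data
        for key in keys[:-1]:
            if not isinstance(current.get(key), dict):
                current[key] = {}
            current = current[key]
        current[keys[-1]] = value

    body = {"prompt": "hi"}
    set_by_path("llm.response", body, {"answer": "hello", "total_tokens": 12})
    assert body == {
        "prompt": "hi",
        "llm": {"response": {"answer": "hello", "total_tokens": 12}},
    }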
@@ -1287,6 +1303,7 @@ class LLModel(Model):
                 {
                     place_holder: input_data.get(body_map["field"])
                     for place_holder, body_map in prompt_legend.items()
+                    if input_data.get(body_map["field"])
                 }
                 if prompt_legend
                 else {}
1608
1625
  :param outputs: list of the model outputs (e.g. labels) ,if provided will override the outputs
1609
1626
  that been configured in the model artifact, please note that those outputs need to
1610
1627
  be equal to the model_class predict method outputs (length, and order)
1628
+
1629
+ When using LLModel, the output will be overridden with UsageResponseKeys.fields().
1630
+
1611
1631
  :param input_path: when specified selects the key/path in the event to use as model monitoring inputs
1612
1632
  this require that the event body will behave like a dict, expects scopes to be
1613
1633
  defined by dot notation (e.g "data.d").
@@ -1636,7 +1656,14 @@
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot provide a model object as argument to `model_class` and also provide `model_parameters`."
             )
-
+        if type(model_class) is LLModel or (
+            isinstance(model_class, str) and model_class == LLModel.__name__
+        ):
+            if outputs:
+                warnings.warn(
+                    "LLModel with existing outputs detected, overriding to default"
+                )
+            outputs = UsageResponseKeys.fields()
         model_parameters = model_parameters or (
             model_class.to_dict() if isinstance(model_class, Model) else {}
         )
@@ -1652,8 +1679,6 @@
         except mlrun.errors.MLRunNotFoundError:
             raise mlrun.errors.MLRunInvalidArgumentError("Artifact not found.")

-        outputs = outputs or self._get_model_output_schema(model_artifact)
-
         model_artifact = (
             model_artifact.uri
             if isinstance(model_artifact, mlrun.artifacts.Artifact)
@@ -1719,28 +1744,13 @@
         self.class_args[schemas.ModelRunnerStepData.MONITORING_DATA] = monitoring_data

     @staticmethod
-    def _get_model_output_schema(
-        model_artifact: Union[ModelArtifact, LLMPromptArtifact],
-    ) -> Optional[list[str]]:
-        if isinstance(
-            model_artifact,
-            ModelArtifact,
-        ):
-            return [feature.name for feature in model_artifact.spec.outputs]
-        elif isinstance(
-            model_artifact,
-            LLMPromptArtifact,
-        ):
-            _model_artifact = model_artifact.model_artifact
-            return [feature.name for feature in _model_artifact.spec.outputs]
-
-    @staticmethod
-    def _get_model_endpoint_output_schema(
+    def _get_model_endpoint_schema(
         name: str,
         project: str,
         uid: str,
-    ) -> list[str]:
+    ) -> tuple[list[str], list[str]]:
         output_schema = None
+        input_schema = None
         try:
             model_endpoint: mlrun.common.schemas.model_monitoring.ModelEndpoint = (
                 mlrun.db.get_run_db().get_model_endpoint(
@@ -1751,6 +1761,7 @@
                 )
             )
             output_schema = model_endpoint.spec.label_names
+            input_schema = model_endpoint.spec.feature_names
         except (
             mlrun.errors.MLRunNotFoundError,
             mlrun.errors.MLRunInvalidArgumentError,
@@ -1759,7 +1770,7 @@
                 f"Model endpoint not found, using default output schema for model {name}",
                 error=f"{type(ex).__name__}: {ex}",
             )
-        return output_schema
+        return input_schema, output_schema

     def _calculate_monitoring_data(self) -> dict[str, dict[str, str]]:
         monitoring_data = deepcopy(
@@ -1775,6 +1786,36 @@
                 monitoring_data[model][schemas.MonitoringData.RESULT_PATH] = split_path(
                     monitoring_data[model][schemas.MonitoringData.RESULT_PATH]
                 )
+
+                mep_output_schema, mep_input_schema = None, None
+
+                output_schema = self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.OUTPUTS]
+                input_schema = self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.INPUTS]
+                if not output_schema or not input_schema:
+                    # if output or input schema is not provided, try to get it from the model endpoint
+                    mep_input_schema, mep_output_schema = (
+                        self._get_model_endpoint_schema(
+                            model,
+                            self.context.project,
+                            monitoring_data[model].get(
+                                schemas.MonitoringData.MODEL_ENDPOINT_UID, ""
+                            ),
+                        )
+                    )
+                self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.OUTPUTS] = (
+                    output_schema or mep_output_schema
+                )
+                self.class_args[
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA
+                ][model][schemas.MonitoringData.INPUTS] = (
+                    input_schema or mep_input_schema
+                )
             return monitoring_data
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1802,6 +1843,13 @@
                 .get(model_params.get("name"), {})
                 .get(schemas.MonitoringData.INPUT_PATH)
             )
+            model_params[schemas.MonitoringData.RESULT_PATH] = (
+                self.class_args.get(
+                    mlrun.common.schemas.ModelRunnerStepData.MONITORING_DATA, {}
+                )
+                .get(model_params.get("name"), {})
+                .get(schemas.MonitoringData.RESULT_PATH)
+            )
             model = get_class(model, namespace).from_dict(
                 model_params, init_with_params=True
             )
@@ -3099,6 +3147,8 @@ def _init_async_objects(context, steps):
                 context=context,
                 **options,
             )
+        elif stream_path.startswith("dummy://"):
+            step._async_object = _DummyStream(context=context, **options)
         else:
             if stream_path.startswith("v3io://"):
                 endpoint, stream_path = parse_path(step.path)
mlrun/serving/system_steps.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import random
+from copy import copy
 from datetime import timedelta
 from typing import Any, Optional, Union

@@ -22,6 +23,7 @@ import storey
 import mlrun
 import mlrun.artifacts
 import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store
 import mlrun.serving
 from mlrun.common.schemas import MonitoringData
 from mlrun.utils import get_data_from_path, logger
@@ -45,33 +47,20 @@ class MonitoringPreProcessor(storey.MapClass):
         result_path = model_monitoring_data.get(MonitoringData.RESULT_PATH)
         input_path = model_monitoring_data.get(MonitoringData.INPUT_PATH)

-        result = get_data_from_path(result_path, event.body.get(model, event.body))
         output_schema = model_monitoring_data.get(MonitoringData.OUTPUTS)
         input_schema = model_monitoring_data.get(MonitoringData.INPUTS)
-        logger.debug("output schema retrieved", output_schema=output_schema)
-        if isinstance(result, dict):
-            # transpose by key the outputs:
-            outputs = self.transpose_by_key(result, output_schema)
-            if not output_schema:
-                logger.warn(
-                    "Output schema was not provided using Project:log_model or by ModelRunnerStep:add_model order "
-                    "may not preserved"
-                )
-        else:
-            outputs = result
+        logger.debug(
+            "output and input schema retrieved",
+            output_schema=output_schema,
+            input_schema=input_schema,
+        )

-        event_inputs = event._metadata.get("inputs", {})
-        event_inputs = get_data_from_path(input_path, event_inputs)
-        if isinstance(event_inputs, dict):
-            # transpose by key the inputs:
-            inputs = self.transpose_by_key(event_inputs, input_schema)
-            if not input_schema:
-                logger.warn(
-                    "Input schema was not provided using by ModelRunnerStep:add_model, order "
-                    "may not preserved"
-                )
-        else:
-            inputs = event_inputs
+        outputs, new_output_schema = self.get_listed_data(
+            event.body.get(model, event.body), result_path, output_schema
+        )
+        inputs, new_input_schema = self.get_listed_data(
+            event._metadata.get("inputs", {}), input_path, input_schema
+        )

         if outputs and isinstance(outputs[0], list):
             if output_schema and len(output_schema) != len(outputs[0]):
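
The two branches previously duplicated for outputs and inputs are now one helper. A simplified model of the get_listed_data contract (not the mlrun code, and ignoring key normalization): dicts are transposed into value rows, with the schema derived from the dict keys when none was supplied; scalars are wrapped; lists pass through:

    def get_listed_data(raw, schema=None):
        # dicts: select values by key, deriving the schema when absent;
        # scalars: wrap in a list; lists: pass through unchanged
        if isinstance(raw, dict):
            keys = list(schema) if schema else list(raw.keys())
            return [raw[k] for k in keys], keys
        if not isinstance(raw, list):
            return [raw], schema
        return raw, schema

    assert get_listed_data({"a": 1, "b": 2}) == ([1, 2], ["a", "b"])
    assert get_listed_data({"a": 1, "b": 2}, ["b"]) == ([2], ["b"])
    assert get_listed_data(3.14) == ([3.14], None)
    assert get_listed_data([1, 2]) == ([1, 2], None)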
@@ -96,15 +85,43 @@ class MonitoringPreProcessor(storey.MapClass):
                 "outputs and inputs are not in the same length check 'input_path' and "
                 "'output_path' was specified if needed"
             )
-        request = {"inputs": inputs, "id": getattr(event, "id", None)}
-        resp = {"outputs": outputs}
+        request = {
+            "inputs": inputs,
+            "id": getattr(event, "id", None),
+            "input_schema": new_input_schema,
+        }
+        resp = {"outputs": outputs, "output_schema": new_output_schema}

         return request, resp

+    def get_listed_data(
+        self,
+        raw_data: dict,
+        data_path: Optional[Union[list[str], str]] = None,
+        schema: Optional[list[str]] = None,
+    ):
+        """Get data from a path and transpose it by keys if dict is provided."""
+        new_schema = None
+        data_from_path = get_data_from_path(data_path, raw_data)
+        if isinstance(data_from_path, dict):
+            # transpose by key the inputs:
+            listed_data, new_schema = self.transpose_by_key(data_from_path, schema)
+            new_schema = new_schema or schema
+            if not schema:
+                logger.warn(
+                    f"No schema provided through add_model(); the order of {data_from_path} "
+                    "may not be preserved."
+                )
+        elif not isinstance(data_from_path, list):
+            listed_data = [data_from_path]
+        else:
+            listed_data = data_from_path
+        return listed_data, new_schema
+
     @staticmethod
     def transpose_by_key(
         data: dict, schema: Optional[Union[str, list[str]]] = None
-    ) -> Union[list[Any], list[list[Any]]]:
+    ) -> tuple[Union[list[Any], list[list[Any]]], list[str]]:
         """
         Transpose values from a dictionary by keys.

@@ -136,20 +153,27 @@
         * If result is a matrix, returns a list of lists.

         :raises ValueError: If the values include a mix of scalars and lists, or if the list lengths do not match.
+                mlrun.MLRunInvalidArgumentError if the schema keys are not contained in the data keys.
         """
-
+        new_schema = None
+        # Normalize keys in data:
+        normalize_data = {
+            mlrun.feature_store.api.norm_column_name(k): copy(v)
+            for k, v in data.items()
+        }
         # Normalize schema to list
         if not schema:
-            keys = list(data.keys())
+            keys = list(normalize_data.keys())
+            new_schema = keys
         elif isinstance(schema, str):
-            keys = [schema]
+            keys = [mlrun.feature_store.api.norm_column_name(schema)]
         else:
-            keys = schema
+            keys = [mlrun.feature_store.api.norm_column_name(key) for key in schema]

-        values = [data[key] for key in keys if key in data]
+        values = [normalize_data[key] for key in keys if key in normalize_data]
         if len(values) != len(keys):
             raise mlrun.MLRunInvalidArgumentError(
-                f"Schema keys {keys} do not match the data keys {list(data.keys())}."
+                f"Schema keys {keys} are not contained in the data keys {list(data.keys())}."
             )

         # Detect if all are scalars ie: int,float,str
@@ -168,12 +192,12 @@
             mat = np.stack(arrays, axis=0)
             transposed = mat.T
         else:
-            return values[0]
+            return values[0], new_schema

         if transposed.shape[1] == 1 and transposed.shape[0] == 1:
             # Transform [[0]] -> [0]:
-            return transposed[:, 0].tolist()
-        return transposed.tolist()
+            return transposed[:, 0].tolist(), new_schema
+        return transposed.tolist(), new_schema

     def do(self, event):
         monitoring_event_list = []
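
transpose_by_key now normalizes both data keys and schema keys before matching, and returns a derived schema alongside the values when no schema was given. A rough standalone approximation for list-valued columns, assuming normalization lowercases and maps spaces/dashes to underscores (the real code delegates to mlrun.feature_store.api.norm_column_name, and also handles scalar columns and the [[0]] -> [0] squeeze):

    import numpy as np

    def norm(name):
        # assumed normalization, for illustration only
        return name.replace(" ", "_").replace("-", "_").lower()

    def transpose_by_key(data, schema=None):
        data = {norm(k): v for k, v in data.items()}
        keys = [norm(k) for k in schema] if schema else list(data.keys())
        values = [data[k] for k in keys if k in data]
        if len(values) != len(keys):
            raise ValueError(f"Schema keys {keys} are not contained in the data keys")
        # stack columns, then transpose so each row holds one observation
        mat = np.stack([np.asarray(v) for v in values], axis=0).T
        return mat.tolist(), (keys if not schema else None)

    rows, derived = transpose_by_key({"Feature A": [1, 2], "feature-b": [3, 4]})
    assert rows == [[1, 3], [2, 4]]
    assert derived == ["feature_a", "feature_b"]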
mlrun/utils/helpers.py CHANGED
@@ -464,17 +464,11 @@
     return ""


-def normalize_name(name: str, verbose: bool = True):
+def normalize_name(name: str):
     # TODO: Must match
     # [a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?
     name = re.sub(r"\s+", "-", name)
     if "_" in name:
-        if verbose:
-            warnings.warn(
-                "Names with underscore '_' are about to be deprecated, use dashes '-' instead. "
-                f"Replacing '{name}' underscores with dashes.",
-                FutureWarning,
-            )
         name = name.replace("_", "-")
     return name.lower()

@@ -835,7 +829,7 @@ def extend_hub_uri_if_needed(uri) -> tuple[str, bool]:
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Invalid character '/' in function name or source name"
         ) from exc
-    name = normalize_name(name=name, verbose=False)
+    name = normalize_name(name=name)
     if not source_name:
         # Searching item in all sources
         sources = db.list_hub_sources(item_name=name, tag=tag)
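
After dropping the verbose flag, normalize_name silently converts underscores; the transformation itself is unchanged. A standalone copy for reference:

    import re

    def normalize_name(name):
        # whitespace runs and underscores become dashes, then lowercase;
        # no FutureWarning is emitted anymore
        name = re.sub(r"\s+", "-", name)
        if "_" in name:
            name = name.replace("_", "-")
        return name.lower()

    assert normalize_name("My Model_v2") == "my-model-v2"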
@@ -2409,9 +2403,7 @@ def split_path(path: str) -> typing.Union[str, list[str], None]:
     return path


-def get_data_from_path(
-    path: typing.Union[str, list[str], None], data: dict
-) -> dict[str, Any]:
+def get_data_from_path(path: typing.Union[str, list[str], None], data: dict) -> Any:
     if isinstance(path, str):
         output_data = data.get(path)
     elif isinstance(path, list):
@@ -2424,6 +2416,38 @@
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Expected path be of type str or list of str or None"
         )
-    if isinstance(output_data, (int, float)):
-        output_data = [output_data]
     return output_data
+
+
+def is_valid_port(port: int, raise_on_error: bool = False) -> bool:
+    if not port:
+        return False
+    if 0 <= port <= 65535:
+        return True
+    if raise_on_error:
+        raise ValueError("Port must be in the range 0-65535")
+    return False
+
+
+def set_data_by_path(
+    path: typing.Union[str, list[str], None], data: dict, value
+) -> None:
+    if path is None:
+        if not isinstance(value, dict):
+            raise ValueError("When path is None, value must be a dictionary.")
+        data.update(value)
+
+    elif isinstance(path, str):
+        data[path] = value
+
+    elif isinstance(path, list):
+        current = data
+        for key in path[:-1]:
+            if key not in current or not isinstance(current[key], dict):
+                current[key] = {}
+            current = current[key]
+        current[path[-1]] = value
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Expected path to be of type str or list of str"
+        )