mlrun 1.10.0rc37__py3-none-any.whl → 1.10.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (33)
  1. mlrun/artifacts/document.py +6 -1
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/model_monitoring/helpers.py +1 -1
  4. mlrun/common/schemas/model_monitoring/constants.py +0 -2
  5. mlrun/common/secrets.py +22 -1
  6. mlrun/launcher/local.py +2 -0
  7. mlrun/model.py +7 -1
  8. mlrun/model_monitoring/api.py +3 -2
  9. mlrun/model_monitoring/applications/base.py +6 -3
  10. mlrun/model_monitoring/applications/context.py +1 -0
  11. mlrun/model_monitoring/db/tsdb/base.py +2 -4
  12. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +17 -11
  13. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +154 -76
  14. mlrun/projects/project.py +15 -2
  15. mlrun/run.py +26 -1
  16. mlrun/runtimes/__init__.py +18 -0
  17. mlrun/runtimes/base.py +3 -0
  18. mlrun/runtimes/local.py +5 -2
  19. mlrun/runtimes/mounts.py +5 -0
  20. mlrun/runtimes/nuclio/application/application.py +2 -0
  21. mlrun/runtimes/nuclio/function.py +14 -0
  22. mlrun/runtimes/nuclio/serving.py +67 -4
  23. mlrun/runtimes/pod.py +59 -10
  24. mlrun/serving/server.py +42 -10
  25. mlrun/serving/states.py +75 -26
  26. mlrun/utils/helpers.py +86 -10
  27. mlrun/utils/version/version.json +2 -2
  28. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/METADATA +3 -3
  29. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/RECORD +33 -33
  30. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/WHEEL +0 -0
  31. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/entry_points.txt +0 -0
  32. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/licenses/LICENSE +0 -0
  33. {mlrun-1.10.0rc37.dist-info → mlrun-1.10.0rc41.dist-info}/top_level.txt +0 -0
mlrun/runtimes/__init__.py CHANGED
@@ -221,6 +221,24 @@ class RuntimeKinds:
             return True
         return False
 
+    @staticmethod
+    def requires_k8s_name_validation(kind: str) -> bool:
+        """
+        Returns True if the runtime kind creates Kubernetes resources that use the function name.
+
+        Function names for k8s-deployed runtimes must conform to DNS-1123 label requirements:
+        - Lowercase alphanumeric characters or '-'
+        - Start and end with an alphanumeric character
+        - Maximum 63 characters
+
+        Local runtimes (local, handler) run on the local machine and don't create k8s resources,
+        so they don't require k8s naming validation.
+
+        :param kind: Runtime kind string (job, spark, serving, local, etc.)
+        :return: True if the function name needs k8s DNS-1123 validation, False otherwise
+        """
+        return not RuntimeKinds.is_local_runtime(kind)
+
     @staticmethod
     def requires_absolute_artifacts_path(kind):
         """
mlrun/runtimes/base.py CHANGED
@@ -393,6 +393,9 @@ class BaseRuntime(ModelObj):
                 FutureWarning,
             )
         output_path = output_path or out_path or artifact_path
+
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
+
         launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
             self._is_remote, local=local, **launcher_kwargs
         )
mlrun/runtimes/local.py CHANGED
@@ -29,12 +29,12 @@ from os import environ, remove
 from pathlib import Path
 from subprocess import PIPE, Popen
 from sys import executable
+from typing import Optional
 
 from nuclio import Event
 
 import mlrun
 import mlrun.common.constants as mlrun_constants
-import mlrun.common.runtimes.constants
 from mlrun.lists import RunList
 
 from ..errors import err_to_str
@@ -201,9 +201,12 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
     kind = "local"
     _is_remote = False
 
-    def to_job(self, image=""):
+    def to_job(self, image="", func_name: Optional[str] = None):
         struct = self.to_dict()
         obj = KubejobRuntime.from_dict(struct)
+        obj.kind = "job"  # Ensure kind is set to 'job' for KubejobRuntime
+        if func_name:
+            obj.metadata.name = func_name
         if image:
             obj.spec.image = image
         return obj
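A usage sketch of the new func_name parameter; the function and job names are placeholders, and the code_to_function arguments are abbreviated.

import mlrun

# build a local runtime, then convert it to a Kubernetes job with an explicit name
fn = mlrun.code_to_function("my-local-fn", kind="local", filename="handler.py", handler="handler")
job = fn.to_job(image="mlrun/mlrun", func_name="my-batch-job")
assert job.kind == "job"                    # now set explicitly by to_job
assert job.metadata.name == "my-batch-job"  # taken from func_name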
mlrun/runtimes/mounts.py CHANGED
@@ -17,6 +17,8 @@ import typing
 import warnings
 from collections import namedtuple
 
+import mlrun.common.secrets
+import mlrun.errors
 from mlrun.config import config
 from mlrun.config import config as mlconf
 from mlrun.errors import MLRunInvalidArgumentError
@@ -412,6 +414,9 @@ def mount_secret(
                       the specified paths, and unlisted keys will not be
                       present."""
 
+    if secret_name:
+        mlrun.common.secrets.validate_not_forbidden_secret(secret_name.strip())
+
     def _mount_secret(runtime: "KubeResource"):
         # Define the secret volume source
         secret_volume_source = {
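Illustrative only: the actual forbidden-secret list lives in mlrun.common.secrets and is not shown in this diff, so the rejected name below is hypothetical.

import mlrun
from mlrun.runtimes.mounts import mount_secret

fn = mlrun.new_function("my-fn", kind="job", image="mlrun/mlrun")
# a regular user secret mounts as before
fn.apply(mount_secret("my-app-secret", "/secrets"))
# an MLRun-internal secret name would now fail fast on the client:
# fn.apply(mount_secret("<some-internal-mlrun-secret>", "/secrets"))  # raises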
mlrun/runtimes/nuclio/application/application.py CHANGED
@@ -400,6 +400,8 @@ class ApplicationRuntime(RemoteRuntime):
 
         :return: The default API gateway URL if created or True if the function is ready (deployed)
         """
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
+
         if (self.requires_build() and not self.spec.image) or force_build:
             self._fill_credentials()
             self._build_application_image(
mlrun/runtimes/nuclio/function.py CHANGED
@@ -655,6 +655,8 @@ class RemoteRuntime(KubeResource):
         if tag:
             self.metadata.tag = tag
 
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
+
         # Attempt auto-mounting, before sending to remote build
         self.try_auto_mount_based_on_config()
         self._fill_credentials()
@@ -1224,6 +1226,18 @@ class RemoteRuntime(KubeResource):
         # try to infer the invocation url from the internal and if not exists, use external.
         # $$$$ we do not want to use the external invocation url (e.g.: ingress, nodePort, etc.)
 
+        # if none of the urls is set, the function was deployed with watch=False
+        # and its status wasn't fetched from Nuclio;
+        # _get_state fetches the state and updates the urls
+        if (
+            not self.status.address
+            and not self.status.internal_invocation_urls
+            and not self.status.external_invocation_urls
+        ):
+            state, _, _ = self._get_state()
+            if state not in ["ready", "scaledToZero"]:
+                logger.warning(f"Function is in the {state} state")
+
         # prefer internal invocation url if running inside k8s cluster
         if (
             not force_external_address
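A sketch of the scenario the new block handles, with placeholder names; deploy(watch=False) returns before Nuclio reports readiness, so the status URLs can still be empty at invoke time.

import mlrun

fn = mlrun.code_to_function("my-nuclio-fn", kind="nuclio", filename="handler.py", handler="handler")
fn.deploy(watch=False)  # returns immediately; status.address and invocation urls may be unset

# invoke() now calls _get_state() to refresh the urls first, and logs a warning
# (rather than failing on a missing address) if the function is not yet
# "ready" or "scaledToZero"
resp = fn.invoke("/", body={"hello": "world"})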
mlrun/runtimes/nuclio/serving.py CHANGED
@@ -23,6 +23,7 @@ from nuclio import KafkaTrigger
 
 import mlrun
 import mlrun.common.schemas as schemas
+import mlrun.common.secrets
 import mlrun.datastore.datastore_profile as ds_profile
 from mlrun.datastore import get_kafka_brokers_from_dict, parse_kafka_url
 from mlrun.model import ObjectList
@@ -635,7 +636,12 @@ class ServingRuntime(RemoteRuntime):
 
         :returns: The Runtime (function) object
         """
-
+        if kind == "azure_vault" and isinstance(source, dict):
+            candidate_secret_name = (source.get("k8s_secret") or "").strip()
+            if candidate_secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(
+                    candidate_secret_name
+                )
         if kind == "vault" and isinstance(source, list):
             source = {"project": self.metadata.project, "secrets": source}
@@ -659,6 +665,9 @@ class ServingRuntime(RemoteRuntime):
         :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
         :param force_build: set True for force building the image
         """
+        # Validate function name before deploying to k8s
+        mlrun.utils.helpers.validate_function_name(self.metadata.name)
+
         load_mode = self.spec.load_mode
         if load_mode and load_mode not in ["sync", "async"]:
             raise ValueError(f"illegal model loading mode {load_mode}")
@@ -855,8 +864,20 @@ class ServingRuntime(RemoteRuntime):
             )
         self._mock_server = self.to_mock_server()
 
-    def to_job(self) -> KubejobRuntime:
-        """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job."""
+    def to_job(self, func_name: Optional[str] = None) -> KubejobRuntime:
+        """Convert this ServingRuntime to a KubejobRuntime, so that the graph can be run as a standalone job.
+
+        Args:
+            func_name: Optional custom name for the job function. If not provided, a '-batch'
+                suffix is automatically appended to the serving function name to prevent a database collision.
+
+        Returns:
+            A KubejobRuntime configured to execute the serving graph as a batch job.
+
+        Note:
+            The job will have a different name than the serving function to prevent a database collision.
+            The original serving function remains unchanged and can still be invoked after running the job.
+        """
         if self.spec.function_refs:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"Cannot convert function '{self.metadata.name}' to a job because it has child functions"
@@ -890,8 +911,50 @@ class ServingRuntime(RemoteRuntime):
             parameters=self.spec.parameters,
             graph=self.spec.graph,
         )
+
+        job_metadata = deepcopy(self.metadata)
+        original_name = job_metadata.name
+
+        if func_name:
+            # User provided explicit job name
+            job_metadata.name = func_name
+            logger.debug(
+                "Creating job from serving function with custom name",
+                new_name=func_name,
+            )
+        else:
+            job_metadata.name, was_renamed, suffix = (
+                mlrun.utils.helpers.ensure_batch_job_suffix(job_metadata.name)
+            )
+
+            # Check if the resulting name exceeds the Kubernetes length limit
+            if (
+                len(job_metadata.name)
+                > mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Cannot convert serving function '{original_name}' to batch job: "
+                    f"the resulting name '{job_metadata.name}' ({len(job_metadata.name)} characters) "
+                    f"exceeds the Kubernetes limit of {mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH} characters. "
+                    f"Please provide a custom name via the func_name parameter, "
+                    f"with at most {mlrun.common.constants.K8S_DNS_1123_LABEL_MAX_LENGTH} characters."
+                )
+
+            if was_renamed:
+                logger.info(
+                    "Creating job from serving function (auto-appended suffix to prevent collision)",
+                    new_name=job_metadata.name,
+                    suffix=suffix,
+                )
+            else:
+                logger.debug(
+                    "Creating job from serving function (name already has suffix)",
+                    name=original_name,
+                    suffix=suffix,
+                )
+
         job = KubejobRuntime(
             spec=spec,
-            metadata=self.metadata,
+            metadata=job_metadata,
         )
         return job
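The ensure_batch_job_suffix helper is consumed above but not defined in this diff. A minimal sketch consistent with its call site and the log messages (it returns the possibly-suffixed name, whether it was renamed, and the suffix used); the real implementation may differ.

def ensure_batch_job_suffix_sketch(name: str, suffix: str = "-batch") -> tuple[str, bool, str]:
    # keep the name as-is when it already carries the suffix
    if name.endswith(suffix):
        return name, False, suffix
    return name + suffix, True, suffix

assert ensure_batch_job_suffix_sketch("my-serving") == ("my-serving-batch", True, "-batch")
assert ensure_batch_job_suffix_sketch("my-serving-batch") == ("my-serving-batch", False, "-batch")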
mlrun/runtimes/pod.py CHANGED
@@ -20,12 +20,14 @@ import typing
 import warnings
 from collections.abc import Iterable
 from enum import Enum
+from typing import Optional
 
 import dotenv
 import kubernetes.client as k8s_client
 from kubernetes.client import V1Volume, V1VolumeMount
 
 import mlrun.common.constants
+import mlrun.common.secrets
 import mlrun.errors
 import mlrun.runtimes.mounts
 import mlrun.utils.regex
@@ -708,19 +710,45 @@ class KubeResource(BaseRuntime):
     def spec(self, spec):
         self._spec = self._verify_dict(spec, "spec", KubeResourceSpec)
 
-    def set_env_from_secret(self, name, secret=None, secret_key=None):
-        """set pod environment var from secret"""
-        secret_key = secret_key or name
+    def set_env_from_secret(
+        self,
+        name: str,
+        secret: Optional[str] = None,
+        secret_key: Optional[str] = None,
+    ):
+        """
+        Set an environment variable from a Kubernetes Secret.
+        A client-side guard forbids MLRun internal auth/project secrets; this is a no-op on the API side.
+        """
+        mlrun.common.secrets.validate_not_forbidden_secret(secret)
+        key = secret_key or name
         value_from = k8s_client.V1EnvVarSource(
-            secret_key_ref=k8s_client.V1SecretKeySelector(name=secret, key=secret_key)
+            secret_key_ref=k8s_client.V1SecretKeySelector(name=secret, key=key)
         )
-        return self._set_env(name, value_from=value_from)
+        return self._set_env(name=name, value_from=value_from)
 
-    def set_env(self, name, value=None, value_from=None):
-        """set pod environment var from value"""
-        if value is not None:
-            return self._set_env(name, value=str(value))
-        return self._set_env(name, value_from=value_from)
+    def set_env(
+        self,
+        name: str,
+        value: Optional[str] = None,
+        value_from: Optional[typing.Any] = None,
+    ):
+        """
+        Set an environment variable.
+        If the value comes from a Secret, it is validated on the client side only.
+        """
+        if value_from is not None:
+            secret_name = self._extract_secret_name_from_value_from(
+                value_from=value_from
+            )
+            if secret_name:
+                mlrun.common.secrets.validate_not_forbidden_secret(secret_name)
+            return self._set_env(name=name, value_from=value_from)
+
+        # Plain literal value path
+        return self._set_env(
+            name=name, value=(str(value) if value is not None else None)
+        )
 
     def with_annotations(self, annotations: dict):
         """set a key/value annotations in the metadata of the pod"""
@@ -1366,6 +1394,27 @@ class KubeResource(BaseRuntime):
 
         return self.status.state
 
+    @staticmethod
+    def _extract_secret_name_from_value_from(
+        value_from: typing.Any,
+    ) -> Optional[str]:
+        """Extract the secret name from a V1EnvVarSource or a dict representation of one."""
+        if isinstance(value_from, k8s_client.V1EnvVarSource):
+            if value_from.secret_key_ref:
+                return value_from.secret_key_ref.name
+        elif isinstance(value_from, dict):
+            value_from = (
+                value_from.get("valueFrom")
+                or value_from.get("value_from")
+                or value_from
+            )
+            secret_key_ref = (value_from or {}).get("secretKeyRef") or (
+                value_from or {}
+            ).get("secret_key_ref")
+            if isinstance(secret_key_ref, dict):
+                return secret_key_ref.get("name")
+        return None
+
 
 def _resolve_if_type_sanitized(attribute_name, attribute):
     attribute_config = sanitized_attributes[attribute_name]
mlrun/serving/server.py CHANGED
@@ -23,6 +23,7 @@ import os
 import socket
 import traceback
 import uuid
+from collections import defaultdict
 from datetime import datetime, timezone
 from typing import Any, Optional, Union
 
@@ -50,7 +51,7 @@ from ..datastore.store_resources import ResourceCache
 from ..errors import MLRunInvalidArgumentError
 from ..execution import MLClientCtx
 from ..model import ModelObj
-from ..utils import get_caller_globals, get_module_name_from_path
+from ..utils import get_caller_globals, get_relative_module_name_from_path
 from .states import (
     FlowStep,
     MonitoredStep,
@@ -522,10 +523,6 @@ def add_system_steps_to_graph(
     monitor_flow_step.after = [
         step_name,
     ]
-    context.logger.info_with(
-        "Server graph after adding system steps",
-        graph=str(graph.steps),
-    )
     return graph
 
 
@@ -583,7 +580,7 @@ async def async_execute_graph(
     batch_size: Optional[int],
     read_as_lists: bool,
     nest_under_inputs: bool,
-) -> list[Any]:
+) -> None:
     # Validate that data parameter is a DataItem and not passed via params
     if not isinstance(data, DataItem):
         raise MLRunInvalidArgumentError(
@@ -593,7 +590,7 @@ async def async_execute_graph(
             f"while 'inputs' is for data files that need to be loaded. "
             f"Example: run_function(..., inputs={{'data': 'path/to/data.csv'}}, params={{other_config: value}})"
         )
-
+    run_call_count = 0
     spec = mlrun.utils.get_serving_spec()
     modname = None
     code = os.getenv("MLRUN_EXEC_CODE")
@@ -607,7 +604,17 @@ async def async_execute_graph(
     # gets set in local flow and not just in the remote pod
     source_file_path = spec.get("filename", None)
     if source_file_path:
-        modname = get_module_name_from_path(source_file_path)
+        source_file_path_object, working_dir_path_object = (
+            mlrun.utils.helpers.get_source_and_working_dir_paths(source_file_path)
+        )
+        if not source_file_path_object.is_relative_to(working_dir_path_object):
+            raise mlrun.errors.MLRunRuntimeError(
+                f"Source file path '{source_file_path}' is not under the current working directory "
+                f"(which is required when running with local=True)"
+            )
+        modname = get_relative_module_name_from_path(
+            source_file_path_object, working_dir_path_object
+        )
 
     namespace = {}
     if modname:
@@ -682,7 +689,6 @@ async def async_execute_graph(
 
     if config.log_level.lower() == "debug":
         server.verbose = True
-    context.logger.info_with("Initializing states", namespace=namespace)
     kwargs = {}
     if hasattr(context, "is_mock"):
         kwargs["is_mock"] = context.is_mock
@@ -700,6 +706,7 @@ async def async_execute_graph(
     context.logger.info(server.to_yaml())
 
     async def run(body):
+        nonlocal run_call_count
         event = storey.Event(id=index, body=body)
         if timestamp_column:
             if batching:
@@ -714,6 +721,7 @@ async def async_execute_graph(
                     f"Event body '{body}' did not contain timestamp column '{timestamp_column}'"
                 )
             event._original_timestamp = body[timestamp_column]
+        run_call_count += 1
         return await server.run(event, context)
 
     if batching and not batch_size:
@@ -771,7 +779,31 @@ async def async_execute_graph(
         model_endpoint_uids=model_endpoint_uids,
     )
 
-    return responses
+    # log the results as artifacts
+    num_of_meps_in_the_graph = len(server.graph.model_endpoints_names)
+    artifact_path = None
+    if (
+        "{{run.uid}}" not in context.artifact_path
+    ):  # TODO: delete when IG-22841 is resolved
+        artifact_path = "+/{{run.uid}}"  # will be concatenated to the context's path in extend_artifact_path
+    if num_of_meps_in_the_graph <= 1:
+        context.log_dataset(
+            "prediction", df=pd.DataFrame(responses), artifact_path=artifact_path
+        )
+    else:
+        # turn this list of samples into a dict of lists, one per model endpoint
+        grouped = defaultdict(list)
+        for sample in responses:
+            for model_name, features in sample.items():
+                grouped[model_name].append(features)
+        # create a dataframe per model endpoint and log it
+        for model_name, features in grouped.items():
+            context.log_dataset(
+                f"prediction_{model_name}",
+                df=pd.DataFrame(features),
+                artifact_path=artifact_path,
+            )
+    context.log_result("num_rows", run_call_count)
 
 
 def _is_inside_asyncio_loop():
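A standalone demonstration of the per-model-endpoint grouping above, with made-up responses; it assumes each event's response maps model endpoint names to feature dicts, as the loop implies.

from collections import defaultdict

import pandas as pd

responses = [
    {"model-a": {"pred": 0.1}, "model-b": {"pred": 0.7}},
    {"model-a": {"pred": 0.4}, "model-b": {"pred": 0.2}},
]
grouped = defaultdict(list)
for sample in responses:
    for model_name, features in sample.items():
        grouped[model_name].append(features)

frames = {name: pd.DataFrame(rows) for name, rows in grouped.items()}
# frames["model-a"] has one row per event, matching the "prediction_model-a" dataset logged above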