mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +37 -3
- mlrun/__main__.py +5 -0
- mlrun/alerts/alert.py +1 -0
- mlrun/artifacts/document.py +78 -36
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/runtimes/constants.py +17 -0
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/model_monitoring/constants.py +32 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/config.py +39 -6
- mlrun/datastore/datastore_profile.py +58 -16
- mlrun/datastore/sources.py +7 -1
- mlrun/datastore/vectorstore.py +20 -1
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +97 -10
- mlrun/db/nopdb.py +19 -0
- mlrun/errors.py +4 -0
- mlrun/execution.py +15 -6
- mlrun/frameworks/_common/model_handler.py +0 -2
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +5 -1
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/controller.py +266 -103
- mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
- mlrun/model_monitoring/helpers.py +16 -10
- mlrun/model_monitoring/stream_processing.py +106 -35
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packagers_manager.py +4 -18
- mlrun/projects/pipelines.py +18 -5
- mlrun/projects/project.py +156 -39
- mlrun/runtimes/nuclio/serving.py +22 -13
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/secrets.py +1 -1
- mlrun/serving/server.py +11 -3
- mlrun/serving/states.py +65 -8
- mlrun/serving/v2_serving.py +67 -44
- mlrun/utils/helpers.py +111 -23
- mlrun/utils/notifications/notification/base.py +6 -1
- mlrun/utils/notifications/notification/slack.py +5 -1
- mlrun/utils/notifications/notification_pusher.py +67 -36
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/__init__.py
CHANGED
@@ -213,7 +213,41 @@ def set_env_from_file(env_file: str, return_dict: bool = False) -> Optional[dict
     env_vars = dotenv.dotenv_values(env_file)
     if None in env_vars.values():
         raise MLRunInvalidArgumentError("env file lines must be in the form key=value")
-
-
+
+    ordered_env_vars = order_env_vars(env_vars)
+    for key, value in ordered_env_vars.items():
+        environ[key] = value
+
     mlconf.reload()  # reload mlrun configuration
-    return
+    return ordered_env_vars if return_dict else None
+
+
+def order_env_vars(env_vars: dict[str, str]) -> dict[str, str]:
+    """
+    Order and process environment variables by first handling specific ordered keys,
+    then processing the remaining keys in the given dictionary.
+
+    The function ensures that environment variables defined in the `ordered_keys` list
+    are added to the result dictionary first. Any other environment variables from
+    `env_vars` are then added in the order they appear in the input dictionary.
+
+    :param env_vars: A dictionary where each key is the name of an environment variable (str),
+                     and each value is the corresponding environment variable value (str).
+    :return: A dictionary with the processed environment variables, ordered with the specific
+             keys first, followed by the rest in their original order.
+    """
+    ordered_keys = mlconf.get_ordered_keys()
+
+    ordered_env_vars: dict[str, str] = {}
+
+    # First, add the ordered keys to the dictionary
+    for key in ordered_keys:
+        if key in env_vars:
+            ordered_env_vars[key] = env_vars[key]
+
+    # Then, add the remaining keys (those not in ordered_keys)
+    for key, value in env_vars.items():
+        if key not in ordered_keys:
+            ordered_env_vars[key] = value
+
+    return ordered_env_vars
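The effect of the new ordering helper can be seen with a plain dictionary: keys returned by `mlconf.get_ordered_keys()` (currently only `MLRUN_HTTPDB__HTTP__VERIFY`, per the `config.py` change further down) are applied to the environment before everything else. A minimal standalone sketch of that behavior, using hypothetical env-file contents:

```python
# Hypothetical env-file contents (not taken from the diff).
env_vars = {
    "MLRUN_DBPATH": "https://mlrun-api.example.com",
    "MLRUN_HTTPDB__HTTP__VERIFY": "false",
    "V3IO_ACCESS_KEY": "some-key",
}
ordered_keys = ["MLRUN_HTTPDB__HTTP__VERIFY"]  # what Config.get_ordered_keys() returns in this release

# Same two-pass ordering as order_env_vars(): ordered keys first, then the rest in input order.
ordered = {k: env_vars[k] for k in ordered_keys if k in env_vars}
ordered.update({k: v for k, v in env_vars.items() if k not in ordered_keys})

assert list(ordered) == ["MLRUN_HTTPDB__HTTP__VERIFY", "MLRUN_DBPATH", "V3IO_ACCESS_KEY"]
```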
mlrun/__main__.py
CHANGED
@@ -32,6 +32,7 @@ from tabulate import tabulate
 import mlrun
 import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas
+import mlrun.utils.helpers
 from mlrun.common.helpers import parse_versioned_object_uri
 from mlrun.runtimes.mounts import auto_mount as auto_mount_modifier

@@ -304,6 +305,7 @@ def run(
         update_in(runtime, "spec.build.code_origin", url_file)
     elif runtime:
         runtime = py_eval(runtime)
+        runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
@@ -515,6 +517,7 @@ def build(

     if runtime:
         runtime = py_eval(runtime)
+        runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
@@ -662,6 +665,8 @@ def deploy(
         runtime = py_eval(spec)
     else:
         runtime = {}
+
+    runtime = mlrun.utils.helpers.as_dict(runtime)
     if not isinstance(runtime, dict):
         print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)
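All three CLI paths (`run`, `build`, `deploy`) now pass the evaluated runtime/spec value through `mlrun.utils.helpers.as_dict` before the `isinstance(runtime, dict)` check. The helper's implementation is not part of this diff; a purely illustrative normalization of that shape (hypothetical, not mlrun's code) could look like:

```python
import json
from typing import Any


def as_dict_sketch(runtime: Any) -> Any:
    """Illustrative only: coerce a JSON string or an object exposing to_dict()
    into a dict, otherwise return the value unchanged so the caller's
    isinstance() check can still reject it."""
    if isinstance(runtime, str):
        try:
            return json.loads(runtime)
        except json.JSONDecodeError:
            return runtime
    if hasattr(runtime, "to_dict"):
        return runtime.to_dict()
    return runtime
```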
mlrun/alerts/alert.py
CHANGED
mlrun/artifacts/document.py
CHANGED
@@ -20,10 +20,12 @@ from importlib import import_module
 from typing import Optional, Union

 import mlrun
+import mlrun.artifacts
 from mlrun.artifacts import Artifact, ArtifactSpec
 from mlrun.model import ModelObj

 from ..utils import generate_artifact_uri
+from .base import ArtifactStatus


 class DocumentLoaderSpec(ModelObj):
@@ -41,13 +43,13 @@ class DocumentLoaderSpec(ModelObj):

     """

-    _dict_fields = ["loader_class_name", "src_name", "kwargs"]
+    _dict_fields = ["loader_class_name", "src_name", "download_object", "kwargs"]

     def __init__(
         self,
         loader_class_name: str = "langchain_community.document_loaders.TextLoader",
         src_name: str = "file_path",
-        download_object: bool =
+        download_object: bool = True,
         kwargs: Optional[dict] = None,
     ):
         """
@@ -191,6 +193,14 @@ class MLRunLoader:
             self.producer = mlrun.get_or_create_project(self.producer)

     def lazy_load(self) -> Iterator["Document"]:  # noqa: F821
+        collections = None
+        try:
+            artifact = self.producer.get_artifact(self.artifact_key, self.tag)
+            collections = (
+                artifact.status.collections if artifact else collections
+            )
+        except mlrun.MLRunNotFoundError:
+            pass
         artifact = self.producer.log_document(
             key=self.artifact_key,
             document_loader_spec=self.loader_spec,
@@ -198,6 +208,7 @@ class MLRunLoader:
             upload=self.upload,
             labels=self.labels,
             tag=self.tag,
+            collections=collections,
         )
         res = artifact.to_langchain_documents()
         return res
@@ -217,30 +228,8 @@ class MLRunLoader:
     @staticmethod
     def artifact_key_instance(artifact_key: str, src_path: str) -> str:
         if "%%" in artifact_key:
-
-            # Convert anchored pattern (^...$) to non-anchored version for finditer
-            search_pattern = pattern.strip("^$")
-            result = []
-            current_pos = 0
-
-            # Find all valid sequences
-            for match in re.finditer(search_pattern, src_path):
-                # Add hex values for characters between matches
-                for char in src_path[current_pos : match.start()]:
-                    result.append(hex(ord(char))[2:].zfill(2))
-
-                # Add the valid sequence
-                result.append(match.group())
-                current_pos = match.end()
-
-            # Handle any remaining characters after the last match
-            for char in src_path[current_pos:]:
-                result.append(hex(ord(char))[2:].zfill(2))
-
-            resolved_path = "".join(result)
-
+            resolved_path = DocumentArtifact.key_from_source(src_path)
             artifact_key = artifact_key.replace("%%", resolved_path)
-
         return artifact_key


@@ -249,29 +238,70 @@ class DocumentArtifact(Artifact):
     A specific artifact class inheriting from generic artifact, used to maintain Document meta-data.
     """

+    @staticmethod
+    def key_from_source(src_path: str) -> str:
+        """Convert a source path into a valid artifact key by replacing invalid characters with underscores.
+        Args:
+            src_path (str): The source path to be converted into a valid artifact key
+        Returns:
+            str: A modified version of the source path where all invalid characters are replaced
+                with underscores while preserving valid sequences in their original positions
+        Examples:
+            >>> DocumentArtifact.key_from_source("data/file-name(v1).txt")
+            "data_file-name_v1__txt"
+        """
+        pattern = mlrun.utils.regex.artifact_key[0]
+        # Convert anchored pattern (^...$) to non-anchored version for finditer
+        search_pattern = pattern.strip("^$")
+        result = []
+        current_pos = 0
+
+        # Find all valid sequences
+        for match in re.finditer(search_pattern, src_path):
+            # Add '_' values for characters between matches
+            for char in src_path[current_pos : match.start()]:
+                result.append("_")
+
+            # Add the valid sequence
+            result.append(match.group())
+            current_pos = match.end()
+
+        # Handle any remaining characters after the last match
+        for char in src_path[current_pos:]:
+            result.append("_")
+
+        resolved_path = "".join(result)
+        return resolved_path
+
     class DocumentArtifactSpec(ArtifactSpec):
         _dict_fields = ArtifactSpec._dict_fields + [
             "document_loader",
-            "collections",
             "original_source",
         ]
-        _exclude_fields_from_uid_hash = ArtifactSpec._exclude_fields_from_uid_hash + [
-            "collections",
-        ]

         def __init__(
             self,
             *args,
             document_loader: Optional[DocumentLoaderSpec] = None,
-            collections: Optional[dict] = None,
             original_source: Optional[str] = None,
             **kwargs,
         ):
             super().__init__(*args, **kwargs)
             self.document_loader = document_loader
-            self.collections = collections if collections is not None else {}
             self.original_source = original_source

+    class DocumentArtifactStatus(ArtifactStatus):
+        _dict_fields = ArtifactStatus._dict_fields + ["collections"]
+
+        def __init__(
+            self,
+            *args,
+            collections: Optional[dict] = None,
+            **kwargs,
+        ):
+            super().__init__(*args, **kwargs)
+            self.collections = collections if collections is not None else {}
+
     kind = "document"

     METADATA_SOURCE_KEY = "source"
@@ -286,6 +316,7 @@ class DocumentArtifact(Artifact):
         self,
         original_source: Optional[str] = None,
         document_loader_spec: Optional[DocumentLoaderSpec] = None,
+        collections: Optional[dict] = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -295,6 +326,17 @@ class DocumentArtifact(Artifact):
             else self.spec.document_loader
         )
         self.spec.original_source = original_source or self.spec.original_source
+        self.status = DocumentArtifact.DocumentArtifactStatus(collections=collections)
+
+    @property
+    def status(self) -> DocumentArtifactStatus:
+        return self._status
+
+    @status.setter
+    def status(self, status):
+        self._status = self._verify_dict(
+            status, "status", DocumentArtifact.DocumentArtifactStatus
+        )

     @property
     def spec(self) -> DocumentArtifactSpec:
@@ -355,7 +397,7 @@ class DocumentArtifact(Artifact):
         metadata[self.METADATA_ORIGINAL_SOURCE_KEY] = self.spec.original_source
         metadata[self.METADATA_SOURCE_KEY] = self.get_source()
         metadata[self.METADATA_ARTIFACT_TAG] = self.tag or "latest"
-        metadata[self.METADATA_ARTIFACT_KEY] = self.
+        metadata[self.METADATA_ARTIFACT_KEY] = self.db_key
         metadata[self.METADATA_ARTIFACT_PROJECT] = self.metadata.project

         if self.get_target_path():
@@ -386,8 +428,8 @@ class DocumentArtifact(Artifact):
         Args:
            collection_id (str): The ID of the collection to add
         """
-        if collection_id not in self.
-        self.
+        if collection_id not in self.status.collections:
+            self.status.collections[collection_id] = "1"
             return True
         return False

@@ -403,7 +445,7 @@ class DocumentArtifact(Artifact):
         Args:
            collection_id (str): The ID of the collection to remove
         """
-        if collection_id in self.
-        self.
+        if collection_id in self.status.collections:
+            self.status.collections.pop(collection_id)
             return True
         return False
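The `key_from_source` helper drives the `%%` substitution in `artifact_key_instance`. A standalone sketch of the same replacement idea, assuming an artifact-key pattern of `[a-zA-Z0-9_-]+` (the real pattern comes from `mlrun.utils.regex.artifact_key` and may differ):

```python
import re


def key_from_source_sketch(src_path: str, pattern: str = r"[a-zA-Z0-9_\-]+") -> str:
    # Keep runs of valid characters, replace every other character with "_".
    result, pos = [], 0
    for match in re.finditer(pattern, src_path):
        result.append("_" * (match.start() - pos))
        result.append(match.group())
        pos = match.end()
    result.append("_" * (len(src_path) - pos))
    return "".join(result)


print(key_from_source_sketch("data/file-name(v1).txt"))  # data_file-name_v1__txt
```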
mlrun/common/runtimes/constants.py
CHANGED
@@ -214,6 +214,23 @@ class RunStates:
             RunStates.skipped: mlrun_pipelines.common.models.RunStatuses.skipped,
         }[run_state]

+    @staticmethod
+    def pipeline_run_status_to_run_state(pipeline_run_status):
+        if pipeline_run_status not in mlrun_pipelines.common.models.RunStatuses.all():
+            raise ValueError(f"Invalid pipeline run status: {pipeline_run_status}")
+        return {
+            mlrun_pipelines.common.models.RunStatuses.succeeded: RunStates.completed,
+            mlrun_pipelines.common.models.RunStatuses.failed: RunStates.error,
+            mlrun_pipelines.common.models.RunStatuses.running: RunStates.running,
+            mlrun_pipelines.common.models.RunStatuses.pending: RunStates.pending,
+            mlrun_pipelines.common.models.RunStatuses.canceled: RunStates.aborted,
+            mlrun_pipelines.common.models.RunStatuses.canceling: RunStates.aborting,
+            mlrun_pipelines.common.models.RunStatuses.skipped: RunStates.skipped,
+            mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified: RunStates.unknown,
+            mlrun_pipelines.common.models.RunStatuses.error: RunStates.error,
+            mlrun_pipelines.common.models.RunStatuses.paused: RunStates.unknown,
+        }[pipeline_run_status]
+

 # TODO: remove this class in 1.9.0 - use only MlrunInternalLabels
 class RunLabels(enum.Enum):
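The new helper is the inverse companion of the existing run-state-to-pipeline-status mapping visible in the context lines above: a dictionary lookup guarded by a membership check. A minimal standalone sketch of the same pattern (the status strings below are placeholders, not necessarily the literal `RunStatuses` values):

```python
# Placeholder mapping illustrating the lookup-with-validation pattern.
_PIPELINE_TO_RUN_STATE = {
    "succeeded": "completed",
    "failed": "error",
    "canceled": "aborted",
    "running": "running",
}


def pipeline_status_to_run_state(status: str) -> str:
    if status not in _PIPELINE_TO_RUN_STATE:
        raise ValueError(f"Invalid pipeline run status: {status}")
    return _PIPELINE_TO_RUN_STATE[status]


print(pipeline_status_to_run_state("canceled"))  # aborted
```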
mlrun/common/schemas/alert.py
CHANGED
@@ -160,6 +160,9 @@ class AlertConfig(pydantic.v1.BaseModel):
     count: Optional[int] = 0
     updated: datetime = None

+    class Config:
+        extra = pydantic.v1.Extra.allow
+
     def get_raw_notifications(self) -> list[notification_objects.Notification]:
         return [
             alert_notification.notification for alert_notification in self.notifications
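Setting `extra = pydantic.v1.Extra.allow` makes `AlertConfig` tolerant of fields it does not declare, so deserialization no longer fails on unknown attributes. A toy pydantic v1 model showing the behavior (the model here is illustrative, not `AlertConfig` itself):

```python
import pydantic.v1


class TolerantModel(pydantic.v1.BaseModel):
    name: str

    class Config:
        extra = pydantic.v1.Extra.allow


# Unknown fields are kept instead of raising a validation error.
obj = TolerantModel(name="my-alert", severity="high")
print(obj.severity)  # high
```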
mlrun/common/schemas/client_spec.py
CHANGED
@@ -57,7 +57,6 @@ class ClientSpec(pydantic.v1.BaseModel):
     redis_url: typing.Optional[str]
     redis_type: typing.Optional[str]
     sql_url: typing.Optional[str]
-    model_monitoring_tsdb_connection: typing.Optional[str]
     ce: typing.Optional[dict]
     # not passing them as one object as it possible client user would like to override only one of the params
     calculate_artifact_hash: typing.Optional[str]
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -61,6 +61,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     STATE = "state"
     MONITORING_MODE = "monitoring_mode"
     FIRST_REQUEST = "first_request"
+    SAMPLING_PERCENTAGE = "sampling_percentage"

     # status - operative
     LAST_REQUEST = "last_request"
@@ -137,6 +138,10 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
     TIME = "time"
     TABLE_COLUMN = "table_column"
+    SAMPLING_PERCENTAGE = "sampling_percentage"
+    SAMPLING_RATE = "sampling_rate"
+    ESTIMATED_PREDICTION_COUNT = "estimated_prediction_count"
+    EFFECTIVE_SAMPLE_COUNT = "effective_sample_count"


 class FeatureSetFeatures(MonitoringStrEnum):
@@ -178,6 +183,25 @@ class WriterEventKind(MonitoringStrEnum):
     STATS = "stats"


+class ControllerEvent(MonitoringStrEnum):
+    KIND = "kind"
+    ENDPOINT_ID = "endpoint_id"
+    ENDPOINT_NAME = "endpoint_name"
+    PROJECT = "project"
+    TIMESTAMP = "timestamp"
+    FIRST_REQUEST = "first_request"
+    FEATURE_SET_URI = "feature_set_uri"
+    ENDPOINT_TYPE = "endpoint_type"
+    ENDPOINT_POLICY = "endpoint_policy"
+    # Note: currently under endpoint policy we will have a dictionary including the keys: "application_names"
+    # and "base_period"
+
+
+class ControllerEventKind(MonitoringStrEnum):
+    NOP_EVENT = "nop_event"
+    REGULAR_EVENT = "regular_event"
+
+
 class MetricData(MonitoringStrEnum):
     METRIC_NAME = "metric_name"
     METRIC_VALUE = "metric_value"
@@ -223,28 +247,26 @@ class ModelEndpointTarget(MonitoringStrEnum):
     SQL = "sql"


-class StreamKind(MonitoringStrEnum):
-    V3IO_STREAM = "v3io_stream"
-    KAFKA = "kafka"
-
-
 class TSDBTarget(MonitoringStrEnum):
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"


+class DefaultProfileName(StrEnum):
+    STREAM = "mm-infra-stream"
+    TSDB = "mm-infra-tsdb"
+
+
 class ProjectSecretKeys:
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    STREAM_PATH = "STREAM_PATH"
-    TSDB_CONNECTION = "TSDB_CONNECTION"
     TSDB_PROFILE_NAME = "TSDB_PROFILE_NAME"
     STREAM_PROFILE_NAME = "STREAM_PROFILE_NAME"

     @classmethod
     def mandatory_secrets(cls):
         return [
-            cls.
-            cls.
+            cls.STREAM_PROFILE_NAME,
+            cls.TSDB_PROFILE_NAME,
         ]


@@ -306,6 +328,7 @@ class V3IOTSDBTables(MonitoringStrEnum):
     METRICS = "metrics"
     EVENTS = "events"
     ERRORS = "errors"
+    PREDICTIONS = "predictions"


 class TDEngineSuperTables(MonitoringStrEnum):
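The `ControllerEvent` members are the field names the monitoring controller uses when it emits events, and `ControllerEventKind` distinguishes regular traffic events from no-op heartbeats. A sketch of an event payload built from these keys (all values below are invented examples; the `endpoint_policy` shape follows the note in the enum):

```python
from datetime import datetime, timezone

# Keys mirror the ControllerEvent enum values; values are illustrative only.
event = {
    "kind": "regular_event",  # ControllerEventKind.REGULAR_EVENT
    "endpoint_id": "abc123",
    "endpoint_name": "churn-model",
    "project": "my-project",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "first_request": "2024-11-01T08:00:00+00:00",
    "feature_set_uri": "store://feature-sets/my-project/churn-features",
    "endpoint_type": "2",
    "endpoint_policy": {"application_names": ["my-monitoring-app"], "base_period": 10},
}
```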
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -160,6 +160,7 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
     state: Optional[str] = "unknown"  # will be updated according to the function state
     first_request: Optional[datetime] = None
     monitoring_mode: Optional[ModelMonitoringMode] = ModelMonitoringMode.disabled
+    sampling_percentage: Optional[float] = 100

     # operative
     last_request: Optional[datetime] = None
@@ -177,6 +178,7 @@ class ModelEndpointStatus(ObjectStatus, ModelEndpointParser):
         "monitoring_mode",
         "first_request",
         "last_request",
+        "sampling_percentage",
     ]

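The new `sampling_percentage` field (default 100, i.e. every request) presumably controls what share of serving traffic the monitoring stream samples for each endpoint; the sampling-related constants added to `EventFieldType` above point the same way. A minimal, generic illustration of percentage-based sampling (not mlrun's actual stream logic):

```python
import random


def should_sample(sampling_percentage: float = 100.0) -> bool:
    """Return True for roughly `sampling_percentage`% of calls."""
    return random.uniform(0, 100) < sampling_percentage


sampled = sum(should_sample(25.0) for _ in range(10_000))
print(f"kept ~{sampled / 100:.1f}% of events")  # around 25%
```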
mlrun/common/schemas/workflow.py
CHANGED
mlrun/config.py
CHANGED
@@ -537,6 +537,8 @@ default_config = {
     },
     "pagination": {
         "default_page_size": 200,
+        "page_limit": 1000000,
+        "page_size_limit": 1000000,
         "pagination_cache": {
             "interval": 60,
             "ttl": 3600,
@@ -594,6 +596,22 @@ default_config = {
             "max_replicas": 1,
         },
     },
+    "controller_stream_args": {
+        "v3io": {
+            "shard_count": 10,
+            "retention_period_hours": 24,
+            "num_workers": 10,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+        "kafka": {
+            "partition_count": 10,
+            "replication_factor": 1,
+            "num_workers": 10,
+            "min_replicas": 1,
+            "max_replicas": 1,
+        },
+    },
     # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
     # stream, and endpoints.
     "store_prefixes": {
@@ -606,10 +624,6 @@ default_config = {
     "offline_storage_path": "model-endpoints/{kind}",
     "parquet_batching_max_events": 10_000,
     "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-    # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-    "tsdb_connection": "",
-    # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
-    "stream_connection": "",
     "tdengine": {
         "timeout": 10,
         "retries": 1,
@@ -727,6 +741,7 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
+        "concurrent_delete_worker_count": 20,
         # Default timeout seconds for retrieving workflow id after execution
         # Remote workflow timeout is the maximum between remote and the inner engine timeout
         "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
@@ -799,7 +814,7 @@ default_config = {
     # maximum allowed value for count in criteria field inside AlertConfig
     "max_criteria_count": 100,
     # interval for periodic events generation job
-    "events_generation_interval":
+    "events_generation_interval": 30,  # seconds
     },
     "auth_with_client_id": {
         "enabled": False,
@@ -1282,6 +1297,8 @@ class Config:
             function_name
             and function_name
             != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
         ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
@@ -1289,12 +1306,21 @@ class Config:
                 if function_name is None
                 else f"{kind}-{function_name.lower()}",
             )
-        elif
+        elif (
+            kind == "stream"
+            and function_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+        ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
                 kind=kind,
             )
         else:
+            if (
+                function_name
+                == mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.APPLICATION_CONTROLLER
+            ):
+                kind = function_name
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
                 project=project,
                 kind=kind,
@@ -1363,6 +1389,13 @@ class Config:
             >= semver.VersionInfo.parse("1.12.10")
         )

+    @staticmethod
+    def get_ordered_keys():
+        # Define the keys to process first
+        return [
+            "MLRUN_HTTPDB__HTTP__VERIFY"  # Ensure this key is processed first for proper connection setup
+        ]
+


 # Global configuration
 config = Config.from_dict(default_config)
mlrun/datastore/datastore_profile.py
CHANGED
@@ -17,7 +17,7 @@ import base64
 import json
 import typing
 import warnings
-from urllib.parse import ParseResult, urlparse
+from urllib.parse import ParseResult, urlparse

 import pydantic.v1
 from mergedeep import merge
@@ -211,9 +211,10 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
         attributes["partitions"] = self.partitions
         sasl = attributes.pop("sasl", {})
         if self.sasl_user and self.sasl_pass:
-            sasl["
+            sasl["enable"] = True
             sasl["user"] = self.sasl_user
             sasl["password"] = self.sasl_pass
+            sasl["mechanism"] = "PLAIN"
         if sasl:
             attributes["sasl"] = sasl
         return attributes
@@ -312,7 +313,7 @@ class DatastoreProfileRedis(DatastoreProfile):
             query=parsed_url.query,
             fragment=parsed_url.fragment,
         )
-        return
+        return new_parsed_url.geturl()

     def secrets(self) -> dict:
         res = {}
@@ -473,6 +474,59 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return f"webhdfs://{self.host}:{self.http_port}{subpath}"


+class TDEngineDatastoreProfile(DatastoreProfile):
+    """
+    A profile that holds the required parameters for a TDEngine database, with the websocket scheme.
+    https://docs.tdengine.com/developer-guide/connecting-to-tdengine/#websocket-connection
+    """
+
+    type: str = pydantic.v1.Field("taosws")
+    _private_attributes = ["password"]
+    user: str
+    # The password cannot be empty in real world scenarios. It's here just because of the profiles completion design.
+    password: typing.Optional[str]
+    host: str
+    port: int
+
+    def dsn(self) -> str:
+        """Get the Data Source Name of the configured TDEngine profile."""
+        return f"{self.type}://{self.user}:{self.password}@{self.host}:{self.port}"
+
+    @classmethod
+    def from_dsn(cls, dsn: str, profile_name: str) -> "TDEngineDatastoreProfile":
+        """
+        Construct a TDEngine profile from DSN (connection string) and a name for the profile.
+
+        :param dsn: The DSN (Data Source Name) of the TDEngine database, e.g.: ``"taosws://root:taosdata@localhost:6041"``.
+        :param profile_name: The new profile's name.
+        :return: The TDEngine profile.
+        """
+        parsed_url = urlparse(dsn)
+        return cls(
+            name=profile_name,
+            user=parsed_url.username,
+            password=parsed_url.password,
+            host=parsed_url.hostname,
+            port=parsed_url.port,
+        )
+
+
+_DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
+    "v3io": DatastoreProfileV3io,
+    "s3": DatastoreProfileS3,
+    "redis": DatastoreProfileRedis,
+    "basic": DatastoreProfileBasic,
+    "kafka_target": DatastoreProfileKafkaTarget,
+    "kafka_source": DatastoreProfileKafkaSource,
+    "dbfs": DatastoreProfileDBFS,
+    "gcs": DatastoreProfileGCS,
+    "az": DatastoreProfileAzureBlob,
+    "hdfs": DatastoreProfileHdfs,
+    "taosws": TDEngineDatastoreProfile,
+    "config": ConfigProfile,
+}
+
+
 class DatastoreProfile2Json(pydantic.v1.BaseModel):
     @staticmethod
     def _to_json(attributes):
@@ -523,19 +577,7 @@ class DatastoreProfile2Json(pydantic.v1.BaseModel):

         decoded_dict = {k: safe_literal_eval(v) for k, v in decoded_dict.items()}
         datastore_type = decoded_dict.get("type")
-        ds_profile_factory =
-            "v3io": DatastoreProfileV3io,
-            "s3": DatastoreProfileS3,
-            "redis": DatastoreProfileRedis,
-            "basic": DatastoreProfileBasic,
-            "kafka_target": DatastoreProfileKafkaTarget,
-            "kafka_source": DatastoreProfileKafkaSource,
-            "dbfs": DatastoreProfileDBFS,
-            "gcs": DatastoreProfileGCS,
-            "az": DatastoreProfileAzureBlob,
-            "hdfs": DatastoreProfileHdfs,
-            "config": ConfigProfile,
-        }
+        ds_profile_factory = _DATASTORE_TYPE_TO_PROFILE_CLASS
         if datastore_type in ds_profile_factory:
            return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
         else:
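Together with the removal of the `tsdb_connection`/`stream_connection` config strings and the `STREAM_PATH`/`TSDB_CONNECTION` secrets, the new `TDEngineDatastoreProfile` means the TSDB connection is now described by a datastore profile rather than a raw connection string. A short usage sketch based on the `dsn()`/`from_dsn()` methods shown above (values are examples; registering the profile on the project is the usual follow-up step and is omitted here):

```python
from mlrun.datastore.datastore_profile import TDEngineDatastoreProfile

# Parse an existing websocket DSN into a named profile, or build one field by field.
profile = TDEngineDatastoreProfile.from_dsn(
    dsn="taosws://root:taosdata@localhost:6041", profile_name="mm-infra-tsdb"
)
assert profile.dsn() == "taosws://root:taosdata@localhost:6041"

# "mm-infra-tsdb" matches DefaultProfileName.TSDB, the name model monitoring
# looks up via the TSDB_PROFILE_NAME project secret.
```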