mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (152)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +5 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +27 -9
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/inmem.py +2 -2
  41. mlrun/datastore/redis.py +2 -2
  42. mlrun/datastore/s3.py +5 -0
  43. mlrun/datastore/sources.py +147 -7
  44. mlrun/datastore/store_resources.py +7 -7
  45. mlrun/datastore/targets.py +110 -42
  46. mlrun/datastore/utils.py +42 -0
  47. mlrun/db/base.py +54 -10
  48. mlrun/db/httpdb.py +282 -79
  49. mlrun/db/nopdb.py +52 -10
  50. mlrun/errors.py +11 -0
  51. mlrun/execution.py +24 -9
  52. mlrun/feature_store/__init__.py +0 -2
  53. mlrun/feature_store/api.py +12 -47
  54. mlrun/feature_store/feature_set.py +9 -0
  55. mlrun/feature_store/feature_vector.py +8 -0
  56. mlrun/feature_store/ingestion.py +7 -6
  57. mlrun/feature_store/retrieval/base.py +9 -4
  58. mlrun/feature_store/retrieval/conversion.py +9 -9
  59. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  60. mlrun/feature_store/retrieval/job.py +9 -3
  61. mlrun/feature_store/retrieval/local_merger.py +2 -0
  62. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  63. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  64. mlrun/frameworks/parallel_coordinates.py +2 -1
  65. mlrun/frameworks/tf_keras/__init__.py +4 -1
  66. mlrun/k8s_utils.py +10 -11
  67. mlrun/launcher/base.py +4 -3
  68. mlrun/launcher/client.py +5 -3
  69. mlrun/launcher/local.py +8 -2
  70. mlrun/launcher/remote.py +8 -2
  71. mlrun/lists.py +6 -2
  72. mlrun/model.py +45 -21
  73. mlrun/model_monitoring/__init__.py +1 -1
  74. mlrun/model_monitoring/api.py +41 -18
  75. mlrun/model_monitoring/application.py +5 -305
  76. mlrun/model_monitoring/applications/__init__.py +11 -0
  77. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  78. mlrun/model_monitoring/applications/base.py +280 -0
  79. mlrun/model_monitoring/applications/context.py +214 -0
  80. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  81. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  82. mlrun/model_monitoring/applications/results.py +99 -0
  83. mlrun/model_monitoring/controller.py +3 -1
  84. mlrun/model_monitoring/db/__init__.py +2 -0
  85. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  86. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  87. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  88. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  89. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  90. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  91. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  92. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  93. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  94. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  95. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  96. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  97. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  98. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  100. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  101. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  102. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
  103. mlrun/model_monitoring/evidently_application.py +6 -118
  104. mlrun/model_monitoring/helpers.py +46 -1
  105. mlrun/model_monitoring/model_endpoint.py +3 -2
  106. mlrun/model_monitoring/stream_processing.py +57 -216
  107. mlrun/model_monitoring/writer.py +134 -124
  108. mlrun/package/utils/_formatter.py +2 -2
  109. mlrun/platforms/__init__.py +10 -9
  110. mlrun/platforms/iguazio.py +21 -202
  111. mlrun/projects/operations.py +19 -12
  112. mlrun/projects/pipelines.py +79 -102
  113. mlrun/projects/project.py +265 -103
  114. mlrun/render.py +15 -14
  115. mlrun/run.py +16 -46
  116. mlrun/runtimes/__init__.py +6 -3
  117. mlrun/runtimes/base.py +8 -7
  118. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  119. mlrun/runtimes/funcdoc.py +0 -28
  120. mlrun/runtimes/kubejob.py +2 -1
  121. mlrun/runtimes/local.py +5 -2
  122. mlrun/runtimes/mpijob/__init__.py +0 -20
  123. mlrun/runtimes/mpijob/v1.py +1 -1
  124. mlrun/runtimes/nuclio/api_gateway.py +194 -84
  125. mlrun/runtimes/nuclio/application/application.py +170 -8
  126. mlrun/runtimes/nuclio/function.py +39 -49
  127. mlrun/runtimes/pod.py +16 -36
  128. mlrun/runtimes/remotesparkjob.py +9 -3
  129. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  130. mlrun/runtimes/utils.py +6 -45
  131. mlrun/serving/server.py +2 -1
  132. mlrun/serving/v2_serving.py +5 -1
  133. mlrun/track/tracker.py +2 -1
  134. mlrun/utils/async_http.py +25 -5
  135. mlrun/utils/helpers.py +107 -75
  136. mlrun/utils/logger.py +39 -7
  137. mlrun/utils/notifications/notification/__init__.py +14 -9
  138. mlrun/utils/notifications/notification/base.py +1 -1
  139. mlrun/utils/notifications/notification/slack.py +34 -7
  140. mlrun/utils/notifications/notification/webhook.py +1 -1
  141. mlrun/utils/notifications/notification_pusher.py +147 -16
  142. mlrun/utils/regex.py +9 -0
  143. mlrun/utils/v3io_clients.py +0 -1
  144. mlrun/utils/version/version.json +2 -2
  145. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
  146. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +150 -130
  147. mlrun/kfpops.py +0 -865
  148. mlrun/platforms/other.py +0 -305
  149. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
  150. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
  151. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
  152. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -81,6 +81,8 @@ class EventFieldType:
     DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
     POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    TIME = "time"
+    TABLE_COLUMN = "table_column"
 
 
 class FeatureSetFeatures(MonitoringStrEnum):
@@ -99,14 +101,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +119,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
@@ -138,10 +158,6 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class TimeSeriesTarget:
-    TSDB = "tsdb"
-
-
 class ModelEndpointTarget:
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"
@@ -153,6 +169,7 @@ class ProjectSecretKeys:
     PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
     KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
+    TSDB_CONNECTION = "TSDB_CONNECTION"
 
 
 class ModelMonitoringStoreKinds:
@@ -170,12 +187,15 @@ class SchedulingKeys:
 class FileTargetKind:
     ENDPOINTS = "endpoints"
     EVENTS = "events"
+    PREDICTIONS = "predictions"
     STREAM = "stream"
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
+    APP_METRICS = "app_metrics"
     MONITORING_SCHEDULES = "monitoring_schedules"
+    MONITORING_APPLICATION = "monitoring_application"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -210,6 +230,18 @@ class MonitoringFunctionNames(MonitoringStrEnum):
     WRITER = "model-monitoring-writer"
 
 
+class V3IOTSDBTables(MonitoringStrEnum):
+    APP_RESULTS = "app-results"
+    METRICS = "metrics"
+    EVENTS = "events"
+
+
+class TDEngineSuperTables(MonitoringStrEnum):
+    APP_RESULTS = "app_results"
+    METRICS = "metrics"
+    PREDICTIONS = "predictions"
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -286,6 +318,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
+    custom = 4
 
 
 class ResultStatusApp(IntEnum):
@@ -303,11 +336,29 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
 
 
+class TSDBTarget:
+    V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
+    PROMETHEUS = "prometheus"
+
+
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
+
+
+class PredictionsQueryConstants:
+    DEFAULT_AGGREGATION_GRANULARITY = "10m"
+    INVOCATIONS = "invocations"
+
+
+class SpecialApps:
+    MLRUN_INFRA = "mlrun-infra"
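The new __str__ lets the application label be rendered directly as a label selector; based on the KEY and VAL constants above:

    >>> str(ModelMonitoringAppLabel())
    'mlrun__type=mlrun__model-monitoring-application'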
mlrun/common/schemas/model_monitoring/grafana.py CHANGED
@@ -11,12 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional, Union
 
 from pydantic import BaseModel
 
+import mlrun.common.types
+
+
+class GrafanaColumnType(mlrun.common.types.StrEnum):
+    NUMBER = "number"
+    STRING = "string"
+
 
 class GrafanaColumn(BaseModel):
     text: str
@@ -24,13 +30,11 @@ class GrafanaColumn(BaseModel):
 
 
 class GrafanaNumberColumn(GrafanaColumn):
-    text: str
-    type: str = "number"
+    type: str = GrafanaColumnType.NUMBER
 
 
 class GrafanaStringColumn(GrafanaColumn):
-    text: str
-    type: str = "string"
+    type: str = GrafanaColumnType.STRING
 
 
 class GrafanaTable(BaseModel):
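A minimal usage sketch of the refactored Grafana columns, assuming they are imported from mlrun.common.schemas.model_monitoring.grafana; the defaults are unchanged, only sourced from the new GrafanaColumnType enum:

    from mlrun.common.schemas.model_monitoring.grafana import (
        GrafanaNumberColumn,
        GrafanaStringColumn,
    )

    value_col = GrafanaNumberColumn(text="drift_value")
    name_col = GrafanaStringColumn(text="endpoint_name")
    # GrafanaColumnType members are str enums, so they compare equal to the old literals
    assert value_col.type == "number" and name_col.type == "string"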
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
-from typing import Any, Optional
+import re
+from datetime import datetime
+from typing import Any, NamedTuple, Optional
 
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -29,6 +31,8 @@ from .constants import (
     EventKeyMetrics,
     EventLiveStats,
     ModelMonitoringMode,
+    ResultKindApp,
+    ResultStatusApp,
 )
 
 
@@ -100,6 +104,7 @@ class ModelEndpointSpec(ObjectSpec):
     )
 
     @validator("monitor_configuration")
+    @classmethod
     def set_name(cls, monitor_configuration):
         return monitor_configuration or {
             EventFieldType.DRIFT_DETECTED_THRESHOLD: (
@@ -111,6 +116,7 @@
         }
 
     @validator("model_uri")
+    @classmethod
     def validate_model_uri(cls, model_uri):
         """Validate that the model uri includes the required prefix"""
         prefix, uri = mlrun.datastore.parse_store_uri(model_uri)
@@ -292,6 +298,84 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+    METRIC = "metric"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
+def _compose_full_name(
+    *,
+    project: str,
+    app: str,
+    name: str,
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT,
+) -> str:
+    return ".".join([project, app, type, name])
+
+
+_FQN_PART_PATTERN = r"[a-zA-Z0-9_-]+"
+_FQN_PATTERN = (
+    rf"^(?P<project>{_FQN_PART_PATTERN})\."
+    rf"(?P<app>{_FQN_PART_PATTERN})\."
+    rf"(?P<type>{ModelEndpointMonitoringMetricType.RESULT}|{ModelEndpointMonitoringMetricType.METRIC})\."
+    rf"(?P<name>{_FQN_PART_PATTERN})$"
+)
+_FQN_REGEX = re.compile(_FQN_PATTERN)
+
+
+def _parse_metric_fqn_to_monitoring_metric(fqn: str) -> ModelEndpointMonitoringMetric:
+    match = _FQN_REGEX.fullmatch(fqn)
+    if match is None:
+        raise ValueError("The fully qualified name is not in the expected format")
+    return ModelEndpointMonitoringMetric.parse_obj(
+        match.groupdict() | {"full_name": fqn}
+    )
+
+
+class _MetricPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+
+
+class _ResultPoint(NamedTuple):
+    timestamp: datetime
+    value: float
+    status: ResultStatusApp
+
+
+class _ModelEndpointMonitoringMetricValuesBase(BaseModel):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool
+
+
+class ModelEndpointMonitoringMetricValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.METRIC
+    values: list[_MetricPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringResultValues(_ModelEndpointMonitoringMetricValuesBase):
+    type: ModelEndpointMonitoringMetricType = ModelEndpointMonitoringMetricType.RESULT
+    result_kind: ResultKindApp
+    values: list[_ResultPoint]
+    data: bool = True
+
+
+class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBase):
+    full_name: str
+    type: ModelEndpointMonitoringMetricType
+    data: bool = False
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
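The helpers above define the fully qualified metric name as "<project>.<app>.<type>.<name>"; a short round-trip sketch with made-up names:

    fqn = _compose_full_name(
        project="fraud-detection", app="histogram-data-drift", name="general_drift"
    )
    # -> "fraud-detection.histogram-data-drift.result.general_drift" (type defaults to RESULT)

    metric = _parse_metric_fqn_to_monitoring_metric(fqn)
    # -> ModelEndpointMonitoringMetric(project="fraud-detection", app="histogram-data-drift",
    #    type=ModelEndpointMonitoringMetricType.RESULT, name="general_drift", full_name=fqn)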
mlrun/common/schemas/pipeline.py CHANGED
@@ -16,15 +16,6 @@ import typing
 
 import pydantic
 
-import mlrun.common.types
-
-
-class PipelinesFormat(mlrun.common.types.StrEnum):
-    full = "full"
-    metadata_only = "metadata_only"
-    summary = "summary"
-    name_only = "name_only"
-
 
 class PipelinesPagination(str):
     default_page_size = 20
mlrun/common/schemas/project.py CHANGED
@@ -23,16 +23,6 @@ from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
-class ProjectsFormat(mlrun.common.types.StrEnum):
-    full = "full"
-    name_only = "name_only"
-    # minimal format removes large fields from the response (e.g. functions, workflows, artifacts)
-    # and is used for faster response times (in the UI)
-    minimal = "minimal"
-    # internal - allowed only in follower mode, only for the leader for upgrade purposes
-    leader = "leader"
-
-
 class ProjectMetadata(pydantic.BaseModel):
     name: str
     created: typing.Optional[datetime.datetime] = None
@@ -113,7 +103,11 @@ class ProjectSummary(pydantic.BaseModel):
     runs_completed_recent_count: int
     runs_failed_recent_count: int
     runs_running_count: int
-    schedules_count: int
+    distinct_schedules_count: int
+    distinct_scheduled_jobs_pending_count: int
+    distinct_scheduled_pipelines_pending_count: int
+    pipelines_completed_recent_count: typing.Optional[int] = None
+    pipelines_failed_recent_count: typing.Optional[int] = None
     pipelines_running_count: typing.Optional[int] = None
 
 
mlrun/common/types.py CHANGED
@@ -29,3 +29,4 @@ class StrEnum(str, enum.Enum):
 class HTTPMethod(StrEnum):
     GET = "GET"
     POST = "POST"
+    DELETE = "DELETE"
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
     "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -232,6 +233,10 @@ default_config = {
         "databricks": {
             "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
         },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
     },
     # TODO: function defaults should be moved to the function spec config above
     "function_defaults": {
@@ -361,12 +366,12 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
         # size of serving spec to move to config maps
-        "serving_spec_env_cutoff": 4096,
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -503,6 +508,7 @@ default_config = {
             "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
             "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
             "stream": "",
+            "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
         },
         # Offline storage path can be either relative or a full path. This path is used for general offline data
        # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -516,6 +522,9 @@ default_config = {
         # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
+        # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+        "tsdb_connector_type": "v3io-tsdb",
+        "tsdb_connection": "",
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
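The new tsdb_connector_type / tsdb_connection keys extend the model-endpoint monitoring block of the default config. Assuming MLRun's usual environment-variable override convention (MLRUN_ prefix, double underscores for nesting), pointing the monitoring stack at a different TSDB could look roughly like this; the connection string is a placeholder:

    import os

    # Assumed override paths - the keys above appear to live under model_endpoint_monitoring
    os.environ["MLRUN_MODEL_ENDPOINT_MONITORING__TSDB_CONNECTOR_TYPE"] = "tdengine"
    os.environ["MLRUN_MODEL_ENDPOINT_MONITORING__TSDB_CONNECTION"] = "<tsdb-connection-string>"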
@@ -554,7 +563,7 @@ default_config = {
             "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
             # "authority" is optional and generalizes [userinfo "@"] host [":" port]
             "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/nosql",
+            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
@@ -692,7 +701,10 @@ default_config = {
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "disabled"
+        "mode": "enabled",
+        # maximum number of alerts we allow to be configured.
+        # user will get an error when exceeding this
+        "max_allowed": 1000,
     },
     "auth_with_client_id": {
         "enabled": False,
@@ -797,6 +809,7 @@ class Config:
     ):
         """
         decodes and loads the config attribute to expected type
+
         :param attribute_path: the path in the default_config e.g. preemptible_nodes.node_selector
         :param expected_type: the object type valid values are : `dict`, `list` etc...
         :return: the expected type instance
@@ -959,6 +972,10 @@ class Config:
         self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
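A small sketch of reaching the new helper through the global config object (mlrun.mlconf is the usual alias for this Config singleton):

    import mlrun

    # Label keys MLRun reserves for internal use, per mlrun.common.constants.MLRunInternalLabels.all()
    reserved_labels = mlrun.mlconf.internal_labels()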
@@ -1088,6 +1105,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         function_name: str = None,
+        **kwargs,
     ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1114,7 +1132,7 @@ class Config:
         )
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
         if (
             function_name
@@ -1399,14 +1417,14 @@ def read_env(env=None, prefix=env_prefix):
     if log_formatter_name := config.get("log_formatter"):
         import mlrun.utils.logger
 
-        log_formatter = mlrun.utils.create_formatter_instance(
+        log_formatter = mlrun.utils.resolve_formatter_by_kind(
             mlrun.utils.FormatterKinds(log_formatter_name)
         )
         current_handler = mlrun.utils.logger.get_handler("default")
         current_formatter_name = current_handler.formatter.__class__.__name__
-        desired_formatter_name = log_formatter.__class__.__name__
+        desired_formatter_name = log_formatter.__name__
         if current_formatter_name != desired_formatter_name:
-            current_handler.setFormatter(log_formatter)
+            current_handler.setFormatter(log_formatter())
 
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
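The comment change above advertises a new human_extended formatter. Assuming the standard MLRUN_-prefixed environment override for the log_formatter key, selecting it from Python could look like:

    import os

    os.environ["MLRUN_LOG_FORMATTER"] = "human_extended"  # must be set before mlrun loads its config
    import mlrun  # noqa: E402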
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
             msg = (
                 "toPandas attempted Arrow optimization because "
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                "failed by the reason below:\n %s\n"
+                f"failed by the reason below:\n {e}\n"
                 "Attempting non-optimization as "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                "true." % str(e)
+                "true."
             )
             warnings.warn(msg)
             use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                 "reached the error below and will not continue because automatic fallback "
                 "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n %s" % str(e)
+                f"false.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                     "reached the error below and can not continue. Note that "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                     "effect on failures in the middle of "
-                    "computation.\n %s" % str(e)
+                    f"computation.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -154,10 +154,10 @@ def toPandas(spark_df):
     column_counter = Counter(spark_df.columns)
 
     dtype = [None] * len(spark_df.schema)
-    for fieldIdx, field in enumerate(spark_df.schema):
+    for field_idx, field in enumerate(spark_df.schema):
         # For duplicate column name, we use `iloc` to access it.
         if column_counter[field.name] > 1:
-            pandas_col = pdf.iloc[:, fieldIdx]
+            pandas_col = pdf.iloc[:, field_idx]
         else:
             pandas_col = pdf[field.name]
 
@@ -171,12 +171,12 @@ def toPandas(spark_df):
             and field.nullable
             and pandas_col.isnull().any()
         ):
-            dtype[fieldIdx] = pandas_type
+            dtype[field_idx] = pandas_type
         # Ensure we fall back to nullable numpy types, even when whole column is null:
         if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = np.float64
+            dtype[field_idx] = np.float64
         if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-            dtype[fieldIdx] = object
+            dtype[field_idx] = object
 
     df = pd.DataFrame()
     for index, t in enumerate(dtype):
mlrun/datastore/base.py CHANGED
@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
             try:
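After find_filters fills in the time-range conditions, the additional filters are appended to the first condition group, i.e. they are ANDed with the time range in the filters argument passed to read_parquet. An illustration of the resulting shape, with made-up column names and values:

    # Illustrative shape of kwargs["filters"] after set_filters runs
    filters = [
        [
            ("timestamp", ">=", start_time),  # added by find_filters
            ("timestamp", "<=", end_time),    # added by find_filters
            ("Product", "=", "Computer"),     # appended from additional_filters
        ]
    ]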
@@ -217,6 +232,7 @@
                 ):
                     raise ex
 
+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
                 return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@
             kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module, url, file_system, time_column, start_time, end_time
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
        end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@
         :param end_time: filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -560,6 +591,7 @@
             time_column=time_column,
             start_time=start_time,
             end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
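Taken together with the docstring above, a minimal usage sketch of the new parameter on a DataItem; the dataset URL and filter values are made up:

    import mlrun

    item = mlrun.get_dataitem("s3://my-bucket/sales.parquet")
    df = item.as_df(additional_filters=[("Product", "=", "Computer")])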