mlrun 1.7.0rc26__py3-none-any.whl → 1.7.0rc29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. See the package's registry page for more details.

Files changed (66)
  1. mlrun/__main__.py +7 -7
  2. mlrun/alerts/alert.py +13 -1
  3. mlrun/artifacts/manager.py +5 -0
  4. mlrun/common/constants.py +2 -2
  5. mlrun/common/formatters/base.py +9 -9
  6. mlrun/common/schemas/alert.py +4 -8
  7. mlrun/common/schemas/api_gateway.py +7 -0
  8. mlrun/common/schemas/constants.py +3 -0
  9. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  10. mlrun/common/schemas/model_monitoring/constants.py +27 -12
  11. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
  12. mlrun/common/schemas/schedule.py +1 -1
  13. mlrun/config.py +16 -9
  14. mlrun/datastore/azure_blob.py +2 -1
  15. mlrun/datastore/base.py +1 -5
  16. mlrun/datastore/datastore.py +3 -3
  17. mlrun/datastore/inmem.py +1 -1
  18. mlrun/datastore/snowflake_utils.py +3 -1
  19. mlrun/datastore/sources.py +26 -11
  20. mlrun/datastore/store_resources.py +2 -0
  21. mlrun/datastore/targets.py +60 -25
  22. mlrun/db/base.py +10 -0
  23. mlrun/db/httpdb.py +41 -30
  24. mlrun/db/nopdb.py +10 -1
  25. mlrun/errors.py +4 -0
  26. mlrun/execution.py +18 -10
  27. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  28. mlrun/launcher/local.py +2 -2
  29. mlrun/model.py +30 -0
  30. mlrun/model_monitoring/api.py +6 -52
  31. mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
  32. mlrun/model_monitoring/db/stores/__init__.py +21 -9
  33. mlrun/model_monitoring/db/stores/base/store.py +39 -1
  34. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  35. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
  36. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +34 -79
  37. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -27
  38. mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
  39. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
  40. mlrun/model_monitoring/helpers.py +9 -5
  41. mlrun/model_monitoring/writer.py +1 -5
  42. mlrun/projects/operations.py +1 -0
  43. mlrun/projects/project.py +71 -75
  44. mlrun/render.py +10 -5
  45. mlrun/run.py +2 -2
  46. mlrun/runtimes/daskjob.py +7 -1
  47. mlrun/runtimes/local.py +24 -7
  48. mlrun/runtimes/nuclio/function.py +20 -0
  49. mlrun/runtimes/pod.py +5 -29
  50. mlrun/serving/routers.py +75 -59
  51. mlrun/serving/server.py +1 -0
  52. mlrun/serving/v2_serving.py +8 -1
  53. mlrun/utils/helpers.py +46 -2
  54. mlrun/utils/logger.py +36 -2
  55. mlrun/utils/notifications/notification/base.py +4 -0
  56. mlrun/utils/notifications/notification/git.py +21 -0
  57. mlrun/utils/notifications/notification/slack.py +8 -0
  58. mlrun/utils/notifications/notification/webhook.py +41 -1
  59. mlrun/utils/notifications/notification_pusher.py +2 -2
  60. mlrun/utils/version/version.json +2 -2
  61. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/METADATA +9 -4
  62. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/RECORD +66 -66
  63. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/WHEEL +1 -1
  64. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/LICENSE +0 -0
  65. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/entry_points.txt +0 -0
  66. {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc29.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -50,12 +50,12 @@ from .run import (
50
50
  from .runtimes import RemoteRuntime, RunError, RuntimeKinds, ServingRuntime
51
51
  from .secrets import SecretsStore
52
52
  from .utils import (
53
+ RunKeys,
53
54
  dict_to_yaml,
54
55
  get_in,
55
56
  is_relative_path,
56
57
  list2dict,
57
58
  logger,
58
- run_keys,
59
59
  update_in,
60
60
  )
61
61
  from .utils.version import Version
@@ -380,15 +380,15 @@ def run(
380
380
  set_item(runobj.spec.hyper_param_options, hyper_param_strategy, "strategy")
381
381
  set_item(runobj.spec.hyper_param_options, selector, "selector")
382
382
 
383
- set_item(runobj.spec, inputs, run_keys.inputs, list2dict(inputs))
383
+ set_item(runobj.spec, inputs, RunKeys.inputs, list2dict(inputs))
384
384
  set_item(
385
- runobj.spec, returns, run_keys.returns, [py_eval(value) for value in returns]
385
+ runobj.spec, returns, RunKeys.returns, [py_eval(value) for value in returns]
386
386
  )
387
- set_item(runobj.spec, in_path, run_keys.input_path)
388
- set_item(runobj.spec, out_path, run_keys.output_path)
389
- set_item(runobj.spec, outputs, run_keys.outputs, list(outputs))
387
+ set_item(runobj.spec, in_path, RunKeys.input_path)
388
+ set_item(runobj.spec, out_path, RunKeys.output_path)
389
+ set_item(runobj.spec, outputs, RunKeys.outputs, list(outputs))
390
390
  set_item(
391
- runobj.spec, secrets, run_keys.secrets, line2keylist(secrets, "kind", "source")
391
+ runobj.spec, secrets, RunKeys.secrets, line2keylist(secrets, "kind", "source")
392
392
  )
393
393
  set_item(runobj.spec, verbose, "verbose")
394
394
  set_item(runobj.spec, scrape_metrics, "scrape_metrics")
mlrun/alerts/alert.py CHANGED
@@ -26,7 +26,6 @@ class AlertConfig(ModelObj):
26
26
  "description",
27
27
  "summary",
28
28
  "severity",
29
- "criteria",
30
29
  "reset_policy",
31
30
  "state",
32
31
  ]
@@ -34,6 +33,7 @@ class AlertConfig(ModelObj):
34
33
  "entities",
35
34
  "notifications",
36
35
  "trigger",
36
+ "criteria",
37
37
  ]
38
38
 
39
39
  def __init__(
@@ -104,6 +104,14 @@ class AlertConfig(ModelObj):
104
104
  else self.trigger
105
105
  )
106
106
  return None
107
+ if field_name == "criteria":
108
+ if self.criteria:
109
+ return (
110
+ self.criteria.dict()
111
+ if not isinstance(self.criteria, dict)
112
+ else self.criteria
113
+ )
114
+ return None
107
115
  return super()._serialize_field(struct, field_name, strip)
108
116
 
109
117
  def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
@@ -137,6 +145,10 @@ class AlertConfig(ModelObj):
137
145
  trigger_obj = alert_objects.AlertTrigger.parse_obj(trigger_data)
138
146
  new_obj.trigger = trigger_obj
139
147
 
148
+ criteria_data = struct.get("criteria")
149
+ if criteria_data:
150
+ criteria_obj = alert_objects.AlertCriteria.parse_obj(criteria_data)
151
+ new_obj.criteria = criteria_obj
140
152
  return new_obj
141
153
 
142
154
  def with_notifications(self, notifications: list[alert_objects.AlertNotification]):
@@ -100,6 +100,11 @@ class ArtifactProducer:
100
100
 
101
101
  def dict_to_artifact(struct: dict) -> Artifact:
102
102
  kind = struct.get("kind", "")
103
+
104
+ # TODO: remove this in 1.8.0
105
+ if mlrun.utils.is_legacy_artifact(struct):
106
+ return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
107
+
103
108
  artifact_class = artifact_types[kind]
104
109
  return artifact_class.from_dict(struct)
105
110
 
mlrun/common/constants.py CHANGED
@@ -64,12 +64,12 @@ class MLRunInternalLabels:
64
64
  username = f"{MLRUN_LABEL_PREFIX}username"
65
65
  username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
66
66
  task_name = f"{MLRUN_LABEL_PREFIX}task-name"
67
+ resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
68
+ created = f"{MLRUN_LABEL_PREFIX}created"
67
69
  host = "host"
68
70
  job_type = "job-type"
69
71
  kind = "kind"
70
72
  component = "component"
71
- resource_name = "resource_name"
72
- created = "mlrun-created"
73
73
 
74
74
  owner = "owner"
75
75
  v3io_user = "v3io_user"
@@ -28,42 +28,42 @@ class ObjectFormat:
28
28
  full = "full"
29
29
 
30
30
  @staticmethod
31
- def format_method(_format: str) -> typing.Optional[typing.Callable]:
31
+ def format_method(format_: str) -> typing.Optional[typing.Callable]:
32
32
  """
33
33
  Get the formatting method for the provided format.
34
34
  A `None` value signifies a pass-through formatting method (no formatting).
35
- :param _format: The format as a string representation.
35
+ :param format_: The format as a string representation.
36
36
  :return: The formatting method.
37
37
  """
38
38
  return {
39
39
  ObjectFormat.full: None,
40
- }[_format]
40
+ }[format_]
41
41
 
42
42
  @classmethod
43
43
  def format_obj(
44
44
  cls,
45
45
  obj: typing.Any,
46
- _format: str,
46
+ format_: str,
47
47
  exclude_formats: typing.Optional[list[str]] = None,
48
48
  ) -> typing.Any:
49
49
  """
50
50
  Format the provided object based on the provided format.
51
51
  :param obj: The object to format.
52
- :param _format: The format as a string representation.
52
+ :param format_: The format as a string representation.
53
53
  :param exclude_formats: A list of formats to exclude from the formatting process. If the provided format is in
54
54
  this list, an invalid format exception will be raised.
55
55
  """
56
56
  exclude_formats = exclude_formats or []
57
- _format = _format or cls.full
57
+ format_ = format_ or cls.full
58
58
  invalid_format_exc = mlrun.errors.MLRunBadRequestError(
59
- f"Provided format is not supported. format={_format}"
59
+ f"Provided format is not supported. format={format_}"
60
60
  )
61
61
 
62
- if _format in exclude_formats:
62
+ if format_ in exclude_formats:
63
63
  raise invalid_format_exc
64
64
 
65
65
  try:
66
- format_method = cls.format_method(_format)
66
+ format_method = cls.format_method(format_)
67
67
  except KeyError:
68
68
  raise invalid_format_exc
69
69
 
@@ -39,8 +39,8 @@ class EventKind(StrEnum):
39
39
  CONCEPT_DRIFT_SUSPECTED = "concept_drift_suspected"
40
40
  MODEL_PERFORMANCE_DETECTED = "model_performance_detected"
41
41
  MODEL_PERFORMANCE_SUSPECTED = "model_performance_suspected"
42
- MODEL_SERVING_PERFORMANCE_DETECTED = "model_serving_performance_detected"
43
- MODEL_SERVING_PERFORMANCE_SUSPECTED = "model_serving_performance_suspected"
42
+ SYSTEM_PERFORMANCE_DETECTED = "system_performance_detected"
43
+ SYSTEM_PERFORMANCE_SUSPECTED = "system_performance_suspected"
44
44
  MM_APP_ANOMALY_DETECTED = "mm_app_anomaly_detected"
45
45
  MM_APP_ANOMALY_SUSPECTED = "mm_app_anomaly_suspected"
46
46
  FAILED = "failed"
@@ -53,12 +53,8 @@ _event_kind_entity_map = {
53
53
  EventKind.CONCEPT_DRIFT_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
54
54
  EventKind.MODEL_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
55
55
  EventKind.MODEL_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
56
- EventKind.MODEL_SERVING_PERFORMANCE_DETECTED: [
57
- EventEntityKind.MODEL_ENDPOINT_RESULT
58
- ],
59
- EventKind.MODEL_SERVING_PERFORMANCE_SUSPECTED: [
60
- EventEntityKind.MODEL_ENDPOINT_RESULT
61
- ],
56
+ EventKind.SYSTEM_PERFORMANCE_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
57
+ EventKind.SYSTEM_PERFORMANCE_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
62
58
  EventKind.MM_APP_ANOMALY_DETECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
63
59
  EventKind.MM_APP_ANOMALY_SUSPECTED: [EventEntityKind.MODEL_ENDPOINT_RESULT],
64
60
  EventKind.FAILED: [EventEntityKind.JOB],
@@ -102,6 +102,13 @@ class APIGateway(_APIGatewayBaseModel):
102
102
  if upstream.nucliofunction.get("name")
103
103
  ]
104
104
 
105
+ def get_invoke_url(self):
106
+ return (
107
+ self.spec.host + self.spec.path
108
+ if self.spec.path and self.spec.host
109
+ else self.spec.host
110
+ )
111
+
105
112
  def enrich_mlrun_names(self):
106
113
  self._enrich_api_gateway_mlrun_name()
107
114
  self._enrich_mlrun_function_names()
@@ -120,10 +120,13 @@ class FeatureStorePartitionByField(mlrun.common.types.StrEnum):
120
120
 
121
121
  class RunPartitionByField(mlrun.common.types.StrEnum):
122
122
  name = "name" # Supported for runs objects
123
+ project_and_name = "project_and_name" # Supported for runs objects
123
124
 
124
125
  def to_partition_by_db_field(self, db_cls):
125
126
  if self.value == RunPartitionByField.name:
126
127
  return db_cls.name
128
+ elif self.value == RunPartitionByField.project_and_name:
129
+ return db_cls.project, db_cls.name
127
130
  else:
128
131
  raise mlrun.errors.MLRunInvalidArgumentError(
129
132
  f"Unknown group by field: {self.value}"
@@ -25,6 +25,7 @@ from .constants import (
25
25
  FunctionURI,
26
26
  MetricData,
27
27
  ModelEndpointTarget,
28
+ ModelEndpointTargetSchemas,
28
29
  ModelMonitoringMode,
29
30
  ModelMonitoringStoreKinds,
30
31
  MonitoringFunctionNames,
@@ -78,8 +78,6 @@ class EventFieldType:
78
78
  FEATURE_SET_URI = "monitoring_feature_set_uri"
79
79
  ALGORITHM = "algorithm"
80
80
  VALUE = "value"
81
- DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
82
- POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
83
81
  SAMPLE_PARQUET_PATH = "sample_parquet_path"
84
82
  TIME = "time"
85
83
  TABLE_COLUMN = "table_column"
@@ -158,19 +156,42 @@ class EventKeyMetrics:
158
156
  REAL_TIME = "real_time"
159
157
 
160
158
 
161
- class ModelEndpointTarget:
159
+ class ModelEndpointTarget(MonitoringStrEnum):
162
160
  V3IO_NOSQL = "v3io-nosql"
163
161
  SQL = "sql"
164
162
 
165
163
 
164
+ class StreamKind(MonitoringStrEnum):
165
+ V3IO_STREAM = "v3io_stream"
166
+ KAFKA = "kafka"
167
+
168
+
169
+ class TSDBTarget(MonitoringStrEnum):
170
+ V3IO_TSDB = "v3io-tsdb"
171
+ TDEngine = "tdengine"
172
+ PROMETHEUS = "prometheus"
173
+
174
+
166
175
  class ProjectSecretKeys:
167
176
  ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
168
177
  ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
169
- PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
170
- KAFKA_BROKERS = "KAFKA_BROKERS"
171
178
  STREAM_PATH = "STREAM_PATH"
172
179
  TSDB_CONNECTION = "TSDB_CONNECTION"
173
180
 
181
+ @classmethod
182
+ def mandatory_secrets(cls):
183
+ return [
184
+ cls.ENDPOINT_STORE_CONNECTION,
185
+ cls.STREAM_PATH,
186
+ cls.TSDB_CONNECTION,
187
+ ]
188
+
189
+
190
+ class ModelEndpointTargetSchemas(MonitoringStrEnum):
191
+ V3IO = "v3io"
192
+ MYSQL = "mysql"
193
+ SQLITE = "sqlite"
194
+
174
195
 
175
196
  class ModelMonitoringStoreKinds:
176
197
  ENDPOINTS = "endpoints"
@@ -318,7 +339,7 @@ class ResultKindApp(Enum):
318
339
  concept_drift = 1
319
340
  model_performance = 2
320
341
  system_performance = 3
321
- custom = 4
342
+ mm_app_anomaly = 4
322
343
 
323
344
 
324
345
  class ResultStatusApp(IntEnum):
@@ -344,12 +365,6 @@ class ControllerPolicy:
344
365
  BASE_PERIOD = "base_period"
345
366
 
346
367
 
347
- class TSDBTarget:
348
- V3IO_TSDB = "v3io-tsdb"
349
- TDEngine = "tdengine"
350
- PROMETHEUS = "prometheus"
351
-
352
-
353
368
  class HistogramDataDriftApplicationConstants:
354
369
  NAME = "histogram-data-drift"
355
370
  GENERAL_RESULT_NAME = "general_drift"
@@ -103,18 +103,6 @@ class ModelEndpointSpec(ObjectSpec):
103
103
  json_parse_values=json_parse_values,
104
104
  )
105
105
 
106
- @validator("monitor_configuration")
107
- @classmethod
108
- def set_name(cls, monitor_configuration):
109
- return monitor_configuration or {
110
- EventFieldType.DRIFT_DETECTED_THRESHOLD: (
111
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected
112
- ),
113
- EventFieldType.POSSIBLE_DRIFT_THRESHOLD: (
114
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift
115
- ),
116
- }
117
-
118
106
  @validator("model_uri")
119
107
  @classmethod
120
108
  def validate_model_uri(cls, model_uri):
@@ -96,7 +96,7 @@ class ScheduleUpdate(BaseModel):
96
96
  scheduled_object: Optional[Any]
97
97
  cron_trigger: Optional[Union[str, ScheduleCronTrigger]]
98
98
  desired_state: Optional[str]
99
- labels: Optional[dict] = {}
99
+ labels: Optional[dict] = None
100
100
  concurrency_limit: Optional[int]
101
101
  credentials: Credentials = Credentials()
102
102
 
mlrun/config.py CHANGED
@@ -64,11 +64,15 @@ default_config = {
64
64
  "api_base_version": "v1",
65
65
  "version": "", # will be set to current version
66
66
  "images_tag": "", # tag to use with mlrun images e.g. mlrun/mlrun (defaults to version)
67
- "images_registry": "", # registry to use with mlrun images e.g. quay.io/ (defaults to empty, for dockerhub)
67
+ # registry to use with mlrun images that start with "mlrun/" e.g. quay.io/ (defaults to empty, for dockerhub)
68
+ "images_registry": "",
69
+ # registry to use with non-mlrun images (don't start with "mlrun/") specified in 'images_to_enrich_registry'
70
+ # defaults to empty, for dockerhub
71
+ "vendor_images_registry": "",
68
72
  # comma separated list of images that are in the specified images_registry, and therefore will be enriched with this
69
73
  # registry when used. default to mlrun/* which means any image which is of the mlrun repository (mlrun/mlrun,
70
74
  # mlrun/ml-base, etc...)
71
- "images_to_enrich_registry": "^mlrun/*",
75
+ "images_to_enrich_registry": "^mlrun/*,python:3.9",
72
76
  "kfp_url": "",
73
77
  "kfp_ttl": "14400", # KFP ttl in sec, after that completed PODs will be deleted
74
78
  "kfp_image": "mlrun/mlrun", # image to use for KFP runner (defaults to mlrun/mlrun)
@@ -250,7 +254,7 @@ default_config = {
250
254
  "remote": "mlrun/mlrun",
251
255
  "dask": "mlrun/ml-base",
252
256
  "mpijob": "mlrun/mlrun",
253
- "application": "python:3.9-slim",
257
+ "application": "python:3.9",
254
258
  },
255
259
  # see enrich_function_preemption_spec for more info,
256
260
  # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -504,13 +508,12 @@ default_config = {
504
508
  "model_endpoint_monitoring": {
505
509
  "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
506
510
  "application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
507
- "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
508
511
  # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
509
512
  # stream, and endpoints.
510
513
  "store_prefixes": {
511
514
  "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
512
515
  "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
513
- "stream": "",
516
+ "stream": "", # TODO: Delete in 1.9.0
514
517
  "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
515
518
  },
516
519
  # Offline storage path can be either relative or a full path. This path is used for general offline data
@@ -523,11 +526,12 @@ default_config = {
523
526
  "parquet_batching_max_events": 10_000,
524
527
  "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
525
528
  # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
526
- "store_type": "v3io-nosql",
529
+ "store_type": "v3io-nosql", # TODO: Delete in 1.9.0
527
530
  "endpoint_store_connection": "",
528
531
  # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
529
- "tsdb_connector_type": "v3io-tsdb",
530
532
  "tsdb_connection": "",
533
+ # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
534
+ "stream_connection": "",
531
535
  },
532
536
  "secret_stores": {
533
537
  # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -660,7 +664,9 @@ default_config = {
660
664
  "failed_runs_grace_period": 3600,
661
665
  "verbose": True,
662
666
  # the number of workers which will be used to trigger the start log collection
663
- "concurrent_start_logs_workers": 15,
667
+ "concurrent_start_logs_workers": 50,
668
+ # the number of runs for which to start logs on api startup
669
+ "start_logs_startup_run_limit": 150,
664
670
  # the time in hours in which to start log collection from.
665
671
  # after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
666
672
  # we want to collect their logs in the new log collection method (sidecar)
@@ -708,6 +714,8 @@ default_config = {
708
714
  # maximum number of alerts we allow to be configured.
709
715
  # user will get an error when exceeding this
710
716
  "max_allowed": 10000,
717
+ # maximum allowed value for count in criteria field inside AlertConfig
718
+ "max_criteria_count": 100,
711
719
  },
712
720
  "auth_with_client_id": {
713
721
  "enabled": False,
@@ -1118,7 +1126,6 @@ class Config:
1118
1126
  if store_prefix_dict.get(kind):
1119
1127
  # Target exist in store prefix and has a valid string value
1120
1128
  return store_prefix_dict[kind].format(project=project, **kwargs)
1121
-
1122
1129
  if (
1123
1130
  function_name
1124
1131
  and function_name
@@ -208,6 +208,7 @@ class AzureBlobStore(DataStore):
208
208
  for key in spark_options:
209
209
  if key.startswith(prefix):
210
210
  account_key = key[len(prefix) :]
211
- url += f"@{account_key}"
211
+ if not url.endswith(account_key):
212
+ url += f"@{account_key}"
212
213
  break
213
214
  return url
mlrun/datastore/base.py CHANGED
@@ -319,11 +319,7 @@ class DataStore:
319
319
  dfs.append(df_module.read_csv(*updated_args, **kwargs))
320
320
  return df_module.concat(dfs)
321
321
 
322
- elif (
323
- file_url.endswith(".parquet")
324
- or file_url.endswith(".pq")
325
- or format == "parquet"
326
- ):
322
+ elif mlrun.utils.helpers.is_parquet_file(file_url, format):
327
323
  if columns:
328
324
  kwargs["columns"] = columns
329
325
 
@@ -21,7 +21,7 @@ from mlrun.datastore.datastore_profile import datastore_profile_read
21
21
  from mlrun.errors import err_to_str
22
22
  from mlrun.utils.helpers import get_local_file_schema
23
23
 
24
- from ..utils import DB_SCHEMA, run_keys
24
+ from ..utils import DB_SCHEMA, RunKeys
25
25
  from .base import DataItem, DataStore, HttpStore
26
26
  from .filestore import FileStore
27
27
  from .inmem import InMemoryStore
@@ -133,7 +133,7 @@ class StoreManager:
133
133
  return self._db
134
134
 
135
135
  def from_dict(self, struct: dict):
136
- stor_list = struct.get(run_keys.data_stores)
136
+ stor_list = struct.get(RunKeys.data_stores)
137
137
  if stor_list and isinstance(stor_list, list):
138
138
  for stor in stor_list:
139
139
  schema, endpoint, parsed_url = parse_url(stor.get("url"))
@@ -145,7 +145,7 @@ class StoreManager:
145
145
  self._stores[stor["name"]] = new_stor
146
146
 
147
147
  def to_dict(self, struct):
148
- struct[run_keys.data_stores] = [
148
+ struct[RunKeys.data_stores] = [
149
149
  stor.to_dict() for stor in self._stores.values() if stor.from_spec
150
150
  ]
151
151
 
mlrun/datastore/inmem.py CHANGED
@@ -72,7 +72,7 @@ class InMemoryStore(DataStore):
72
72
  if columns:
73
73
  kwargs["usecols"] = columns
74
74
  reader = df_module.read_csv
75
- elif url.endswith(".parquet") or url.endswith(".pq") or format == "parquet":
75
+ elif mlrun.utils.helpers.is_parquet_file(url, format):
76
76
  if columns:
77
77
  kwargs["columns"] = columns
78
78
  reader = df_module.read_parquet
@@ -30,13 +30,15 @@ def get_snowflake_password():
30
30
 
31
31
 
32
32
  def get_snowflake_spark_options(attributes):
33
+ if not attributes:
34
+ return {}
33
35
  return {
34
36
  "format": "net.snowflake.spark.snowflake",
35
37
  "sfURL": attributes.get("url"),
36
38
  "sfUser": attributes.get("user"),
37
39
  "sfPassword": get_snowflake_password(),
38
40
  "sfDatabase": attributes.get("database"),
39
- "sfSchema": attributes.get("schema"),
41
+ "sfSchema": attributes.get("db_schema"),
40
42
  "sfWarehouse": attributes.get("warehouse"),
41
43
  "application": "iguazio_platform",
42
44
  "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
@@ -747,7 +747,7 @@ class SnowflakeSource(BaseSourceDriver):
747
747
  url="...",
748
748
  user="...",
749
749
  database="...",
750
- schema="...",
750
+ db_schema="...",
751
751
  warehouse="...",
752
752
  )
753
753
 
@@ -762,7 +762,8 @@ class SnowflakeSource(BaseSourceDriver):
762
762
  :parameter url: URL of the snowflake cluster
763
763
  :parameter user: snowflake user
764
764
  :parameter database: snowflake database
765
- :parameter schema: snowflake schema
765
+ :parameter schema: snowflake schema - deprecated, use db_schema
766
+ :parameter db_schema: snowflake schema
766
767
  :parameter warehouse: snowflake warehouse
767
768
  """
768
769
 
@@ -774,6 +775,7 @@ class SnowflakeSource(BaseSourceDriver):
774
775
  self,
775
776
  name: str = "",
776
777
  key_field: str = None,
778
+ attributes: dict[str, object] = None,
777
779
  time_field: str = None,
778
780
  schedule: str = None,
779
781
  start_time=None,
@@ -783,21 +785,34 @@ class SnowflakeSource(BaseSourceDriver):
783
785
  user: str = None,
784
786
  database: str = None,
785
787
  schema: str = None,
788
+ db_schema: str = None,
786
789
  warehouse: str = None,
787
790
  **kwargs,
788
791
  ):
789
- attrs = {
790
- "query": query,
791
- "url": url,
792
- "user": user,
793
- "database": database,
794
- "schema": schema,
795
- "warehouse": warehouse,
796
- }
792
+ # TODO: Remove in 1.9.0
793
+ if schema:
794
+ warnings.warn(
795
+ "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
796
+ )
797
+ db_schema = db_schema or schema # TODO: Remove in 1.9.0
798
+
799
+ attributes = attributes or {}
800
+ if url:
801
+ attributes["url"] = url
802
+ if user:
803
+ attributes["user"] = user
804
+ if database:
805
+ attributes["database"] = database
806
+ if db_schema:
807
+ attributes["db_schema"] = db_schema
808
+ if warehouse:
809
+ attributes["warehouse"] = warehouse
810
+ if query:
811
+ attributes["query"] = query
797
812
 
798
813
  super().__init__(
799
814
  name,
800
- attributes=attrs,
815
+ attributes=attributes,
801
816
  key_field=key_field,
802
817
  time_field=time_field,
803
818
  schedule=schedule,
@@ -27,6 +27,8 @@ from .targets import get_online_target
27
27
 
28
28
  def is_store_uri(url):
29
29
  """detect if the uri starts with the store schema prefix"""
30
+ if not url:
31
+ return False
30
32
  return url.startswith(DB_SCHEMA + "://")
31
33
 
32
34