mlrun 1.7.0rc20__py3-none-any.whl → 1.7.0rc28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
- mlrun/__main__.py +10 -8
- mlrun/alerts/alert.py +55 -18
- mlrun/api/schemas/__init__.py +3 -3
- mlrun/artifacts/manager.py +26 -0
- mlrun/common/constants.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +26 -3
- mlrun/common/formatters/base.py +44 -9
- mlrun/common/formatters/function.py +12 -7
- mlrun/common/formatters/run.py +26 -0
- mlrun/common/helpers.py +11 -0
- mlrun/common/schemas/__init__.py +4 -0
- mlrun/common/schemas/alert.py +5 -9
- mlrun/common/schemas/api_gateway.py +64 -16
- mlrun/common/schemas/artifact.py +11 -0
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/model_monitoring/constants.py +21 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
- mlrun/common/schemas/pipeline.py +16 -0
- mlrun/common/schemas/project.py +17 -0
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/types.py +6 -0
- mlrun/config.py +17 -25
- mlrun/datastore/azure_blob.py +2 -1
- mlrun/datastore/datastore.py +3 -3
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/snowflake_utils.py +3 -1
- mlrun/datastore/sources.py +26 -11
- mlrun/datastore/store_resources.py +2 -0
- mlrun/datastore/targets.py +68 -16
- mlrun/db/base.py +83 -2
- mlrun/db/httpdb.py +280 -63
- mlrun/db/nopdb.py +60 -3
- mlrun/errors.py +5 -3
- mlrun/execution.py +28 -13
- mlrun/feature_store/feature_vector.py +8 -0
- mlrun/feature_store/retrieval/spark_merger.py +13 -2
- mlrun/launcher/local.py +4 -0
- mlrun/launcher/remote.py +1 -0
- mlrun/model.py +32 -3
- mlrun/model_monitoring/api.py +7 -52
- mlrun/model_monitoring/applications/base.py +5 -7
- mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
- mlrun/model_monitoring/db/stores/__init__.py +37 -24
- mlrun/model_monitoring/db/stores/base/store.py +40 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +42 -87
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +27 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +15 -15
- mlrun/model_monitoring/db/tsdb/base.py +1 -14
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +22 -18
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +86 -56
- mlrun/model_monitoring/helpers.py +34 -9
- mlrun/model_monitoring/stream_processing.py +12 -11
- mlrun/model_monitoring/writer.py +11 -11
- mlrun/projects/operations.py +5 -0
- mlrun/projects/pipelines.py +35 -21
- mlrun/projects/project.py +216 -107
- mlrun/render.py +10 -5
- mlrun/run.py +15 -5
- mlrun/runtimes/__init__.py +2 -0
- mlrun/runtimes/base.py +17 -4
- mlrun/runtimes/daskjob.py +8 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/local.py +23 -4
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/function.py +31 -2
- mlrun/runtimes/nuclio/serving.py +9 -6
- mlrun/runtimes/pod.py +5 -29
- mlrun/runtimes/remotesparkjob.py +8 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/routers.py +75 -59
- mlrun/serving/server.py +11 -0
- mlrun/serving/states.py +80 -8
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +66 -39
- mlrun/utils/helpers.py +91 -11
- mlrun/utils/logger.py +36 -2
- mlrun/utils/notifications/notification/base.py +43 -7
- mlrun/utils/notifications/notification/git.py +21 -0
- mlrun/utils/notifications/notification/slack.py +9 -14
- mlrun/utils/notifications/notification/webhook.py +41 -1
- mlrun/utils/notifications/notification_pusher.py +3 -9
- mlrun/utils/regex.py +9 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/METADATA +16 -9
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/RECORD +92 -91
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/feature_store.py CHANGED

@@ -14,7 +14,7 @@
 #
 from typing import Optional
 
-from pydantic import BaseModel, Extra, Field
+import pydantic
 
 from .auth import AuthorizationResourceTypes, Credentials
 from .object import (
@@ -27,32 +27,42 @@ from .object import (
 )
 
 
-class Feature(BaseModel):
+class FeatureStoreBaseModel(pydantic.BaseModel):
+    """
+    Intermediate base class, in order to override pydantic's configuration, as per
+    https://docs.pydantic.dev/1.10/usage/model_config/#change-behaviour-globally
+    """
+
+    class Config:
+        copy_on_model_validation = "none"
+
+
+class Feature(FeatureStoreBaseModel):
     name: str
     value_type: str
     labels: Optional[dict] = {}
 
     class Config:
-        extra = Extra.allow
+        extra = pydantic.Extra.allow
 
 
-class Entity(BaseModel):
+class Entity(FeatureStoreBaseModel):
     name: str
     value_type: str
     labels: Optional[dict] = {}
 
     class Config:
-        extra = Extra.allow
+        extra = pydantic.Extra.allow
 
 
 class FeatureSetSpec(ObjectSpec):
     entities: list[Entity] = []
     features: list[Feature] = []
-    engine: Optional[str] = Field(default="storey")
+    engine: Optional[str] = pydantic.Field(default="storey")
 
 
-class FeatureSet(BaseModel):
-    kind: ObjectKind = Field(ObjectKind.feature_set, const=True)
+class FeatureSet(FeatureStoreBaseModel):
+    kind: ObjectKind = pydantic.Field(ObjectKind.feature_set, const=True)
     metadata: ObjectMetadata
     spec: FeatureSetSpec
     status: ObjectStatus
@@ -62,7 +72,7 @@ class FeatureSet(BaseModel):
         return AuthorizationResourceTypes.feature_set
 
 
-class EntityRecord(BaseModel):
+class EntityRecord(FeatureStoreBaseModel):
     name: str
     value_type: str
     labels: list[LabelRecord]
@@ -71,7 +81,7 @@ class EntityRecord(BaseModel):
         orm_mode = True
 
 
-class FeatureRecord(BaseModel):
+class FeatureRecord(FeatureStoreBaseModel):
     name: str
     value_type: str
     labels: list[LabelRecord]
@@ -88,44 +98,64 @@ class FeatureSetRecord(ObjectRecord):
         orm_mode = True
 
 
-class FeatureSetsOutput(BaseModel):
+class FeatureSetsOutput(FeatureStoreBaseModel):
     feature_sets: list[FeatureSet]
 
 
-class FeatureSetsTagsOutput(BaseModel):
+class FeatureSetsTagsOutput(FeatureStoreBaseModel):
     tags: list[str] = []
 
 
-class FeatureSetDigestSpec(BaseModel):
+class FeatureSetDigestSpec(FeatureStoreBaseModel):
     entities: list[Entity]
     features: list[Feature]
 
 
-class FeatureSetDigestOutput(BaseModel):
+class FeatureSetDigestOutput(FeatureStoreBaseModel):
     metadata: ObjectMetadata
     spec: FeatureSetDigestSpec
 
 
-class FeatureListOutput(BaseModel):
+class FeatureSetDigestSpecV2(FeatureStoreBaseModel):
+    entities: list[Entity]
+
+
+class FeatureSetDigestOutputV2(FeatureStoreBaseModel):
+    feature_set_index: int
+    metadata: ObjectMetadata
+    spec: FeatureSetDigestSpecV2
+
+
+class FeatureListOutput(FeatureStoreBaseModel):
     feature: Feature
     feature_set_digest: FeatureSetDigestOutput
 
 
-class FeaturesOutput(BaseModel):
+class FeaturesOutput(FeatureStoreBaseModel):
     features: list[FeatureListOutput]
 
 
-class EntityListOutput(BaseModel):
+class FeaturesOutputV2(FeatureStoreBaseModel):
+    features: list[Feature]
+    feature_set_digests: list[FeatureSetDigestOutputV2]
+
+
+class EntityListOutput(FeatureStoreBaseModel):
     entity: Entity
     feature_set_digest: FeatureSetDigestOutput
 
 
-class EntitiesOutput(BaseModel):
+class EntitiesOutputV2(FeatureStoreBaseModel):
+    entities: list[Entity]
+    feature_set_digests: list[FeatureSetDigestOutputV2]
+
+
+class EntitiesOutput(FeatureStoreBaseModel):
     entities: list[EntityListOutput]
 
 
-class FeatureVector(BaseModel):
-    kind: ObjectKind = Field(ObjectKind.feature_vector, const=True)
+class FeatureVector(FeatureStoreBaseModel):
+    kind: ObjectKind = pydantic.Field(ObjectKind.feature_vector, const=True)
     metadata: ObjectMetadata
     spec: ObjectSpec
     status: ObjectStatus
@@ -139,39 +169,39 @@ class FeatureVectorRecord(ObjectRecord):
     pass
 
 
-class FeatureVectorsOutput(BaseModel):
+class FeatureVectorsOutput(FeatureStoreBaseModel):
     feature_vectors: list[FeatureVector]
 
 
-class FeatureVectorsTagsOutput(BaseModel):
+class FeatureVectorsTagsOutput(FeatureStoreBaseModel):
     tags: list[str] = []
 
 
-class DataSource(BaseModel):
+class DataSource(FeatureStoreBaseModel):
     kind: str
     name: str
     path: str
 
     class Config:
-        extra = Extra.allow
+        extra = pydantic.Extra.allow
 
 
-class DataTarget(BaseModel):
+class DataTarget(FeatureStoreBaseModel):
     kind: str
     name: str
     path: Optional[str]
 
     class Config:
-        extra = Extra.allow
+        extra = pydantic.Extra.allow
 
 
-class FeatureSetIngestInput(BaseModel):
+class FeatureSetIngestInput(FeatureStoreBaseModel):
     source: Optional[DataSource]
     targets: Optional[list[DataTarget]]
     infer_options: Optional[int]
     credentials: Credentials = Credentials()
 
 
-class FeatureSetIngestOutput(BaseModel):
+class FeatureSetIngestOutput(FeatureStoreBaseModel):
     feature_set: FeatureSet
     run_object: dict
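The new `FeatureStoreBaseModel` exists solely to flip pydantic's `copy_on_model_validation` switch for every feature-store schema that inherits from it, so nested models are passed by reference instead of being re-copied on each parent validation. A minimal standalone sketch of the behavior (assumes pydantic 1.10; the class names are illustrative, not mlrun code):

```python
import pydantic


class Child(pydantic.BaseModel):
    name: str

    class Config:
        # "none" disables pydantic v1's copy of sub-models during
        # validation of a parent model ("shallow" is the default).
        copy_on_model_validation = "none"


class Parent(pydantic.BaseModel):
    child: Child


child = Child(name="entity-a")
parent = Parent(child=child)

# With "none", the parent holds the exact same Child instance: no copy
# cost, and object identity is preserved across validation.
assert parent.child is child
```

This avoids redundant copies when large schema objects such as feature sets and vectors are nested inside list responses.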
mlrun/common/schemas/model_monitoring/constants.py CHANGED

@@ -78,8 +78,6 @@ class EventFieldType:
     FEATURE_SET_URI = "monitoring_feature_set_uri"
     ALGORITHM = "algorithm"
     VALUE = "value"
-    DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
-    POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
     TIME = "time"
     TABLE_COLUMN = "table_column"
@@ -158,19 +156,36 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class ModelEndpointTarget:
+class ModelEndpointTarget(MonitoringStrEnum):
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"
 
 
+class StreamKind(MonitoringStrEnum):
+    V3IO_STREAM = "v3io_stream"
+    KAFKA = "kafka"
+
+
+class TSDBTarget(MonitoringStrEnum):
+    V3IO_TSDB = "v3io-tsdb"
+    TDEngine = "tdengine"
+    PROMETHEUS = "prometheus"
+
+
 class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
-    KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
     TSDB_CONNECTION = "TSDB_CONNECTION"
 
+    @classmethod
+    def mandatory_secrets(cls):
+        return [
+            cls.ENDPOINT_STORE_CONNECTION,
+            cls.STREAM_PATH,
+            cls.TSDB_CONNECTION,
+        ]
+
 
 class ModelMonitoringStoreKinds:
     ENDPOINTS = "endpoints"
@@ -318,7 +333,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
-
+    mm_app_anomaly = 4
 
 
 class ResultStatusApp(IntEnum):
@@ -344,12 +359,6 @@ class ControllerPolicy:
     BASE_PERIOD = "base_period"
 
 
-class TSDBTarget:
-    V3IO_TSDB = "v3io-tsdb"
-    TDEngine = "tdengine"
-    PROMETHEUS = "prometheus"
-
-
 class HistogramDataDriftApplicationConstants:
     NAME = "histogram-data-drift"
     GENERAL_RESULT_NAME = "general_drift"
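Turning `ModelEndpointTarget`, `StreamKind`, and the relocated `TSDBTarget` into `MonitoringStrEnum` subclasses keeps existing string comparisons working while adding member enumeration and value validation; `ProjectSecretKeys.mandatory_secrets()` likewise replaces scattered literals with one canonical list. A standalone sketch of what the str-enum conversion buys (plain `str`/`Enum` mixin, since `MonitoringStrEnum`'s own definition is not part of this diff):

```python
import enum


class TSDBTarget(str, enum.Enum):
    V3IO_TSDB = "v3io-tsdb"
    TDEngine = "tdengine"
    PROMETHEUS = "prometheus"


# Existing call sites that compare against plain strings keep working...
assert TSDBTarget.V3IO_TSDB == "v3io-tsdb"


# ...while configured values can now be validated by membership:
def parse_tsdb_target(value: str) -> TSDBTarget:
    return TSDBTarget(value)  # raises ValueError on an unknown target


assert parse_tsdb_target("tdengine") is TSDBTarget.TDEngine
```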
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED

@@ -103,18 +103,6 @@ class ModelEndpointSpec(ObjectSpec):
             json_parse_values=json_parse_values,
         )
 
-    @validator("monitor_configuration")
-    @classmethod
-    def set_name(cls, monitor_configuration):
-        return monitor_configuration or {
-            EventFieldType.DRIFT_DETECTED_THRESHOLD: (
-                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected
-            ),
-            EventFieldType.POSSIBLE_DRIFT_THRESHOLD: (
-                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift
-            ),
-        }
-
     @validator("model_uri")
     @classmethod
     def validate_model_uri(cls, model_uri):
mlrun/common/schemas/pipeline.py CHANGED

@@ -15,6 +15,22 @@
 import typing
 
 import pydantic
+from deprecated import deprecated
+
+import mlrun.common.types
+
+
+@deprecated(
+    version="1.7.0",
+    reason="mlrun.common.schemas.PipelinesFormat is deprecated and will be removed in 1.9.0. "
+    "Use mlrun.common.formatters.PipelineFormat instead.",
+    category=FutureWarning,
+)
+class PipelinesFormat(mlrun.common.types.StrEnum):
+    full = "full"
+    metadata_only = "metadata_only"
+    summary = "summary"
+    name_only = "name_only"
 
 
 class PipelinesPagination(str):
mlrun/common/schemas/project.py CHANGED

@@ -16,6 +16,7 @@ import datetime
 import typing
 
 import pydantic
+from deprecated import deprecated
 
 import mlrun.common.types
 
@@ -23,6 +24,22 @@ from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
+@deprecated(
+    version="1.7.0",
+    reason="mlrun.common.schemas.ProjectsFormat is deprecated and will be removed in 1.9.0. "
+    "Use mlrun.common.formatters.ProjectFormat instead.",
+    category=FutureWarning,
+)
+class ProjectsFormat(mlrun.common.types.StrEnum):
+    full = "full"
+    name_only = "name_only"
+    # minimal format removes large fields from the response (e.g. functions, workflows, artifacts)
+    # and is used for faster response times (in the UI)
+    minimal = "minimal"
+    # internal - allowed only in follower mode, only for the leader for upgrade purposes
+    leader = "leader"
+
+
 class ProjectMetadata(pydantic.BaseModel):
     name: str
     created: typing.Optional[datetime.datetime] = None
mlrun/common/schemas/runs.py CHANGED

@@ -15,9 +15,26 @@
 import typing
 
 import pydantic
+from deprecated import deprecated
+
+import mlrun.common.types
 
 
 class RunIdentifier(pydantic.BaseModel):
     kind: typing.Literal["run"] = "run"
     uid: typing.Optional[str]
     iter: typing.Optional[int]
+
+
+@deprecated(
+    version="1.7.0",
+    reason="mlrun.common.schemas.RunsFormat is deprecated and will be removed in 1.9.0. "
+    "Use mlrun.common.formatters.RunFormat instead.",
+    category=FutureWarning,
+)
+class RunsFormat(mlrun.common.types.StrEnum):
+    # No enrichment, data is pulled as-is from the database.
+    standard = "standard"
+
+    # Performs run enrichment, including the run's artifacts. Only available for the `get` run API.
+    full = "full"
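All three format enums (`PipelinesFormat`, `ProjectsFormat`, `RunsFormat`) rely on the same mechanism: the `deprecated` package wraps the class so that using it emits a `FutureWarning` while still functioning. A self-contained sketch of the pattern (illustrative enum names; for a class the warning fires on instantiation, which for an enum means value lookup):

```python
import enum
import warnings

from deprecated import deprecated  # pip install Deprecated


@deprecated(
    version="1.7.0",
    reason="OldFormat is deprecated, use NewFormat instead.",
    category=FutureWarning,
)
class OldFormat(str, enum.Enum):
    full = "full"
    name_only = "name_only"


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    fmt = OldFormat("full")  # value lookup goes through the wrapped class

assert fmt is OldFormat.full
assert any(issubclass(w.category, FutureWarning) for w in caught)
```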
mlrun/common/schemas/schedule.py CHANGED

@@ -96,7 +96,7 @@ class ScheduleUpdate(BaseModel):
     scheduled_object: Optional[Any]
     cron_trigger: Optional[Union[str, ScheduleCronTrigger]]
     desired_state: Optional[str]
-    labels: Optional[dict] = {}
+    labels: Optional[dict] = None
     concurrency_limit: Optional[int]
     credentials: Credentials = Credentials()
 
mlrun/common/types.py CHANGED

mlrun/config.py CHANGED

@@ -229,6 +229,9 @@ default_config = {
             "executing": "24h",
         }
     },
+    # When the module is reloaded, the maximum depth recursion configuration for the recursive reload
+    # function is used to prevent infinite loop
+    "reload_max_recursion_depth": 100,
 },
 "databricks": {
     "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
@@ -501,13 +504,12 @@
 "model_endpoint_monitoring": {
     "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
     "application_stream_args": {"shard_count": 1, "retention_period_hours": 24},
-    "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
     # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
     # stream, and endpoints.
     "store_prefixes": {
         "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
         "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
-        "stream": "",
+        "stream": "",  # TODO: Delete in 1.9.0
         "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
     },
     # Offline storage path can be either relative or a full path. This path is used for general offline data
@@ -520,11 +522,12 @@
     "parquet_batching_max_events": 10_000,
     "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
     # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
-    "store_type": "v3io-nosql",
+    "store_type": "v3io-nosql",  # TODO: Delete in 1.9.0
     "endpoint_store_connection": "",
     # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
-    "tsdb_connector_type": "v3io-tsdb",
     "tsdb_connection": "",
+    # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
+    "stream_connection": "",
 },
 "secret_stores": {
     # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -657,7 +660,9 @@
     "failed_runs_grace_period": 3600,
     "verbose": True,
     # the number of workers which will be used to trigger the start log collection
-    "concurrent_start_logs_workers":
+    "concurrent_start_logs_workers": 50,
+    # the number of runs for which to start logs on api startup
+    "start_logs_startup_run_limit": 150,
     # the time in hours in which to start log collection from.
     # after upgrade, we might have runs which completed in the mean time or still in non-terminal state and
     # we want to collect their logs in the new log collection method (sidecar)
@@ -701,7 +706,12 @@
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "enabled"
+        "mode": "enabled",
+        # maximum number of alerts we allow to be configured.
+        # user will get an error when exceeding this
+        "max_allowed": 10000,
+        # maximum allowed value for count in criteria field inside AlertConfig
+        "max_criteria_count": 100,
     },
     "auth_with_client_id": {
         "enabled": False,
@@ -806,6 +816,7 @@ class Config:
     ):
         """
         decodes and loads the config attribute to expected type
+
         :param attribute_path: the path in the default_config e.g. preemptible_nodes.node_selector
         :param expected_type: the object type valid values are : `dict`, `list` etc...
         :return: the expected type instance
@@ -931,24 +942,6 @@ class Config:
             f"is not allowed for iguazio version: {igz_version} < 3.5.1"
         )
 
-    def resolve_kfp_url(self, namespace=None):
-        if config.kfp_url:
-            return config.kfp_url
-        igz_version = self.get_parsed_igz_version()
-        # TODO: When Iguazio 3.4 will deprecate we can remove this line
-        if igz_version and igz_version <= semver.VersionInfo.parse("3.6.0-b1"):
-            if namespace is None:
-                if not config.namespace:
-                    raise mlrun.errors.MLRunNotFoundError(
-                        "For KubeFlow Pipelines to function, a namespace must be configured"
-                    )
-                namespace = config.namespace
-            # When instead of host we provided namespace we tackled this issue
-            # https://github.com/canonical/bundle-kubeflow/issues/412
-            # TODO: When we'll move to kfp 1.4.0 (server side) it should be resolved
-            return f"http://ml-pipeline.{namespace}.svc.cluster.local:8888"
-        return None
-
     def resolve_chief_api_url(self) -> str:
         if self.httpdb.clusterization.chief.url:
             return self.httpdb.clusterization.chief.url
@@ -1129,7 +1122,6 @@
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
             return store_prefix_dict[kind].format(project=project, **kwargs)
-
         if (
             function_name
             and function_name
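At runtime these defaults surface through mlrun's config object. A hedged sketch of reading the new alert limits (the `mlrun.mlconf.alerts.*` attribute path is inferred from the keys added above, and the override follows mlrun's usual `MLRUN_` env-var prefix with `__` nesting):

```python
import mlrun

# Defaults from the diff above, unless overridden in the deployment.
print(mlrun.mlconf.alerts.mode)                # "enabled"
print(mlrun.mlconf.alerts.max_allowed)         # 10000
print(mlrun.mlconf.alerts.max_criteria_count)  # 100

# Overridable via environment before the config loads, e.g.:
#   export MLRUN_ALERTS__MAX_ALLOWED=500
```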
mlrun/datastore/azure_blob.py CHANGED

mlrun/datastore/datastore.py CHANGED

@@ -21,7 +21,7 @@ from mlrun.datastore.datastore_profile import datastore_profile_read
 from mlrun.errors import err_to_str
 from mlrun.utils.helpers import get_local_file_schema
 
-from ..utils import DB_SCHEMA, run_keys
+from ..utils import DB_SCHEMA, RunKeys
 from .base import DataItem, DataStore, HttpStore
 from .filestore import FileStore
 from .inmem import InMemoryStore
@@ -133,7 +133,7 @@ class StoreManager:
         return self._db
 
     def from_dict(self, struct: dict):
-        stor_list = struct.get(run_keys.data_stores)
+        stor_list = struct.get(RunKeys.data_stores)
         if stor_list and isinstance(stor_list, list):
             for stor in stor_list:
                 schema, endpoint, parsed_url = parse_url(stor.get("url"))
@@ -145,7 +145,7 @@ class StoreManager:
             self._stores[stor["name"]] = new_stor
 
     def to_dict(self, struct):
-        struct[run_keys.data_stores] = [
+        struct[RunKeys.data_stores] = [
             stor.to_dict() for stor in self._stores.values() if stor.from_spec
         ]
 
mlrun/datastore/google_cloud_storage.py CHANGED

@@ -55,8 +55,12 @@ class GoogleCloudStorageStore(DataStore):
         ) or self._get_secret_or_env("GOOGLE_APPLICATION_CREDENTIALS")
         if credentials:
             try:
-                # Try to handle credentials as a json connection string
-                token = json.loads(credentials)
+                # Try to handle credentials as a json connection string or do nothing if already a dict
+                token = (
+                    credentials
+                    if isinstance(credentials, dict)
+                    else json.loads(credentials)
+                )
             except json.JSONDecodeError:
                 # If it's not json, handle it as a filename
                 token = credentials
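The effect of the change: credentials that are already a parsed dict are used as-is, a JSON string is decoded, and anything else falls through to the file-path branch. A standalone sketch (the function name is illustrative, not mlrun's):

```python
import json
from typing import Union


def resolve_gcs_token(credentials: Union[str, dict]) -> Union[str, dict]:
    try:
        # Pass a dict through untouched; decode a JSON connection string.
        return credentials if isinstance(credentials, dict) else json.loads(credentials)
    except json.JSONDecodeError:
        # Not valid JSON: treat the value as a path to a credentials file.
        return credentials


assert resolve_gcs_token({"type": "service_account"}) == {"type": "service_account"}
assert resolve_gcs_token('{"type": "service_account"}') == {"type": "service_account"}
assert resolve_gcs_token("/path/to/credentials.json") == "/path/to/credentials.json"
```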
mlrun/datastore/snowflake_utils.py CHANGED

@@ -30,13 +30,15 @@ def get_snowflake_password():
 
 
 def get_snowflake_spark_options(attributes):
+    if not attributes:
+        return {}
     return {
         "format": "net.snowflake.spark.snowflake",
         "sfURL": attributes.get("url"),
         "sfUser": attributes.get("user"),
         "sfPassword": get_snowflake_password(),
         "sfDatabase": attributes.get("database"),
-        "sfSchema": attributes.get("schema"),
+        "sfSchema": attributes.get("db_schema"),
         "sfWarehouse": attributes.get("warehouse"),
         "application": "iguazio_platform",
         "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
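Two behavioral changes here: a `None` or empty `attributes` dict now returns `{}` instead of raising `AttributeError`, and the Spark schema option is read from the renamed `db_schema` key. A quick standalone sketch (with `get_snowflake_password` stubbed, since the real helper resolves the password from a secret or environment variable):

```python
def get_snowflake_password():
    return "***"  # stub; the real helper reads a secret/env var


def get_snowflake_spark_options(attributes):
    if not attributes:
        return {}  # new guard: None/{} no longer raises AttributeError
    return {
        "format": "net.snowflake.spark.snowflake",
        "sfURL": attributes.get("url"),
        "sfUser": attributes.get("user"),
        "sfPassword": get_snowflake_password(),
        "sfDatabase": attributes.get("database"),
        "sfSchema": attributes.get("db_schema"),  # renamed from "schema"
        "sfWarehouse": attributes.get("warehouse"),
        "application": "iguazio_platform",
        "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
    }


assert get_snowflake_spark_options(None) == {}
assert get_snowflake_spark_options({"db_schema": "PUBLIC"})["sfSchema"] == "PUBLIC"
```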
mlrun/datastore/sources.py CHANGED

@@ -747,7 +747,7 @@ class SnowflakeSource(BaseSourceDriver):
             url="...",
             user="...",
             database="...",
-            schema="...",
+            db_schema="...",
             warehouse="...",
         )
 
@@ -762,7 +762,8 @@ class SnowflakeSource(BaseSourceDriver):
     :parameter url: URL of the snowflake cluster
     :parameter user: snowflake user
     :parameter database: snowflake database
-    :parameter schema: snowflake schema
+    :parameter schema: snowflake schema - deprecated, use db_schema
+    :parameter db_schema: snowflake schema
     :parameter warehouse: snowflake warehouse
     """
 
@@ -774,6 +775,7 @@ class SnowflakeSource(BaseSourceDriver):
         self,
         name: str = "",
         key_field: str = None,
+        attributes: dict[str, object] = None,
         time_field: str = None,
         schedule: str = None,
         start_time=None,
@@ -783,21 +785,34 @@ class SnowflakeSource(BaseSourceDriver):
         user: str = None,
         database: str = None,
         schema: str = None,
+        db_schema: str = None,
         warehouse: str = None,
         **kwargs,
     ):
-        attrs = {
-            "url": url,
-            "user": user,
-            "database": database,
-            "schema": schema,
-            "warehouse": warehouse,
-            "query": query,
-        }
+        # TODO: Remove in 1.9.0
+        if schema:
+            warnings.warn(
+                "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
+            )
+        db_schema = db_schema or schema  # TODO: Remove in 1.9.0
+
+        attributes = attributes or {}
+        if url:
+            attributes["url"] = url
+        if user:
+            attributes["user"] = user
+        if database:
+            attributes["database"] = database
+        if db_schema:
+            attributes["db_schema"] = db_schema
+        if warehouse:
+            attributes["warehouse"] = warehouse
+        if query:
+            attributes["query"] = query
 
         super().__init__(
             name,
-            attributes=attrs,
+            attributes=attributes,
             key_field=key_field,
             time_field=time_field,
             schedule=schedule,