mlrun 1.7.0rc6__py3-none-any.whl → 1.7.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (84)
  1. mlrun/__main__.py +2 -0
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/schemas/__init__.py +5 -0
  4. mlrun/common/schemas/api_gateway.py +8 -1
  5. mlrun/common/schemas/hub.py +7 -9
  6. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +36 -19
  8. mlrun/{model_monitoring/stores/models/__init__.py → common/schemas/pagination.py} +9 -10
  9. mlrun/common/schemas/project.py +16 -10
  10. mlrun/common/types.py +7 -1
  11. mlrun/config.py +35 -10
  12. mlrun/data_types/data_types.py +4 -0
  13. mlrun/datastore/__init__.py +3 -7
  14. mlrun/datastore/alibaba_oss.py +130 -0
  15. mlrun/datastore/azure_blob.py +4 -5
  16. mlrun/datastore/base.py +22 -16
  17. mlrun/datastore/datastore.py +4 -0
  18. mlrun/datastore/datastore_profile.py +19 -1
  19. mlrun/datastore/google_cloud_storage.py +1 -1
  20. mlrun/datastore/snowflake_utils.py +43 -0
  21. mlrun/datastore/sources.py +11 -29
  22. mlrun/datastore/targets.py +131 -11
  23. mlrun/datastore/utils.py +10 -5
  24. mlrun/db/base.py +58 -6
  25. mlrun/db/httpdb.py +183 -77
  26. mlrun/db/nopdb.py +110 -0
  27. mlrun/feature_store/api.py +3 -2
  28. mlrun/feature_store/retrieval/spark_merger.py +27 -23
  29. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  30. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  31. mlrun/kfpops.py +2 -5
  32. mlrun/launcher/base.py +1 -1
  33. mlrun/launcher/client.py +2 -2
  34. mlrun/model.py +1 -0
  35. mlrun/model_monitoring/__init__.py +1 -1
  36. mlrun/model_monitoring/api.py +104 -295
  37. mlrun/model_monitoring/controller.py +25 -25
  38. mlrun/model_monitoring/db/__init__.py +16 -0
  39. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
  40. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  41. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
  42. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  43. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
  44. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
  45. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
  46. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
  47. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
  48. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  49. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
  50. mlrun/model_monitoring/helpers.py +3 -3
  51. mlrun/model_monitoring/stream_processing.py +41 -9
  52. mlrun/model_monitoring/tracking_policy.py +7 -1
  53. mlrun/model_monitoring/writer.py +4 -36
  54. mlrun/projects/pipelines.py +14 -2
  55. mlrun/projects/project.py +141 -122
  56. mlrun/run.py +8 -2
  57. mlrun/runtimes/__init__.py +16 -0
  58. mlrun/runtimes/base.py +10 -1
  59. mlrun/runtimes/kubejob.py +26 -121
  60. mlrun/runtimes/nuclio/api_gateway.py +243 -66
  61. mlrun/runtimes/nuclio/application/application.py +79 -1
  62. mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
  63. mlrun/runtimes/nuclio/function.py +14 -8
  64. mlrun/runtimes/nuclio/serving.py +30 -34
  65. mlrun/runtimes/pod.py +171 -0
  66. mlrun/runtimes/utils.py +0 -28
  67. mlrun/serving/remote.py +2 -3
  68. mlrun/serving/routers.py +4 -3
  69. mlrun/serving/server.py +5 -7
  70. mlrun/serving/states.py +40 -23
  71. mlrun/serving/v2_serving.py +4 -3
  72. mlrun/utils/helpers.py +34 -0
  73. mlrun/utils/http.py +1 -1
  74. mlrun/utils/retryer.py +1 -0
  75. mlrun/utils/version/version.json +2 -2
  76. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/METADATA +25 -16
  77. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/RECORD +81 -75
  78. mlrun/model_monitoring/batch.py +0 -933
  79. mlrun/model_monitoring/stores/models/mysql.py +0 -34
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  81. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/LICENSE +0 -0
  82. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/WHEEL +0 -0
  83. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/entry_points.txt +0 -0
  84. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -505,6 +505,8 @@ def build(
     if kfp:
         print("Runtime:")
         pprint(runtime)
+        # use kind = "job" by default if not specified
+        runtime.setdefault("kind", "job")
         func = new_function(runtime=runtime)
 
     elif func_url:
mlrun/common/constants.py CHANGED
@@ -14,4 +14,10 @@
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_CREATED_LABEL = "mlrun-created"
+MLRUN_MODEL_CONF = "model-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
+MLRUN_SERVING_SPEC_PATH = (
+    f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
+)
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
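For reference, the new constants compose into a single mount path for the serving spec; a minimal check derived purely from the definitions above:

    from mlrun.common.constants import MLRUN_SERVING_SPEC_PATH

    # "/tmp/mlrun/model-conf" + "/" + "serving_spec.json"
    assert MLRUN_SERVING_SPEC_PATH == "/tmp/mlrun/model-conf/serving_spec.json"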
mlrun/common/schemas/__init__.py CHANGED
@@ -21,6 +21,7 @@ from .api_gateway import (
     APIGatewayMetadata,
     APIGatewaysOutput,
     APIGatewaySpec,
+    APIGatewayState,
     APIGatewayStatus,
     APIGatewayUpstream,
 )
@@ -124,6 +125,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
@@ -139,6 +141,7 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PrometheusEndpoints,
     TimeSeriesTarget,
 )
 from .notification import (
@@ -149,12 +152,14 @@ from .notification import (
     SetNotificationRequest,
 )
 from .object import ObjectKind, ObjectMetadata, ObjectSpec, ObjectStatus
+from .pagination import PaginationInfo
 from .pipeline import PipelinesFormat, PipelinesOutput, PipelinesPagination
 from .project import (
     IguazioProject,
     Project,
     ProjectDesiredState,
     ProjectMetadata,
+    ProjectOutput,
     ProjectOwner,
     ProjectsFormat,
     ProjectsOutput,
mlrun/common/schemas/api_gateway.py CHANGED
@@ -36,6 +36,13 @@ class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
     )
 
 
+class APIGatewayState(mlrun.common.types.StrEnum):
+    none = ""
+    ready = "ready"
+    error = "error"
+    waiting_for_provisioning = "waitingForProvisioning"
+
+
 class _APIGatewayBaseModel(pydantic.BaseModel):
     class Config:
         extra = pydantic.Extra.allow
@@ -72,7 +79,7 @@ class APIGatewaySpec(_APIGatewayBaseModel):
 
 class APIGatewayStatus(_APIGatewayBaseModel):
     name: Optional[str]
-    state: Optional[str]
+    state: Optional[APIGatewayState]
 
 
 class APIGateway(_APIGatewayBaseModel):
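A minimal sketch of consuming the now-typed gateway state; the sample status object is illustrative:

    import mlrun.common.schemas as schemas

    status = schemas.APIGatewayStatus(name="my-gateway", state="waitingForProvisioning")
    # state is coerced to the APIGatewayState enum instead of a free-form string
    assert status.state == schemas.APIGatewayState.waiting_for_provisioning
    assert status.state != schemas.APIGatewayState.ready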
mlrun/common/schemas/hub.py CHANGED
@@ -59,28 +59,26 @@ class HubSource(BaseModel):
         return f"{self.spec.path}/{self.spec.object_type}/{self.spec.channel}/{relative_path}"
 
     def get_catalog_uri(self):
-        return self.get_full_uri(mlrun.config.config.hub.catalog_filename)
+        return self.get_full_uri(mlrun.mlconf.hub.catalog_filename)
 
     @classmethod
     def generate_default_source(cls):
-        if not mlrun.config.config.hub.default_source.create:
+        if not mlrun.mlconf.hub.default_source.create:
             return None
 
         now = datetime.now(timezone.utc)
         hub_metadata = HubObjectMetadata(
-            name=mlrun.config.config.hub.default_source.name,
-            description=mlrun.config.config.hub.default_source.description,
+            name=mlrun.mlconf.hub.default_source.name,
+            description=mlrun.mlconf.hub.default_source.description,
             created=now,
             updated=now,
         )
         return cls(
             metadata=hub_metadata,
             spec=HubSourceSpec(
-                path=mlrun.config.config.hub.default_source.url,
-                channel=mlrun.config.config.hub.default_source.channel,
-                object_type=HubSourceType(
-                    mlrun.config.config.hub.default_source.object_type
-                ),
+                path=mlrun.mlconf.hub.default_source.url,
+                channel=mlrun.mlconf.hub.default_source.channel,
+                object_type=HubSourceType(mlrun.mlconf.hub.default_source.object_type),
             ),
             status=ObjectStatus(state="created"),
         )
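The hub changes are a pure refactor: mlrun.mlconf is the same configuration singleton as mlrun.config.config, so behavior is unchanged. A quick illustration:

    import mlrun
    import mlrun.config

    # mlrun.mlconf is an alias exported from mlrun/__init__.py
    assert mlrun.mlconf is mlrun.config.config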
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -22,6 +22,7 @@ from .constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
+    FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
     ModelEndpointTarget,
@@ -29,9 +30,12 @@ from .constants import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     ProjectSecretKeys,
+    PrometheusEndpoints,
     PrometheusMetric,
+    SchedulingKeys,
     TimeSeriesTarget,
     VersionedModel,
+    WriterEvent,
 )
 from .grafana import (
     GrafanaColumn,
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -21,6 +21,12 @@ import mlrun.common.helpers
 from mlrun.common.types import StrEnum
 
 
+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
@@ -77,6 +83,20 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
 
 
+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"
 
 
-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"
 
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -135,7 +151,7 @@ class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
     PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
-    KAFKA_BOOTSTRAP_SERVERS = "KAFKA_BOOTSTRAP_SERVERS"
+    KAFKA_BROKERS = "KAFKA_BROKERS"
     STREAM_PATH = "STREAM_PATH"
 
 
@@ -146,6 +162,9 @@ class ModelMonitoringStoreKinds:
 
 class SchedulingKeys:
     LAST_ANALYZED = "last_analyzed"
+    ENDPOINT_ID = "endpoint_id"
+    APPLICATION_NAME = "application_name"
+    UID = "uid"
 
 
 class FileTargetKind:
@@ -155,6 +174,8 @@ class FileTargetKind:
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
+    APP_RESULTS = "app_results"
+    MONITORING_SCHEDULES = "monitoring_schedules"
 
 
 class ModelMonitoringMode(str, Enum):
@@ -177,20 +198,16 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"
 
 
-class MonitoringFunctionNames:
-    WRITER = "model-monitoring-writer"
-    BATCH = "model-monitoring-batch"
-    APPLICATION_CONTROLLER = "model-monitoring-controller"
-    STREAM = "model-monitoring-stream"
+class PrometheusEndpoints(MonitoringStrEnum):
+    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
+    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
+    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
+
 
-    @staticmethod
-    def all():
-        return [
-            MonitoringFunctionNames.WRITER,
-            MonitoringFunctionNames.STREAM,
-            MonitoringFunctionNames.BATCH,
-            MonitoringFunctionNames.APPLICATION_CONTROLLER,
-        ]
+class MonitoringFunctionNames(MonitoringStrEnum):
+    STREAM = "model-monitoring-stream"
+    APPLICATION_CONTROLLER = "model-monitoring-controller"
+    WRITER = "model-monitoring-writer"
 
 
 @dataclass
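The new MonitoringStrEnum base gives every monitoring enum a shared list() helper; for example, the function names above materialize in definition order:

    from mlrun.common.schemas.model_monitoring import MonitoringFunctionNames

    assert MonitoringFunctionNames.list() == [
        "model-monitoring-stream",
        "model-monitoring-controller",
        "model-monitoring-writer",
    ]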
mlrun/{model_monitoring/stores/models/__init__.py → common/schemas/pagination.py} RENAMED
@@ -12,16 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional, Union
+import typing
 
-from .mysql import ModelEndpointsTable as MySQLModelEndpointsTable
-from .sqlite import ModelEndpointsTable as SQLiteModelEndpointsTable
+import pydantic
 
 
-def get_model_endpoints_table(
-    connection_string: Optional[str] = None,
-) -> Union[type[MySQLModelEndpointsTable], type[SQLiteModelEndpointsTable]]:
-    """Return ModelEndpointsTable based on the provided connection string"""
-    if connection_string and "mysql:" in connection_string:
-        return MySQLModelEndpointsTable
-    return SQLiteModelEndpointsTable
+class PaginationInfo(pydantic.BaseModel):
+    class Config:
+        allow_population_by_field_name = True
+
+    page: typing.Optional[int]
+    page_size: typing.Optional[int] = pydantic.Field(alias="page-size")
+    page_token: typing.Optional[str] = pydantic.Field(alias="page-token")
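A minimal sketch of the new PaginationInfo schema, which accepts either the query-parameter aliases or the plain field names (sample values are illustrative):

    from mlrun.common.schemas import PaginationInfo

    by_alias = PaginationInfo(**{"page": 2, "page-size": 50, "page-token": "abc"})
    by_name = PaginationInfo(page=2, page_size=50, page_token="abc")
    assert by_alias == by_name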
mlrun/common/schemas/project.py CHANGED
@@ -87,6 +87,7 @@ class ProjectSpec(pydantic.BaseModel):
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
     build: typing.Optional[ImageBuilder] = None
+    default_function_node_selector: typing.Optional[dict] = {}
 
     class Config:
         extra = pydantic.Extra.allow
@@ -119,17 +120,22 @@ class IguazioProject(pydantic.BaseModel):
     data: dict
 
 
+# The format query param controls the project type used:
+# full - Project
+# name_only - str
+# summary - ProjectSummary
+# leader - currently only IguazioProject supported
+# The way pydantic handles typing.Union is that it takes the object and tries to coerce it to be the types of the
+# union by the definition order. Therefore we can't currently add generic dict for all leader formats, but we need
+# to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
+# https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
+ProjectOutput = typing.TypeVar(
+    "ProjectOutput", Project, str, ProjectSummary, IguazioProject
+)
+
+
 class ProjectsOutput(pydantic.BaseModel):
-    # The format query param controls the project type used:
-    # full - Project
-    # name_only - str
-    # summary - ProjectSummary
-    # leader - currently only IguazioProject supported
-    # The way pydantic handles typing.Union is that it takes the object and tries to coerce it to be the types of the
-    # union by the definition order. Therefore we can't currently add generic dict for all leader formats, but we need
-    # to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
-    # https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
-    projects: list[typing.Union[Project, str, ProjectSummary, IguazioProject]]
+    projects: list[ProjectOutput]
 
 
 class ProjectSummariesOutput(pydantic.BaseModel):
mlrun/common/types.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
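Because the backported HTTPMethod inherits from StrEnum (a str subclass), its members compare equal to plain verb strings:

    from mlrun.common.types import HTTPMethod

    assert HTTPMethod.GET == "GET"
    assert HTTPMethod.POST.value == "POST"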
mlrun/config.py CHANGED
@@ -240,6 +240,7 @@ default_config = {
         "remote": "mlrun/mlrun",
         "dask": "mlrun/ml-base",
         "mpijob": "mlrun/mlrun",
+        "application": "python:3.9-slim",
     },
     # see enrich_function_preemption_spec for more info,
     # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -362,6 +363,8 @@ default_config = {
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 4096,
     },
     "logs": {
         "decode": {
@@ -479,6 +482,14 @@ default_config = {
            # if set to true, will log a warning for trying to use run db functionality while in nop db mode
            "verbose": True,
        },
+        "pagination": {
+            "default_page_size": 20,
+            "pagination_cache": {
+                "interval": 60,
+                "ttl": 3600,
+                "max_size": 10000,
+            },
+        },
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -498,10 +509,9 @@ default_config = {
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
         "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-        "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
+        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
     },
@@ -542,6 +552,7 @@ default_config = {
         "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
         # "authority" is optional and generalizes [userinfo "@"] host [":" port]
         "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
     },
     "default_targets": "parquet,nosql",
     "default_job_image": "mlrun/mlrun",
@@ -616,8 +627,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote": 90},
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",
@@ -1066,7 +1078,7 @@ class Config:
         target: str = "online",
         artifact_path: str = None,
         function_name: str = None,
-    ) -> str:
+    ) -> typing.Union[str, list[str]]:
         """Get the full path from the configuration based on the provided project and kind.
 
         :param project: Project name.
@@ -1082,7 +1094,8 @@ class Config:
                               relative artifact path will be taken from the global MLRun artifact path.
         :param function_name: Application name, None for model_monitoring_stream.
 
-        :return: Full configured path for the provided kind.
+        :return: Full configured path for the provided kind. Can be either a single path
+                 or a list of paths in the case of the online model monitoring stream path.
         """
 
         if target != "offline":
@@ -1104,10 +1117,22 @@ class Config:
                 if function_name is None
                 else f"{kind}-{function_name.lower()}",
             )
-        return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-            project=project,
-            kind=kind,
-        )
+        elif kind == "stream":  # return list for mlrun<1.6.3 BC
+            return [
+                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                    project=project,
+                    kind=kind,
+                ),  # old stream uri (pipelines) for BC ML-6043
+                mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                    project=project,
+                    kind=kind,
+                ),  # new stream uri (projects)
+            ]
+        else:
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+                project=project,
+                kind=kind,
+            )
 
         # Get the current offline path from the configuration
         file_path = mlrun.mlconf.model_endpoint_monitoring.offline_storage_path.format(
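Because the path helper can now return either a single path or a list (for kind="stream"), callers may need to normalize the result. A hedged sketch, assuming the method shown above is exposed as mlrun.mlconf.get_model_monitoring_file_target_path:

    import mlrun

    path = mlrun.mlconf.get_model_monitoring_file_target_path(
        project="my-project", kind="stream"
    )
    # kind="stream" yields [old pipelines URI, new projects URI]; other kinds return a string
    stream_uris = path if isinstance(path, list) else [path]
    for uri in stream_uris:
        print(uri)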
mlrun/data_types/data_types.py CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
mlrun/datastore/__init__.py CHANGED
@@ -107,13 +107,9 @@ def get_stream_pusher(stream_path: str, **kwargs):
     :param stream_path: path/url of stream
     """
 
-    if stream_path.startswith("kafka://") or "kafka_bootstrap_servers" in kwargs:
-        topic, bootstrap_servers = parse_kafka_url(
-            stream_path, kwargs.get("kafka_bootstrap_servers")
-        )
-        return KafkaOutputStream(
-            topic, bootstrap_servers, kwargs.get("kafka_producer_options")
-        )
+    if stream_path.startswith("kafka://") or "kafka_brokers" in kwargs:
+        topic, brokers = parse_kafka_url(stream_path, kwargs.get("kafka_brokers"))
+        return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
     elif stream_path.startswith("http://") or stream_path.startswith("https://"):
         return HTTPOutputStream(stream_path=stream_path)
     elif "://" not in stream_path:
mlrun/datastore/alibaba_oss.py ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
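A hedged usage sketch for the new Alibaba OSS datastore; the oss:// URL layout, bucket, and object path are assumptions for illustration, and the three ALIBABA_* variables must be available as secrets or environment variables, as the constructor above requires:

    import os

    import mlrun

    # credentials the OSSStore constructor looks up via _get_secret_or_env
    os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret-access-key>"
    os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

    # assumed URL layout: oss://<bucket>/<object-path>
    item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
    df = item.as_df()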
mlrun/datastore/azure_blob.py CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value
 
         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-        else:
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]