PyPI - mlrun - Versions diffs - 1.10.0rc11__py3-none-any.whl → 1.10.0rc12__py3-none-any.whl - Mend

mlrun 1.10.0rc11__py3-none-any.whl → 1.10.0rc12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (54) hide show

mlrun/__init__.py +2 -1
mlrun/__main__.py +7 -1
mlrun/artifacts/base.py +9 -3
mlrun/artifacts/dataset.py +2 -1
mlrun/artifacts/llm_prompt.py +1 -1
mlrun/artifacts/model.py +2 -2
mlrun/common/constants.py +1 -0
mlrun/common/runtimes/constants.py +10 -1
mlrun/config.py +19 -2
mlrun/datastore/__init__.py +3 -1
mlrun/datastore/alibaba_oss.py +1 -1
mlrun/datastore/azure_blob.py +1 -1
mlrun/datastore/base.py +6 -31
mlrun/datastore/datastore.py +109 -33
mlrun/datastore/datastore_profile.py +31 -0
mlrun/datastore/dbfs_store.py +1 -1
mlrun/datastore/google_cloud_storage.py +2 -2
mlrun/datastore/model_provider/__init__.py +13 -0
mlrun/datastore/model_provider/model_provider.py +82 -0
mlrun/datastore/model_provider/openai_provider.py +120 -0
mlrun/datastore/remote_client.py +54 -0
mlrun/datastore/s3.py +1 -1
mlrun/datastore/storeytargets.py +1 -1
mlrun/datastore/utils.py +22 -0
mlrun/datastore/v3io.py +1 -1
mlrun/db/base.py +1 -1
mlrun/db/httpdb.py +9 -4
mlrun/db/nopdb.py +1 -1
mlrun/execution.py +23 -7
mlrun/launcher/base.py +23 -13
mlrun/launcher/local.py +3 -1
mlrun/launcher/remote.py +4 -2
mlrun/model.py +65 -0
mlrun/package/packagers_manager.py +2 -0
mlrun/projects/operations.py +8 -1
mlrun/projects/project.py +23 -5
mlrun/run.py +17 -0
mlrun/runtimes/__init__.py +6 -0
mlrun/runtimes/base.py +24 -6
mlrun/runtimes/daskjob.py +1 -0
mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
mlrun/runtimes/local.py +1 -6
mlrun/serving/server.py +0 -2
mlrun/serving/states.py +30 -5
mlrun/serving/system_steps.py +22 -28
mlrun/utils/helpers.py +13 -2
mlrun/utils/notifications/notification_pusher.py +15 -0
mlrun/utils/version/version.json +2 -2
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc12.dist-info}/METADATA +2 -2
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc12.dist-info}/RECORD +54 -50
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc12.dist-info}/WHEEL +0 -0
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc12.dist-info}/entry_points.txt +0 -0
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc12.dist-info}/licenses/LICENSE +0 -0
{mlrun-1.10.0rc11.dist-info → mlrun-1.10.0rc12.dist-info}/top_level.txt +0 -0

mlrun/__init__.py CHANGED Viewed

@@ -32,7 +32,7 @@ from typing import Optional
 import dotenv
 from .config import config as mlconf
-from .datastore import DataItem, store_manager
+from .datastore import DataItem, ModelProvider, store_manager
 from .db import get_run_db
 from .errors import MLRunInvalidArgumentError, MLRunNotFoundError
 from .execution import MLClientCtx
@@ -55,6 +55,7 @@ from .run import (
     code_to_function,
     function_to_module,
     get_dataitem,
+    get_model_provider,
     get_object,
     get_or_create_ctx,
     get_pipeline,

mlrun/__main__.py CHANGED Viewed

@@ -261,7 +261,13 @@ def run(
     config = environ.get("MLRUN_EXEC_CONFIG")
     if from_env and config:
         config = json.loads(config)
-        runobj = RunTemplate.from_dict(config)
+        # If run is a retry we need to maintain the run status therefore using RunObject instead of RunTemplate
+        retry_count = config.get("status", {}).get("retry_count")
+        if retry_count:
+            logger.info(f"Retrying run - attempt: {retry_count + 1}")
+            runobj = mlrun.RunObject.from_dict(config)
+        else:
+            runobj = RunTemplate.from_dict(config)
     elif task:
         obj = get_object(task)
         task = yaml.load(obj, Loader=yaml.FullLoader)

mlrun/artifacts/base.py CHANGED Viewed

@@ -839,9 +839,7 @@ def get_artifact_meta(artifact):
         artifact = artifact.artifact_url
     if mlrun.datastore.is_store_uri(artifact):
-        artifact_spec, target = mlrun.datastore.store_manager.get_store_artifact(
-            artifact
-        )
+        artifact_spec, _ = mlrun.datastore.store_manager.get_store_artifact(artifact)
     elif artifact.lower().endswith(".yaml"):
         data = mlrun.datastore.store_manager.object(url=artifact).get()
@@ -942,3 +940,11 @@ def fill_artifact_object_hash(object_dict, iteration=None, producer_id=None):
             object_dict["spec"][key] = value
     return uid
+def verify_target_path(artifact: Artifact):
+    if not artifact.get_target_path():
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"artifact {artifact.uri} "
+            f"does not have a valid/persistent offline target"
+        )

mlrun/artifacts/dataset.py CHANGED Viewed

@@ -26,7 +26,7 @@ import mlrun.datastore
 import mlrun.utils.helpers
 from mlrun.config import config as mlconf
-from .base import Artifact, ArtifactSpec, StorePrefix
+from .base import Artifact, ArtifactSpec, StorePrefix, verify_target_path
 default_preview_rows_length = 20
 max_preview_columns = mlconf.artifacts.datasets.max_preview_columns
@@ -424,6 +424,7 @@ def update_dataset_meta(
         artifact_spec = artifact
     elif mlrun.datastore.is_store_uri(artifact):
         artifact_spec, _ = mlrun.datastore.store_manager.get_store_artifact(artifact)
+        verify_target_path(artifact_spec)
     else:
         raise ValueError("model path must be a model store object/URL/DataItem")

mlrun/artifacts/llm_prompt.py CHANGED Viewed

@@ -127,7 +127,7 @@ class LLMPromptArtifact(Artifact):
         if self.spec._model_artifact:
             return self.spec._model_artifact
         if self.spec.model_uri:
-            self.spec._model_artifact, target = (
+            self.spec._model_artifact, _ = (
                 mlrun.datastore.store_manager.get_store_artifact(self.spec.model_uri)
             )
             return self.spec._model_artifact

mlrun/artifacts/model.py CHANGED Viewed

@@ -26,7 +26,7 @@ from ..data_types import InferOptions, get_infer_interface
 from ..features import Feature
 from ..model import ObjectList
 from ..utils import StorePrefix, is_relative_path
-from .base import Artifact, ArtifactSpec, upload_extra_data
+from .base import Artifact, ArtifactSpec, upload_extra_data, verify_target_path
 model_spec_filename = "model_spec.yaml"
 MODEL_OPTIONAL_SUFFIXES = [".tar.gz", ".pkl", ".bin", ".pickle"]
@@ -493,7 +493,6 @@ def get_model(
     :returns: model filename, model artifact object, extra data dict
     """
-    # TODO support LLMPromptArtifact
     model_file = ""
     model_spec = None
     extra_dataitems = {}
@@ -518,6 +517,7 @@ def get_model(
             model_spec, target = mlrun.datastore.store_manager.get_store_artifact(
                 model_dir
             )
+            verify_target_path(model_spec)
         else:
             model_spec, target = model_dir, model_dir.get_target_path()
         if not model_spec or model_spec.kind != "model":

mlrun/common/constants.py CHANGED Viewed

@@ -66,6 +66,7 @@ class MLRunInternalLabels:
     scrape_metrics = f"{MLRUN_LABEL_PREFIX}scrape-metrics"
     tag = f"{MLRUN_LABEL_PREFIX}tag"
     uid = f"{MLRUN_LABEL_PREFIX}uid"
+    retry = f"{MLRUN_LABEL_PREFIX}retry-attempt"
     username = f"{MLRUN_LABEL_PREFIX}username"
     username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
     task_name = f"{MLRUN_LABEL_PREFIX}task-name"

mlrun/common/runtimes/constants.py CHANGED Viewed

@@ -139,6 +139,7 @@ class RunStates:
     aborted = "aborted"
     aborting = "aborting"
     skipped = "skipped"
+    pending_retry = "pendingRetry"
     @staticmethod
     def all():
@@ -152,6 +153,7 @@ class RunStates:
             RunStates.aborted,
             RunStates.aborting,
             RunStates.skipped,
+            RunStates.pending_retry,
         ]
     @staticmethod
@@ -168,6 +170,7 @@ class RunStates:
         return [
             RunStates.error,
             RunStates.aborted,
+            RunStates.pending_retry,
         ]
     @staticmethod
@@ -185,12 +188,18 @@ class RunStates:
     def non_terminal_states():
         return list(set(RunStates.all()) - set(RunStates.terminal_states()))
+    @staticmethod
+    def terminal_or_error_states():
+        return list(
+            set(RunStates.terminal_states())
+            | set(RunStates.error_and_abortion_states())
+        )
     @staticmethod
     def not_allowed_for_deletion_states():
         return [
             RunStates.running,
             RunStates.pending,
-            # TODO: add aborting state once we have it
         ]
     @staticmethod

mlrun/config.py CHANGED Viewed

@@ -120,6 +120,12 @@ default_config = {
             # max number of parallel abort run jobs in runs monitoring
             "concurrent_abort_stale_runs_workers": 10,
             "list_runs_time_period_in_days": 7,  # days
+            "retry": {
+                # periodic job for triggering retries interval in seconds
+                "interval": "30",
+                # runs limit to fetch for retrying
+                "fetch_runs_limit": 1000,
+            },
         },
         "projects": {
             "summaries": {
@@ -184,6 +190,9 @@ default_config = {
         "url": "",
     },
     "v3io_framesd": "http://framesd:8080",
+    "model_providers": {
+        "openai_default_model": "gpt-4",
+    },
     # default node selector to be applied to all functions - json string base64 encoded format
     "default_function_node_selector": "e30=",
     # default priority class to be applied to functions running on k8s cluster
@@ -270,6 +279,12 @@ default_config = {
                     "executing": "24h",
                 }
             },
+            "retry": {
+                "backoff": {
+                    "default_base_delay": "30s",
+                    "min_base_delay": "30s",
+                },
+            },
             # When the module is reloaded, the maximum depth recursion configuration for the recursive reload
             # function is used to prevent infinite loop
             "reload_max_recursion_depth": 100,
@@ -316,6 +331,7 @@ default_config = {
                     "project_summaries": "enabled",
                     "start_logs": "enabled",
                     "stop_logs": "enabled",
+                    "retry_jobs": "enabled",
                 },
             },
             "worker": {
@@ -539,7 +555,7 @@ default_config = {
         },
         "v3io_api": "",
         "v3io_framesd": "",
-        # If running from sdk and MLRUN_DBPATH is not set, the db will fallback to a nop db which will not preform any
+        # If running from sdk and MLRUN_DBPATH is not set, the db will fallback to a nop db which will not perform any
         # run db operations.
         "nop_db": {
             # if set to true, will raise an error for trying to use run db functionality
@@ -641,7 +657,7 @@ default_config = {
         "offline_storage_path": "model-endpoints/{kind}",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        "model_endpoint_creation_check_period": "15",
+        "model_endpoint_creation_check_period": 15,
     },
     "secret_stores": {
         # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -1219,6 +1235,7 @@ class Config:
         """
         Get the default value for the ssl_redirect configuration.
         In Iguazio we always want to redirect to HTTPS, in other cases we don't.
         :return: True if we should redirect to HTTPS, False otherwise.
         """
         return self.is_running_on_iguazio()

mlrun/datastore/__init__.py CHANGED Viewed

@@ -14,6 +14,7 @@
 __all__ = [
     "DataItem",
+    "ModelProvider",
     "get_store_resource",
     "ParquetTarget",
     "CSVTarget",
@@ -32,12 +33,12 @@ __all__ = [
     "get_stream_pusher",
     "ConfigProfile",
     "VectorStoreCollection",
+    "store_manager",
 ]
 from urllib.parse import urlparse
 import fsspec
-from mergedeep import merge
 import mlrun.datastore.wasbfs
 from mlrun.datastore.datastore_profile import (
@@ -45,6 +46,7 @@ from mlrun.datastore.datastore_profile import (
     DatastoreProfileKafkaTarget,
     DatastoreProfileV3io,
 )
+from mlrun.datastore.model_provider.model_provider import ModelProvider
 from mlrun.platforms.iguazio import (
     HTTPOutputStream,
     KafkaOutputStream,

mlrun/datastore/alibaba_oss.py CHANGED Viewed

@@ -69,7 +69,7 @@ class OSSStore(DataStore):
             key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
             secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
         )
-        return self._sanitize_storage_options(res)
+        return self._sanitize_options(res)
     def get_bucket_and_key(self, key):
         path = self._join(key)[1:]

mlrun/datastore/azure_blob.py CHANGED Viewed

@@ -67,7 +67,7 @@ class AzureBlobStore(DataStore):
                 or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
                 credential=self._get_secret_or_env("credential"),
             )
-            self._storage_options = self._sanitize_storage_options(res)
+            self._storage_options = self._sanitize_options(res)
         return self._storage_options
     @property

mlrun/datastore/base.py CHANGED Viewed

@@ -28,6 +28,7 @@ import requests
 import mlrun.config
 import mlrun.errors
+from mlrun.datastore.remote_client import BaseRemoteClient
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_jupyter, logger
@@ -45,22 +46,19 @@ class FileStats:
         return f"FileStats(size={self.size}, modified={self.modified}, type={self.content_type})"
-class DataStore:
+class DataStore(BaseRemoteClient):
     using_bucket = False
     def __init__(
         self, parent, name, kind, endpoint="", secrets: Optional[dict] = None, **kwargs
     ):
-        self._parent = parent
-        self.kind = kind
-        self.name = name
-        self.endpoint = endpoint
+        super().__init__(
+            parent=parent, kind=kind, name=name, endpoint=endpoint, secrets=secrets
+        )
         self.subpath = ""
-        self.secret_pfx = ""
         self.options = {}
         self.from_spec = False
         self._filesystem = None
-        self._secrets = secrets or {}
     @property
     def is_structured(self):
@@ -70,13 +68,6 @@ class DataStore:
     def is_unstructured(self):
         return True
-    @staticmethod
-    def _sanitize_storage_options(options):
-        if not options:
-            return {}
-        options = {k: v for k, v in options.items() if v is not None and v != ""}
-        return options
     @staticmethod
     def _sanitize_url(url):
         """
@@ -106,15 +97,9 @@ class DataStore:
         """Whether the data store supports isdir"""
         return True
-    def _get_secret_or_env(self, key, default=None):
-        # Project-secrets are mounted as env variables whose name can be retrieved from SecretsStore
-        return mlrun.get_secret_or_env(
-            key, secret_provider=self._get_secret, default=default
-        )
     def get_storage_options(self):
         """get fsspec storage options"""
-        return self._sanitize_storage_options(None)
+        return self._sanitize_options(None)
     def open(self, filepath, mode):
         file_system = self.filesystem
@@ -125,16 +110,6 @@ class DataStore:
             return f"{self.subpath}/{key}"
         return key
-    def _get_parent_secret(self, key):
-        return self._parent.secret(self.secret_pfx + key)
-    def _get_secret(self, key: str, default=None):
-        return self._secrets.get(key, default) or self._get_parent_secret(key)
-    @property
-    def url(self):
-        return f"{self.kind}://{self.endpoint}"
     @property
     def spark_url(self):
         return self.url

mlrun/datastore/datastore.py CHANGED Viewed

@@ -11,50 +11,40 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import warnings
+from functools import partial
 from typing import Optional
-from urllib.parse import urlparse
 from mergedeep import merge
 import mlrun
 import mlrun.errors
+from mlrun.artifacts.llm_prompt import LLMPromptArtifact
+from mlrun.artifacts.model import ModelArtifact
 from mlrun.datastore.datastore_profile import datastore_profile_read
+from mlrun.datastore.model_provider.model_provider import (
+    ModelProvider,
+)
+from mlrun.datastore.remote_client import BaseRemoteClient
+from mlrun.datastore.utils import (
+    parse_url,
+)
 from mlrun.errors import err_to_str
 from mlrun.utils.helpers import get_local_file_schema
+from ..artifacts.base import verify_target_path
 from ..utils import DB_SCHEMA, RunKeys
 from .base import DataItem, DataStore, HttpStore
 from .filestore import FileStore
 from .inmem import InMemoryStore
+from .model_provider.openai_provider import OpenAIProvider
 from .store_resources import get_store_resource, is_store_uri
 from .v3io import V3ioStore
 in_memory_store = InMemoryStore()
-def parse_url(url):
-    if url and url.startswith("v3io://") and not url.startswith("v3io:///"):
-        url = url.replace("v3io://", "v3io:///", 1)
-    parsed_url = urlparse(url)
-    schema = parsed_url.scheme.lower()
-    endpoint = parsed_url.hostname
-    if endpoint:
-        # HACK - urlparse returns the hostname after in lower case - we want the original case:
-        # the hostname is a substring of the netloc, in which it's the original case, so we find the indexes of the
-        # hostname in the netloc and take it from there
-        lower_hostname = parsed_url.hostname
-        netloc = str(parsed_url.netloc)
-        lower_netloc = netloc.lower()
-        hostname_index_in_netloc = lower_netloc.index(str(lower_hostname))
-        endpoint = netloc[
-            hostname_index_in_netloc : hostname_index_in_netloc + len(lower_hostname)
-        ]
-    if parsed_url.port:
-        endpoint += f":{parsed_url.port}"
-    return schema, endpoint, parsed_url
 def schema_to_store(schema) -> DataStore.__subclasses__():
     # import store classes inside to enable making their dependencies optional (package extras)
@@ -109,6 +99,20 @@ def schema_to_store(schema) -> DataStore.__subclasses__():
     raise ValueError(f"unsupported store scheme ({schema})")
+def schema_to_model_provider(
+    schema: str, raise_missing_schema_exception=True
+) -> type[ModelProvider]:
+    #  TODO add hugging face and http
+    schema_dict = {"openai": OpenAIProvider}
+    provider_class = schema_dict.get(schema, None)
+    if not provider_class:
+        if raise_missing_schema_exception:
+            raise ValueError(f"unsupported model provider schema ({schema})")
+        else:
+            warnings.warn(f"unsupported model provider schema: {schema}")
+    return provider_class
 def uri_to_ipython(link):
     schema, endpoint, parsed_url = parse_url(link)
     if schema in [DB_SCHEMA, "memory", "ds"]:
@@ -159,7 +163,11 @@ class StoreManager:
         self._stores[store.name] = store
     def get_store_artifact(
-        self, url, project="", allow_empty_resources=None, secrets=None
+        self,
+        url,
+        project="",
+        allow_empty_resources=None,
+        secrets=None,
     ):
         """
         This is expected to be run only on client side. server is not expected to load artifacts.
@@ -175,12 +183,21 @@ class StoreManager:
         except Exception as exc:
             raise OSError(f"artifact {url} not found, {err_to_str(exc)}")
         target = resource.get_target_path()
         # the allow_empty.. flag allows us to have functions which dont depend on having targets e.g. a function
         # which accepts a feature vector uri and generate the offline vector (parquet) for it if it doesnt exist
-        if not target and not allow_empty_resources:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                f"Resource {url} does not have a valid/persistent offline target"
-            )
+        if not allow_empty_resources:
+            if isinstance(resource, LLMPromptArtifact):
+                if not resource.spec.model_uri:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"LLMPromptArtifact {url} does not contain model artifact uri"
+                    )
+            elif not target and not (
+                isinstance(resource, ModelArtifact) and resource.model_url
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Resource {url} does not have a valid/persistent offline target or model_url"
+                )
         return resource, target or ""
     def object(
@@ -190,6 +207,7 @@ class StoreManager:
         project="",
         allow_empty_resources=None,
         secrets: Optional[dict] = None,
+        **kwargs,
     ) -> DataItem:
         meta = artifact_url = None
         if is_store_uri(url):
@@ -197,6 +215,8 @@ class StoreManager:
             meta, url = self.get_store_artifact(
                 url, project, allow_empty_resources, secrets
             )
+            if not allow_empty_resources:
+                verify_target_path(meta)
         store, subpath, url = self.get_or_create_store(
             url, secrets=secrets, project_name=project
@@ -218,7 +238,7 @@ class StoreManager:
         cache: Optional[dict] = None,
         schema_to_class: callable = schema_to_store,
         **kwargs,
-    ) -> (DataStore, str, str):
+    ) -> (BaseRemoteClient, str, str):
         # The cache can be an empty dictionary ({}), even if it is a _stores object
         cache = cache if cache is not None else {}
         schema, endpoint, parsed_url = parse_url(url)
@@ -227,10 +247,7 @@ class StoreManager:
         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)
-            if secrets and datastore_profile.secrets():
-                secrets = merge(secrets, datastore_profile.secrets())
-            else:
-                secrets = secrets or datastore_profile.secrets()
+            secrets = merge(secrets or {}, datastore_profile.secrets() or {})
             url = datastore_profile.url(subpath)
             schema, endpoint, parsed_url = parse_url(url)
             subpath = parsed_url.path
@@ -260,6 +277,9 @@ class StoreManager:
         remote_client_class = schema_to_class(schema)
         remote_client = None
         if remote_client_class:
+            endpoint, subpath = remote_client_class.parse_endpoint_and_path(
+                endpoint, subpath
+            )
             remote_client = remote_client_class(
                 self, schema, cache_key, parsed_url.netloc, secrets=secrets, **kwargs
             )
@@ -288,5 +308,61 @@ class StoreManager:
             )
         return datastore, sub_path, url
+    def get_or_create_model_provider(
+        self,
+        url,
+        secrets: Optional[dict] = None,
+        project_name="",
+        default_invoke_kwargs: Optional[dict] = None,
+        raise_missing_schema_exception=True,
+    ) -> ModelProvider:
+        schema_to_provider_with_raise = partial(
+            schema_to_model_provider,
+            raise_missing_schema_exception=raise_missing_schema_exception,
+        )
+        model_provider, _, _ = self._get_or_create_remote_client(
+            url=url,
+            secrets=secrets,
+            project_name=project_name,
+            schema_to_class=schema_to_provider_with_raise,
+            default_invoke_kwargs=default_invoke_kwargs,
+        )
+        if model_provider and not isinstance(model_provider, ModelProvider):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "remote client by url is not model_provider"
+            )
+        return model_provider
     def reset_secrets(self):
         self._secrets = {}
+    def model_provider_object(
+        self,
+        url,
+        project="",
+        allow_empty_resources=None,
+        secrets: Optional[dict] = None,
+        default_invoke_kwargs: Optional[dict] = None,
+        raise_missing_schema_exception=True,
+    ) -> ModelProvider:
+        if mlrun.datastore.is_store_uri(url):
+            resource = self.get_store_artifact(
+                url,
+                project,
+                allow_empty_resources,
+                secrets,
+            )
+            if not isinstance(resource, ModelArtifact) or not resource.model_url:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "unable to create the model provider from the given resource URI"
+                )
+            url = resource.model_url
+            default_invoke_kwargs = default_invoke_kwargs or resource.default_config
+        model_provider = self.get_or_create_model_provider(
+            url,
+            secrets=secrets,
+            project_name=project,
+            default_invoke_kwargs=default_invoke_kwargs,
+            raise_missing_schema_exception=raise_missing_schema_exception,
+        )
+        return model_provider

mlrun/datastore/datastore_profile.py CHANGED Viewed

@@ -456,6 +456,36 @@ class DatastoreProfileTDEngine(DatastoreProfile):
         )
+class OpenAIProfile(DatastoreProfile):
+    type: str = pydantic.v1.Field("openai")
+    _private_attributes = "api_key"
+    api_key: typing.Optional[str] = None
+    organization: typing.Optional[str] = None
+    project: typing.Optional[str] = None
+    base_url: typing.Optional[str] = None
+    timeout: typing.Optional[float] = None
+    max_retries: typing.Optional[int] = None
+    def secrets(self) -> dict:
+        res = {}
+        if self.api_key:
+            res["OPENAI_API_KEY"] = self.api_key
+        if self.organization:
+            res["OPENAI_ORG_ID"] = self.organization
+        if self.project:
+            res["OPENAI_PROJECT_ID"] = self.project
+        if self.base_url:
+            res["OPENAI_BASE_URL"] = self.base_url
+        if self.timeout:
+            res["OPENAI_TIMEOUT"] = self.timeout
+        if self.max_retries:
+            res["OPENAI_MAX_RETRIES"] = self.max_retries
+        return res
+    def url(self, subpath):
+        return f"{self.type}://{subpath.lstrip('/')}"
 _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "v3io": DatastoreProfileV3io,
     "s3": DatastoreProfileS3,
@@ -469,6 +499,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
     "hdfs": DatastoreProfileHdfs,
     "taosws": DatastoreProfileTDEngine,
     "config": ConfigProfile,
+    "openai": OpenAIProfile,
 }

mlrun/datastore/dbfs_store.py CHANGED Viewed

@@ -104,7 +104,7 @@ class DBFSStore(DataStore):
             token=self._get_secret_or_env("DATABRICKS_TOKEN"),
             instance=self._get_secret_or_env("DATABRICKS_HOST"),
         )
-        return self._sanitize_storage_options(res)
+        return self._sanitize_options(res)
     def _verify_filesystem_and_key(self, key: str):
         if not self.filesystem:

mlrun/datastore/google_cloud_storage.py CHANGED Viewed

@@ -105,12 +105,12 @@ class GoogleCloudStorageStore(DataStore):
             except json.JSONDecodeError:
                 # If it's not json, handle it as a filename
                 token = credentials
-            return self._sanitize_storage_options(dict(token=token))
+            return self._sanitize_options(dict(token=token))
         else:
             logger.info(
                 "No GCS credentials available - auth will rely on auto-discovery of credentials"
             )
-            return self._sanitize_storage_options(None)
+            return self._sanitize_options(None)
     def get_storage_options(self):
         return self.storage_options

mlrun/datastore/model_provider/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

mlrun 1.10.0rc11__py3-none-any.whl → 1.10.0rc12__py3-none-any.whl

Potentially problematic release.

mlrun 1.10.0rc11__py3-none-any.whl → 1.10.0rc12__py3-none-any.whl