ob-metaflow-extensions 1.1.156__py2.py3-none-any.whl → 1.1.158__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

metaflow_extensions/outerbounds/plugins/nim/nim_manager.py

@@ -186,18 +186,61 @@ class NimManager(object):
         ]
         self.models = {}

-        for each_model in models:
-            if each_model in nvcf_models:
-                self.models[each_model] = NimChatCompletion(
-                    model=each_model,
+        # Convert models to a standard format
+        standardized_models = []
+        # If models is a single string, convert it to a list with a dict
+        if isinstance(models, str):
+            standardized_models = [{"name": models}]
+        # If models is a list, process each item
+        elif isinstance(models, list):
+            for model_item in models:
+                # If the item is a string, convert it to a dict
+                if isinstance(model_item, str):
+                    standardized_models.append({"name": model_item})
+                # If it's already a dict, use it as is
+                elif isinstance(model_item, dict):
+                    standardized_models.append(model_item)
+                else:
+                    raise ValueError(
+                        f"Model specification must be a string or dictionary, got {type(model_item)}"
+                    )
+        else:
+            raise ValueError(
+                f"Models must be a string or a list of strings/dictionaries, got {type(models)}"
+            )
+
+        # Process each standardized model
+        for each_model_dict in standardized_models:
+            model_name = each_model_dict.get("name", "")
+            nvcf_id = each_model_dict.get("nvcf_id", "")
+            nvcf_version = each_model_dict.get("nvcf_version", "")
+
+            if model_name and not (nvcf_id and nvcf_version):
+                if model_name in nvcf_models:
+                    self.models[model_name] = NimChatCompletion(
+                        model=model_name,
+                        nvcf_id=nvcf_id,
+                        nvcf_version=nvcf_version,
+                        nim_metadata=nim_metadata,
+                        monitor=monitor,
+                    )
+                else:
+                    raise ValueError(
+                        f"Model {model_name} not supported by the Outerbounds @nim offering."
+                        f"\nYou can choose from these options: {nvcf_models}\n\n"
+                        "Reach out to Outerbounds if there are other models you'd like supported."
+                    )
+            elif nvcf_id and nvcf_version:
+                self.models[model_name] = NimChatCompletion(
+                    model=model_name,
+                    nvcf_id=nvcf_id,
+                    nvcf_version=nvcf_version,
                     nim_metadata=nim_metadata,
                     monitor=monitor,
                 )
             else:
                 raise ValueError(
-                    f"Model {each_model} not supported by the Outerbounds @nim offering."
-                    f"\nYou can choose from these options: {nvcf_models}\n\n"
-                    "Reach out to Outerbounds if there are other models you'd like supported."
+                    "You must provide either a valid 'name' or a custom 'name' along with both 'nvcf_id' and 'nvcf_version'."
                 )

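The hunk above normalizes the `models` attribute before building `NimChatCompletion` objects. A minimal sketch of the three spec shapes it now accepts (all values below are placeholders, not taken from the package):

# Accepted shapes for the @nim `models` attribute after this change (placeholder values).
models_as_string = "meta/llama3-8b-instruct"        # normalized to [{"name": "meta/llama3-8b-instruct"}]
models_by_name = ["meta/llama3-8b-instruct", "meta/llama3-70b-instruct"]
models_with_custom_nvcf_function = [
    {
        "name": "my-finetuned-model",      # hypothetical model name
        "nvcf_id": "FUNCTION_ID",          # placeholder; must be supplied together with nvcf_version
        "nvcf_version": "VERSION_ID",      # placeholder
    }
]

Dicts that carry both `nvcf_id` and `nvcf_version` bypass the `nvcf_models` allow-list and are passed straight to `NimChatCompletion`.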
@@ -205,6 +248,8 @@ class NimChatCompletion(object):
     def __init__(
         self,
         model: str = "meta/llama3-8b-instruct",
+        nvcf_id: str = "",
+        nvcf_version: str = "",
         nim_metadata: NimMetadata = None,
         monitor: bool = False,
         **kwargs,

@@ -217,18 +262,34 @@ class NimChatCompletion(object):
         self.model_name = model
         self.nim_metadata = nim_metadata
         self.monitor = monitor
-
         all_nvcf_models = self.nim_metadata.get_nvcf_chat_completion_models()
-        all_nvcf_model_names = [m["name"] for m in all_nvcf_models]

-        if self.model_name not in all_nvcf_model_names:
-            raise ValueError(
-                f"Model {self.model_name} not found in available NVCF models"
-            )
+        if nvcf_id and nvcf_version:
+            matching_models = [
+                m
+                for m in all_nvcf_models
+                if m["function-id"] == nvcf_id and m["version-id"] == nvcf_version
+            ]
+            if matching_models:
+                self.model = matching_models[0]
+                self.function_id = self.model["function-id"]
+                self.version_id = self.model["version-id"]
+                self.model_name = self.model["name"]
+            else:
+                raise ValueError(
+                    f"Function {self.function_id} with version {self.version_id} not found on NVCF"
+                )
+        else:
+            all_nvcf_model_names = [m["name"] for m in all_nvcf_models]
+
+            if self.model_name not in all_nvcf_model_names:
+                raise ValueError(
+                    f"Model {self.model_name} not found in available NVCF models"
+                )

-        self.model = all_nvcf_models[all_nvcf_model_names.index(self.model_name)]
-        self.function_id = self.model["function-id"]
-        self.version_id = self.model["version-id"]
+            self.model = all_nvcf_models[all_nvcf_model_names.index(self.model_name)]
+            self.function_id = self.model["function-id"]
+            self.version_id = self.model["version-id"]

         self.first_request = True

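With the two hunks above, `NimChatCompletion.__init__` resolves a model either by name or by an explicit NVCF function/version pair. A minimal sketch of the matching branch, assuming entries from `get_nvcf_chat_completion_models()` carry the "name", "function-id", and "version-id" keys used in the diff (the ids below are invented):

all_nvcf_models = [
    {"name": "meta/llama3-8b-instruct", "function-id": "fn-123", "version-id": "v1"},
    {"name": "my-finetuned-model", "function-id": "fn-999", "version-id": "v7"},
]
nvcf_id, nvcf_version = "fn-999", "v7"
matching_models = [
    m
    for m in all_nvcf_models
    if m["function-id"] == nvcf_id and m["version-id"] == nvcf_version
]
assert matching_models and matching_models[0]["name"] == "my-finetuned-model"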
metaflow_extensions/outerbounds/plugins/ollama/__init__.py

@@ -1,6 +1,7 @@
 from metaflow.decorators import StepDecorator
 from metaflow import current
 import functools
+import os

 from .ollama import OllamaManager
 from ..card_utilities.injector import CardDecoratorInjector

@@ -13,10 +14,10 @@ class OllamaDecorator(StepDecorator, CardDecoratorInjector):
     This decorator is used to run Ollama APIs as Metaflow task sidecars.

     User code call
-    -----------
+    --------------
     @ollama(
-        models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
-        backend='local'
+        models=[...],
+        ...
     )

     Valid backend options

@@ -26,21 +27,39 @@ class OllamaDecorator(StepDecorator, CardDecoratorInjector):
     - (TODO) 'remote': Spin up separate instance to serve Ollama models.

     Valid model options
-    ----------------
-    - 'llama3.2'
-    - 'llama3.3'
-    - any model here https://ollama.com/search
+    -------------------
+    Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'

     Parameters
     ----------
-    models: list[Ollama]
+    models: list[str]
         List of Ollama containers running models in sidecars.
     backend: str
         Determines where and how to run the Ollama process.
+    force_pull: bool
+        Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
+    skip_push_check: bool
+        Whether to skip the check that populates/overwrites remote cache on terminating an ollama model.
+    debug: bool
+        Whether to turn on verbose debugging logs.
     """

     name = "ollama"
-    defaults = {"models": [], "backend": "local", "debug": False}
+    defaults = {
+        "models": [],
+        "backend": "local",
+        "force_pull": False,
+        "skip_push_check": False,
+        "debug": False,
+    }
+
+    def step_init(
+        self, flow, graph, step_name, decorators, environment, flow_datastore, logger
+    ):
+        super().step_init(
+            flow, graph, step_name, decorators, environment, flow_datastore, logger
+        )
+        self.flow_datastore_backend = flow_datastore._storage_impl

     def task_decorate(
         self, step_func, flow, graph, retry_count, max_user_code_retries, ubf_context

@@ -51,6 +70,9 @@ class OllamaDecorator(StepDecorator, CardDecoratorInjector):
            self.ollama_manager = OllamaManager(
                models=self.attributes["models"],
                backend=self.attributes["backend"],
+               flow_datastore_backend=self.flow_datastore_backend,
+               force_pull=self.attributes["force_pull"],
+               skip_push_check=self.attributes["skip_push_check"],
                debug=self.attributes["debug"],
            )
        except Exception as e:

@@ -59,10 +81,7 @@ class OllamaDecorator(StepDecorator, CardDecoratorInjector):
        try:
            step_func()
        finally:
-           try:
-               self.ollama_manager.terminate_models()
-           except Exception as term_e:
-               print(f"[@ollama] Error during sidecar termination: {term_e}")
+           self.ollama_manager.terminate_models()
            if self.attributes["debug"]:
                print(f"[@ollama] process statuses: {self.ollama_manager.processes}")
                print(f"[@ollama] process runtime stats: {self.ollama_manager.stats}")
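Taken together, the decorator hunks above add `force_pull` and `skip_push_check` attributes and hand the flow datastore to `OllamaManager` via the new `step_init`. A hedged usage sketch; the `from metaflow import ollama` import path is assumed from the toplevel plugin listing in RECORD, so verify it against your installation:

from metaflow import FlowSpec, step, ollama  # assumed import path for @ollama

class OllamaCacheFlow(FlowSpec):

    @ollama(
        models=["llama3.2"],      # any model from https://ollama.com/search
        backend="local",
        force_pull=False,         # reuse the remote cache in the Metaflow datastore when present
        skip_push_check=False,    # push/update the cache when the sidecar terminates
        debug=True,
    )
    @step
    def start(self):
        # user code talks to the Ollama sidecar on localhost:11434 here
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    OllamaCacheFlow()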
metaflow_extensions/outerbounds/plugins/ollama/constants.py (new file)

@@ -0,0 +1 @@
+OLLAMA_SUFFIX = "mf.ollama"

metaflow_extensions/outerbounds/plugins/ollama/exceptions.py (new file)

@@ -0,0 +1,22 @@
+from metaflow.exception import MetaflowException
+
+
+class UnspecifiedRemoteStorageRootException(MetaflowException):
+    headline = "Storage root not specified."
+
+    def __init__(self, message):
+        super(UnspecifiedRemoteStorageRootException, self).__init__(message)
+
+
+class EmptyOllamaManifestCacheException(MetaflowException):
+    headline = "Model not found."
+
+    def __init__(self, message):
+        super(EmptyOllamaManifestCacheException, self).__init__(message)
+
+
+class EmptyOllamaBlobCacheException(MetaflowException):
+    headline = "Blob not found."
+
+    def __init__(self, message):
+        super(EmptyOllamaBlobCacheException, self).__init__(message)
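These are plain `MetaflowException` subclasses that `ollama.py` (below) raises and catches to signal cache misses; a cache miss ultimately falls back to `ollama pull` in `_pull_model`. Illustration only:

from metaflow_extensions.outerbounds.plugins.ollama.exceptions import EmptyOllamaManifestCacheException

try:
    raise EmptyOllamaManifestCacheException("No manifest in remote storage for model llama3.2")
except EmptyOllamaManifestCacheException as exc:
    print(f"cache miss, falling back to 'ollama pull': {exc}")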
metaflow_extensions/outerbounds/plugins/ollama/ollama.py

@@ -5,6 +5,15 @@ import socket
 import sys
 import os
 import functools
+import json
+import requests
+
+from .constants import OLLAMA_SUFFIX
+from .exceptions import (
+    EmptyOllamaManifestCacheException,
+    EmptyOllamaBlobCacheException,
+    UnspecifiedRemoteStorageRootException,
+)


 class ProcessStatus:

@@ -14,17 +23,40 @@ class ProcessStatus:


 class OllamaManager:
-
     """
     A process manager for Ollama runtimes.
-    This is run locally, e.g., whether @ollama has a local, remote, or managed backend.
+    Implements interface @ollama([models=...], ...) has a local, remote, or managed backend.
     """

-    def __init__(self, models, backend="local", debug=False):
+    def __init__(
+        self,
+        models,
+        backend="local",
+        flow_datastore_backend=None,
+        remote_storage_root=None,
+        force_pull=False,
+        skip_push_check=False,
+        debug=False,
+    ):
         self.models = {}
         self.processes = {}
+        self.flow_datastore_backend = flow_datastore_backend
+        if self.flow_datastore_backend is not None:
+            self.remote_storage_root = self.get_ollama_storage_root(
+                self.flow_datastore_backend
+            )
+        elif remote_storage_root is not None:
+            self.remote_storage_root = remote_storage_root
+        else:
+            raise UnspecifiedRemoteStorageRootException(
+                "Can not determine the storage root, as both flow_datastore_backend and remote_storage_root arguments of OllamaManager are None."
+            )
+        self.force_pull = force_pull
+        self.skip_push_check = skip_push_check
         self.debug = debug
         self.stats = {}
+        self.storage_info = {}
+        self.ollama_url = "http://localhost:11434"  # Ollama API base URL

         if backend != "local":
             raise ValueError(

@@ -41,7 +73,7 @@ class OllamaManager:
                 try:
                     future.result()
                 except Exception as e:
-                    raise RuntimeError(f"Error pulling one or more models: {e}") from e
+                    raise RuntimeError(f"Error pulling one or more models. {e}") from e

         # Run models as background processes.
         for m in models:

@@ -55,22 +87,24 @@ class OllamaManager:
            self.stats[name] = {"process_runtime": tf - t0}

     def _install_ollama(self, max_retries=3):
-
         try:
             result = subprocess.run(["which", "ollama"], capture_output=True, text=True)
             if result.returncode == 0:
-                if self.debug:
-                    print("[@ollama] is already installed.")
+                print("[@ollama] Ollama is already installed.")
                 return
         except Exception as e:
-            print("[@ollama] Did not find Ollama installation: %s" % e)
+            if self.debug:
+                print(f"[@ollama] Did not find Ollama installation: {e}")
         if sys.platform == "darwin":
             raise RuntimeError(
-                "On macOS, please install Ollama manually from https://ollama.com/download"
+                "On macOS, please install Ollama manually from https://ollama.com/download."
             )

+        if self.debug:
+            print("[@ollama] Installing Ollama...")
         env = os.environ.copy()
         env["CURL_IPRESOLVE"] = "4"
+
         for attempt in range(max_retries):
             try:
                 install_cmd = ["curl", "-fsSL", "https://ollama.com/install.sh"]

@@ -93,10 +127,11 @@ class OllamaManager:
                         f"Ollama installation script failed: stdout: {sh_proc.stdout}, stderr: {sh_proc.stderr}"
                     )
                 if self.debug:
-                    print("[@ollama] Installed successfully.")
-                    break
+                    print("[@ollama] Ollama installed successfully.")
+                break
             except Exception as e:
-                print(f"Installation attempt {attempt+1} failed: {e}")
+                if self.debug:
+                    print(f"[@ollama] Installation attempt {attempt+1} failed: {e}")
                 if attempt < max_retries - 1:
                     time.sleep(5)
                 else:

@@ -117,11 +152,9 @@ class OllamaManager:
     def _launch_server(self):
         """
         Start the Ollama server process and ensure it's running.
-        This version waits until the server is listening on port 11434.
         """
         try:
-            if self.debug:
-                print("[@ollama] Starting Ollama server...")
+            print("[@ollama] Starting Ollama server...")
             process = subprocess.Popen(
                 ["ollama", "serve"],
                 stdout=subprocess.PIPE,

@@ -133,23 +166,22 @@ class OllamaManager:
                 "properties": {"type": "api-server", "error_details": None},
                 "status": ProcessStatus.RUNNING,
             }
+
             if self.debug:
-                print(
-                    "[@ollama] Started Ollama server process with PID %s" % process.pid
-                )
+                print(f"[@ollama] Started server process with PID {process.pid}.")

-            # Wait until the server is ready (listening on 127.0.0.1:11434)
+            # Wait until the server is ready
             host, port = "127.0.0.1", 11434
             retries = 0
             max_retries = 10
             while (
                 not self._is_port_open(host, port, timeout=1) and retries < max_retries
             ):
-                print(
-                    "[@ollama] Waiting for server to be ready... (%d/%d)"
-                    % (retries + 1, max_retries)
-                )
-                time.sleep(1)
+                if retries == 0:
+                    print("[@ollama] Waiting for server to be ready...")
+                elif retries % 3 == 0:
+                    print(f"[@ollama] Still waiting... ({retries + 1}/{max_retries})")
+                time.sleep(5)
                 retries += 1

             if not self._is_port_open(host, port, timeout=1):

@@ -162,7 +194,7 @@ class OllamaManager:
                 self.processes[process.pid]["status"] = ProcessStatus.FAILED
                 raise RuntimeError(f"Ollama server failed to start. {error_details}")

-            # Check if the process has unexpectedly terminated
+            # Check if process terminated unexpectedly
             returncode = process.poll()
             if returncode is not None:
                 stdout, stderr = process.communicate()

@@ -181,21 +213,384 @@ class OllamaManager:
             self.processes[process.pid]["properties"]["error_details"] = str(e)
             raise RuntimeError(f"Error starting Ollama server: {e}") from e

-    def _pull_model(self, m):
+    def _setup_storage(self, m):
+        """
+        Configure local and remote storage paths for an Ollama model.
+        """
+        # Parse model and tag name
+        ollama_model_name_components = m.split(":")
+        if len(ollama_model_name_components) == 1:
+            model_name = ollama_model_name_components[0]
+            tag = "latest"
+        elif len(ollama_model_name_components) == 2:
+            model_name = ollama_model_name_components[0]
+            tag = ollama_model_name_components[1]
+
+        # Find where Ollama actually stores models
+        possible_storage_roots = [
+            os.environ.get("OLLAMA_MODELS"),
+            "/usr/share/ollama/.ollama/models",
+            os.path.expanduser("~/.ollama/models"),
+            "/root/.ollama/models",
+        ]
+
+        ollama_local_storage_root = None
+        for root in possible_storage_roots:
+            if root and os.path.exists(root):
+                ollama_local_storage_root = root
+                break
+
+        if not ollama_local_storage_root:
+            # https://github.com/ollama/ollama/blob/main/docs/faq.md#where-are-models-stored
+            if sys.platform.startswith("linux"):
+                ollama_local_storage_root = "/usr/share/ollama/.ollama/models"
+            elif sys.platform == "darwin":
+                ollama_local_storage_root = os.path.expanduser("~/.ollama/models")
+
+        if self.debug:
+            print(
+                f"[@ollama {m}] Using Ollama storage root: {ollama_local_storage_root}."
+            )
+
+        blob_local_path = os.path.join(ollama_local_storage_root, "blobs")
+        manifest_base_path = os.path.join(
+            ollama_local_storage_root,
+            "manifests/registry.ollama.ai/library",
+            model_name,
+        )
+
+        # Create directories
         try:
+            os.makedirs(blob_local_path, exist_ok=True)
+            os.makedirs(manifest_base_path, exist_ok=True)
+        except FileExistsError:
+            pass
+
+        # Set up remote paths
+        if not self.local_datastore and self.remote_storage_root is not None:
+            blob_remote_key = os.path.join(self.remote_storage_root, "blobs")
+            manifest_remote_key = os.path.join(
+                self.remote_storage_root,
+                "manifests/registry.ollama.ai/library",
+                model_name,
+                tag,
+            )
+        else:
+            blob_remote_key = None
+            manifest_remote_key = None
+
+        self.storage_info[m] = {
+            "blob_local_root": blob_local_path,
+            "blob_remote_root": blob_remote_key,
+            "manifest_local": os.path.join(manifest_base_path, tag),
+            "manifest_remote": manifest_remote_key,
+            "manifest_content": None,
+            "model_name": model_name,
+            "tag": tag,
+            "storage_root": ollama_local_storage_root,
+        }
+
+        if self.debug:
+            print(f"[@ollama {m}] Storage paths configured.")
+
+    def _fetch_manifest(self, m):
+        """
+        Load the manifest file and content, either from local storage or remote cache.
+        """
+        if self.debug:
+            print(f"[@ollama {m}] Checking for cached manifest...")
+
+        def _disk_to_memory():
+            with open(self.storage_info[m]["manifest_local"], "r") as f:
+                self.storage_info[m]["manifest_content"] = json.load(f)
+
+        if os.path.exists(self.storage_info[m]["manifest_local"]):
+            if self.storage_info[m]["manifest_content"] is None:
+                _disk_to_memory()
             if self.debug:
-                print("[@ollama] Pulling model: %s" % m)
-            result = subprocess.run(
-                ["ollama", "pull", m], capture_output=True, text=True
+                print(f"[@ollama {m}] Manifest found locally.")
+        elif self.local_datastore:
+            if self.debug:
+                print(f"[@ollama {m}] No manifest found in local datastore.")
+            return None
+        else:
+            from metaflow import S3
+            from metaflow.plugins.datatools.s3.s3 import MetaflowS3NotFound
+
+            try:
+                with S3() as s3:
+                    s3obj = s3.get(self.storage_info[m]["manifest_remote"])
+                    if not s3obj.exists:
+                        raise EmptyOllamaManifestCacheException(
+                            f"No manifest in remote storage for model {m}"
+                        )
+
+                    if self.debug:
+                        print(f"[@ollama {m}] Downloaded manifest from cache.")
+                    os.rename(s3obj.path, self.storage_info[m]["manifest_local"])
+                    _disk_to_memory()
+
+                    if self.debug:
+                        print(
+                            f"[@ollama {m}] Manifest found in remote cache, downloaded locally."
+                        )
+            except (MetaflowS3NotFound, EmptyOllamaManifestCacheException):
+                if self.debug:
+                    print(
+                        f"[@ollama {m}] No manifest found locally or in remote cache."
+                    )
+                return None
+
+        return self.storage_info[m]["manifest_content"]
+
+    def _fetch_blobs(self, m):
+        """
+        Fetch missing blobs from remote cache.
+        """
+        if self.debug:
+            print(f"[@ollama {m}] Checking for cached blobs...")
+
+        manifest = self._fetch_manifest(m)
+        if not manifest:
+            raise EmptyOllamaBlobCacheException(f"No manifest available for model {m}")
+
+        blobs_required = [layer["digest"] for layer in manifest["layers"]]
+        missing_blob_info = []
+
+        # Check which blobs are missing locally
+        for blob_digest in blobs_required:
+            blob_filename = blob_digest.replace(":", "-")
+            local_blob_path = os.path.join(
+                self.storage_info[m]["blob_local_root"], blob_filename
             )
-            if result.returncode != 0:
-                raise RuntimeError(
-                    f"Failed to pull model {m}: stdout: {result.stdout}, stderr: {result.stderr}"
+
+            if not os.path.exists(local_blob_path):
+                if self.debug:
+                    print(f"[@ollama {m}] Blob {blob_digest} not found locally.")
+
+                remote_blob_path = os.path.join(
+                    self.storage_info[m]["blob_remote_root"], blob_filename
+                )
+                missing_blob_info.append(
+                    {
+                        "digest": blob_digest,
+                        "filename": blob_filename,
+                        "remote_path": remote_blob_path,
+                        "local_path": local_blob_path,
+                    }
                 )
+
+        if not missing_blob_info:
+            if self.debug:
+                print(f"[@ollama {m}] All blobs found locally.")
+            return
+
+        if self.debug:
+            print(
+                f"[@ollama {m}] Downloading {len(missing_blob_info)} missing blobs from cache..."
+            )
+
+        remote_urls = [blob_info["remote_path"] for blob_info in missing_blob_info]
+
+        from metaflow import S3
+
+        try:
+            with S3() as s3:
+                if len(remote_urls) == 1:
+                    s3objs = [s3.get(remote_urls[0])]
+                else:
+                    s3objs = s3.get_many(remote_urls)
+
+                if not isinstance(s3objs, list):
+                    s3objs = [s3objs]
+
+                # Move each downloaded blob to correct location
+                for i, s3obj in enumerate(s3objs):
+                    if not s3obj.exists:
+                        blob_info = missing_blob_info[i]
+                        raise EmptyOllamaBlobCacheException(
+                            f"Blob {blob_info['digest']} not found in remote cache for model {m}"
+                        )
+
+                    blob_info = missing_blob_info[i]
+                    os.makedirs(os.path.dirname(blob_info["local_path"]), exist_ok=True)
+                    os.rename(s3obj.path, blob_info["local_path"])
+
+                    if self.debug:
+                        print(f"[@ollama {m}] Downloaded blob {blob_info['filename']}.")
+
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama {m}] Error during blob fetch: {e}")
+            raise EmptyOllamaBlobCacheException(
+                f"Failed to fetch blobs for model {m}: {e}"
+            )
+
+        if self.debug:
+            print(
+                f"[@ollama {m}] Successfully downloaded all missing blobs from cache."
+            )
+
+    def _verify_model_available(self, m):
+        """
+        Verify model is available using Ollama API
+        """
+        try:
+            response = requests.post(
+                f"{self.ollama_url}/api/show", json={"model": m}, timeout=10
+            )
+
+            available = response.status_code == 200
+
             if self.debug:
-                print("[@ollama] Model %s pulled successfully." % m)
+                if available:
+                    print(f"[@ollama {m}] ✓ Model is available via API.")
+                else:
+                    print(
+                        f"[@ollama {m}] ✗ Model not available via API (status: {response.status_code})."
+                    )
+
+            return available
+
         except Exception as e:
-            raise RuntimeError(f"Error pulling Ollama model {m}: {e}") from e
+            if self.debug:
+                print(f"[@ollama {m}] Error verifying model: {e}")
+            return False
+
+    def _register_cached_model_with_ollama(self, m):
+        """
+        Register a cached model with Ollama using the API.
+        """
+        try:
+            show_response = requests.post(
+                f"{self.ollama_url}/api/show", json={"model": m}, timeout=10
+            )
+
+            if show_response.status_code == 200:
+                if self.debug:
+                    print(f"[@ollama {m}] Model already registered with Ollama.")
+                return True
+
+            # Try to create/register the model from existing files
+            if self.debug:
+                print(f"[@ollama {m}] Registering cached model with Ollama...")
+
+            create_response = requests.post(
+                f"{self.ollama_url}/api/create",
+                json={
+                    "model": m,
+                    "from": m,  # Use same name - should find existing files
+                    "stream": False,
+                },
+                timeout=60,
+            )
+
+            if create_response.status_code == 200:
+                result = create_response.json()
+                if result.get("status") == "success":
+                    if self.debug:
+                        print(f"[@ollama {m}] Successfully registered cached model.")
+                    return True
+                else:
+                    if self.debug:
+                        print(f"[@ollama {m}] Create response: {result}.")
+
+            # Fallback: try a pull which should be fast if files exist
+            if self.debug:
+                print(f"[@ollama {m}] Create failed, trying pull to register...")
+
+            pull_response = requests.post(
+                f"{self.ollama_url}/api/pull",
+                json={"model": m, "stream": False},
+                timeout=120,
+            )
+
+            if pull_response.status_code == 200:
+                result = pull_response.json()
+                if result.get("status") == "success":
+                    if self.debug:
+                        print(f"[@ollama {m}] Model registered via pull.")
+                    return True
+
+        except requests.exceptions.RequestException as e:
+            if self.debug:
+                print(f"[@ollama {m}] API registration failed: {e}")
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama {m}] Error during registration: {e}")
+
+        return False
+
+    def _pull_model(self, m):
+        """
+        Pull/setup a model, using cache when possible.
+        """
+        self._setup_storage(m)
+
+        # Try to fetch manifest from cache first
+        manifest = None
+        try:
+            manifest = self._fetch_manifest(m)
+        except (EmptyOllamaManifestCacheException, Exception) as e:
+            if self.debug:
+                print(f"[@ollama {m}] No cached manifest found or error fetching: {e}")
+            manifest = None
+
+        # If we don't have a cached manifest or force_pull is True, pull the model
+        if self.force_pull or not manifest:
+            try:
+                print(f"[@ollama {m}] Not using cache. Downloading model {m}...")
+                result = subprocess.run(
+                    ["ollama", "pull", m], capture_output=True, text=True
+                )
+                if result.returncode != 0:
+                    raise RuntimeError(
+                        f"Failed to pull model {m}: stdout: {result.stdout}, stderr: {result.stderr}"
+                    )
+                print(f"[@ollama {m}] Model downloaded successfully.")
+            except Exception as e:
+                raise RuntimeError(f"Error pulling Ollama model {m}: {e}") from e
+        else:
+            # We have a cached manifest, try to fetch the blobs
+            try:
+                self._fetch_blobs(m)
+                print(f"[@ollama {m}] Using cached model.")
+
+                # Register the cached model with Ollama
+                if not self._verify_model_available(m):
+                    if not self._register_cached_model_with_ollama(m):
+                        raise RuntimeError(
+                            f"Failed to register cached model {m} with Ollama"
+                        )
+
+                # self.skip_push_check = True
+
+            except (EmptyOllamaBlobCacheException, Exception) as e:
+                if self.debug:
+                    print(f"[@ollama {m}] Cache failed, downloading model...")
+                    print(f"[@ollama {m}] Error: {e}")
+
+                # Fallback to pulling the model
+                try:
+                    result = subprocess.run(
+                        ["ollama", "pull", m], capture_output=True, text=True
+                    )
+                    if result.returncode != 0:
+                        raise RuntimeError(
+                            f"Failed to pull model {m}: stdout: {result.stdout}, stderr: {result.stderr}"
+                        )
+                    print(f"[@ollama {m}] Model downloaded successfully (fallback).")
+                except Exception as pull_e:
+                    raise RuntimeError(
+                        f"Error pulling Ollama model {m} as fallback: {pull_e}"
+                    ) from pull_e
+
+        # Final verification that the model is available
+        if not self._verify_model_available(m):
+            raise RuntimeError(f"Model {m} is not available to Ollama after setup")
+
+        if self.debug:
+            print(f"[@ollama {m}] Model setup complete and verified.")

     def _run_model(self, m):
         """

@@ -204,7 +599,8 @@ class OllamaManager:
         process = None
         try:
             if self.debug:
-                print("[@ollama] Running model: %s" % m)
+                print(f"[@ollama {m}] Starting model process...")
+
             process = subprocess.Popen(
                 ["ollama", "run", m],
                 stdout=subprocess.PIPE,

@@ -216,8 +612,9 @@ class OllamaManager:
                 "properties": {"type": "model", "model": m, "error_details": None},
                 "status": ProcessStatus.RUNNING,
             }
+
             if self.debug:
-                print("[@ollama] Stored process %s for model %s." % (process.pid, m))
+                print(f"[@ollama {m}] Model process PID: {process.pid}.")

             try:
                 process.wait(timeout=1)

@@ -231,8 +628,7 @@ class OllamaManager:
                 self.processes[process.pid]["status"] = ProcessStatus.SUCCESSFUL
                 if self.debug:
                     print(
-                        "[@ollama] Process %s for model %s exited successfully."
-                        % (process.pid, m)
+                        f"[@ollama {m}] Process {process.pid} exited successfully."
                     )
             else:
                 error_details = f"Return code: {returncode}, Error: {stderr}"

@@ -242,8 +638,7 @@ class OllamaManager:
                 self.processes[process.pid]["status"] = ProcessStatus.FAILED
                 if self.debug:
                     print(
-                        "[@ollama] Process %s for model %s failed: %s"
-                        % (process.pid, m, error_details)
+                        f"[@ollama {m}] Process {process.pid} failed: {error_details}."
                     )
         except Exception as e:
             if process and process.pid in self.processes:

@@ -251,20 +646,25 @@ class OllamaManager:
                 self.processes[process.pid]["properties"]["error_details"] = str(e)
             raise RuntimeError(f"Error running Ollama model {m}: {e}") from e

-    def terminate_models(self):
+    def terminate_models(self, skip_push_check=None):
         """
-        Terminate all processes gracefully.
-        First, stop model processes using 'ollama stop <model>'.
-        Then, shut down the API server process.
+        Terminate all processes gracefully and update cache.
         """
+        print("[@ollama] Shutting down models...")
+
+        if skip_push_check is not None:
+            assert isinstance(
+                skip_push_check, bool
+            ), "skip_push_check passed to terminate_models must be a bool if specified."
+            self.skip_push_check = skip_push_check

         for pid, process_info in list(self.processes.items()):
             if process_info["properties"].get("type") == "model":
                 model_name = process_info["properties"].get("model")
+
                 if self.debug:
-                    print(
-                        "[@ollama] Stopping model %s using 'ollama stop'" % model_name
-                    )
+                    print(f"[@ollama {model_name}] Stopping model process...")
+
                 try:
                     result = subprocess.run(
                         ["ollama", "stop", model_name], capture_output=True, text=True

@@ -272,28 +672,27 @@ class OllamaManager:
                     if result.returncode == 0:
                         process_info["status"] = ProcessStatus.SUCCESSFUL
                         if self.debug:
-                            print(
-                                "[@ollama] Model %s stopped successfully." % model_name
-                            )
+                            print(f"[@ollama {model_name}] Stopped successfully.")
                     else:
                         process_info["status"] = ProcessStatus.FAILED
                         if self.debug:
                             print(
-                                "[@ollama] Model %s failed to stop gracefully. Return code: %s, Error: %s"
-                                % (model_name, result.returncode, result.stderr)
+                                f"[@ollama {model_name}] Stop failed: {result.stderr}"
                             )
                 except Exception as e:
                     process_info["status"] = ProcessStatus.FAILED
-                    print("[@ollama] Error stopping model %s: %s" % (model_name, e))
+                    print(f"[@ollama {model_name}] Error stopping: {e}")

-        # Then, stop the API server
+                # Update cache if needed
+                if not self.skip_push_check:
+                    self._update_model_cache(model_name)
+
+        # Stop the API server
         for pid, process_info in list(self.processes.items()):
             if process_info["properties"].get("type") == "api-server":
                 if self.debug:
-                    print(
-                        "[@ollama] Stopping API server process with PID %s using process.terminate()"
-                        % pid
-                    )
+                    print(f"[@ollama] Stopping API server process PID {pid}.")
+
                 process = process_info["p"]
                 try:
                     process.terminate()

@@ -301,28 +700,114 @@ class OllamaManager:
                     process.wait(timeout=5)
                 except subprocess.TimeoutExpired:
                     print(
-                        "[@ollama] API server process %s did not terminate in time; killing it."
-                        % pid
+                        f"[@ollama] API server PID {pid} did not terminate, killing..."
                     )
                     process.kill()
                     process.wait()
-                returncode = process.poll()
-                if returncode is None or returncode != 0:
-                    process_info["status"] = ProcessStatus.FAILED
-                    print(
-                        "[@ollama] API server process %s terminated with error code %s."
-                        % (pid, returncode)
-                    )
-                else:
-                    process_info["status"] = ProcessStatus.SUCCESSFUL
-                    if self.debug:
-                        print(
-                            "[@ollama] API server process %s terminated successfully."
-                            % pid
-                        )
+
+                    process_info["status"] = ProcessStatus.SUCCESSFUL
+                    if self.debug:
+                        print(f"[@ollama] API server terminated successfully.")
                 except Exception as e:
                     process_info["status"] = ProcessStatus.FAILED
+                    print(f"[@ollama] Warning: Error terminating API server: {e}")
+
+        print("[@ollama] All models stopped.")
+
+        # Show performance summary
+        if self.debug:
+            if hasattr(self, "stats") and self.stats:
+                print("[@ollama] Performance summary:")
+                for operation, stats in self.stats.items():
+                    runtime = stats.get("process_runtime", 0)
+                    if runtime > 1:  # Only show operations that took meaningful time
+                        print(f"[@ollama] {operation}: {runtime:.1f}s")
+
+    def _update_model_cache(self, model_name):
+        """
+        Update the remote cache with model files if needed.
+        """
+        try:
+            manifest = self._fetch_manifest(model_name)
+            if not manifest:
+                if self.debug:
                     print(
-                        "[@ollama] Warning: Error while terminating API server process %s: %s"
-                        % (pid, e)
+                        f"[@ollama {model_name}] No manifest available for cache update."
                     )
+                return
+
+            from metaflow import S3
+
+            cache_up_to_date = True
+            key_paths = [
+                (
+                    self.storage_info[model_name]["manifest_remote"],
+                    self.storage_info[model_name]["manifest_local"],
+                )
+            ]
+
+            with S3() as s3:
+                # Check if blobs need updating
+                s3objs = s3.list_paths(
+                    [self.storage_info[model_name]["blob_remote_root"]]
+                )
+                for layer in manifest["layers"]:
+                    expected_blob_sha = layer["digest"]
+                    if expected_blob_sha not in s3objs:
+                        cache_up_to_date = False
+                        break
+
+                if not cache_up_to_date:
+                    blob_count = len(manifest.get("layers", []))
+                    print(
+                        f"[@ollama {model_name}] Uploading {blob_count} files to cache..."
+                    )
+
+                    # Add blob paths to upload
+                    for layer in manifest["layers"]:
+                        blob_filename = layer["digest"].replace(":", "-")
+                        key_paths.append(
+                            (
+                                os.path.join(
+                                    self.storage_info[model_name]["blob_remote_root"],
+                                    blob_filename,
+                                ),
+                                os.path.join(
+                                    self.storage_info[model_name]["blob_local_root"],
+                                    blob_filename,
+                                ),
+                            )
+                        )
+
+                    s3.put_files(key_paths)
+                    print(f"[@ollama {model_name}] Cache updated.")
+                else:
+                    if self.debug:
+                        print(f"[@ollama {model_name}] Cache is up to date.")
+
+        except Exception as e:
+            if self.debug:
+                print(f"[@ollama {model_name}] Error updating cache: {e}")
+
+    def get_ollama_storage_root(self, backend):
+        """
+        Return the path to the root of the datastore.
+        """
+        if backend.TYPE == "s3":
+            from metaflow.metaflow_config import DATASTORE_SYSROOT_S3
+
+            self.local_datastore = False
+            return os.path.join(DATASTORE_SYSROOT_S3, OLLAMA_SUFFIX)
+        elif backend.TYPE == "azure":
+            from metaflow.metaflow_config import DATASTORE_SYSROOT_AZURE
+
+            self.local_datastore = False
+            return os.path.join(DATASTORE_SYSROOT_AZURE, OLLAMA_SUFFIX)
+        elif backend.TYPE == "gs":
+            from metaflow.metaflow_config import DATASTORE_SYSROOT_GS
+
+            self.local_datastore = False
+            return os.path.join(DATASTORE_SYSROOT_GS, OLLAMA_SUFFIX)
+        else:
+            self.local_datastore = True
+            return None
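The caching added throughout `ollama.py` mirrors Ollama's on-disk layout inside the Metaflow datastore: manifests live under `.../mf.ollama/manifests/registry.ollama.ai/library/MODEL/TAG` and blobs under `.../mf.ollama/blobs/`, each blob named by its digest with ':' replaced by '-'. A hypothetical helper, not part of the package, that reproduces those keys for an S3 datastore root:

import os

OLLAMA_SUFFIX = "mf.ollama"  # matches constants.py above

def ollama_cache_keys(datastore_sysroot, model_spec):
    """Return (manifest_key, blob_prefix) for a spec like 'llama3.2' or 'llama3.2:latest'."""
    name, _, tag = model_spec.partition(":")
    tag = tag or "latest"
    root = os.path.join(datastore_sysroot, OLLAMA_SUFFIX)
    manifest_key = os.path.join(root, "manifests/registry.ollama.ai/library", name, tag)
    blob_prefix = os.path.join(root, "blobs")  # blob objects use the digest with ':' -> '-'
    return manifest_key, blob_prefix

# ollama_cache_keys("s3://my-bucket/metaflow", "llama3.2") ->
#   ("s3://my-bucket/metaflow/mf.ollama/manifests/registry.ollama.ai/library/llama3.2/latest",
#    "s3://my-bucket/metaflow/mf.ollama/blobs")

At shutdown, `terminate_models(skip_push_check=...)` can override the constructor's `skip_push_check` before `_update_model_cache` decides whether to upload these keys.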
METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ob-metaflow-extensions
-Version: 1.1.156
+Version: 1.1.158
 Summary: Outerbounds Platform Extensions for Metaflow
 Author: Outerbounds, Inc.
 License: Commercial

RECORD

@@ -25,7 +25,7 @@ metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8
 metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py,sha256=fx_XUkgR4r6hF2ilDfT5LubRyVrYMVIv5f6clHkCaEk,5988
 metaflow_extensions/outerbounds/plugins/nim/card.py,sha256=dXOJvsZed5NyYyxYLPDvtwg9z_X4azL9HTJGYaiNriY,4690
 metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py,sha256=50YVvC7mcZYlPluM0Wq1UtufhzlQb-RxzZkTOJJ3LkM,3439
-metaflow_extensions/outerbounds/plugins/nim/nim_manager.py,sha256=5YkohM-vfoDHPUMWb19sY0HErORoKOKf4jexERJTO80,10912
+metaflow_extensions/outerbounds/plugins/nim/nim_manager.py,sha256=y8U71106KJtrC6nlhsNnzX9Xkv3RnyZ1KEpRFwqZZFk,13686
 metaflow_extensions/outerbounds/plugins/nim/utils.py,sha256=nU-v1sheBjmITXfHiJx2ucm_Tq_nGb5BcuAm5c235cQ,1164
 metaflow_extensions/outerbounds/plugins/nvcf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 metaflow_extensions/outerbounds/plugins/nvcf/constants.py,sha256=aGHdNw_hqBu8i0zWXcatQM6e769wUXox0l8g0f6fNZ8,146

@@ -42,8 +42,10 @@ metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py,sha256=bB9AURhRep9PV_-b
 metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py,sha256=LaJ_Tk-vNjvrglzSTR-U6pk8f9MtQRKObU9m7vBYtkI,8695
 metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py,sha256=8IPkdvuTZNIqgAAt75gVNn-ydr-Zz2sKC8UX_6pNEKI,7091
 metaflow_extensions/outerbounds/plugins/nvct/utils.py,sha256=U4_Fu8H94j_Bbox7mmMhNnlRhlYHqnK28R5w_TMWEFM,1029
-metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=HEsI5U4ckQby7K2NsGBOdizhPY3WWqXSnXx_IHL7_No,2307
-metaflow_extensions/outerbounds/plugins/ollama/ollama.py,sha256=KlP8_EmnUoi8-PidyU0IDuENYxKjQaHFC33yGsvaeic,13320
+metaflow_extensions/outerbounds/plugins/ollama/__init__.py,sha256=vzh8sQEfwKRdx0fsGFJ-km4mwfi0vm2q1_vsZv-EMcc,3034
+metaflow_extensions/outerbounds/plugins/ollama/constants.py,sha256=hxkTpWEJp1pKHwUcG4EE3-17M6x2CyeMfbeqgUzF9TA,28
+metaflow_extensions/outerbounds/plugins/ollama/exceptions.py,sha256=8Ss296_MGZl1wXAoDNwpH-hsPe6iYLe90Ji1pczNocU,668
+metaflow_extensions/outerbounds/plugins/ollama/ollama.py,sha256=oe-k1ISSMtUF2y3YpfmJhU_3yR7SP31PVilN5NPgKv0,31450
 metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py,sha256=oI_C3c64XBm7n88FILqHwn-Nnc5DeT_68I67lM9rXaI,2434
 metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py,sha256=gDHQ2sMIp4NuZSzUspbSd8RGdFAoO5mgZAyFcZ2a51Y,2619
 metaflow_extensions/outerbounds/plugins/secrets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -68,7 +70,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3u
 metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
 metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2zwqkvlmFS6bcfYD_CX6CMko9DHQokMaH1iBshA,47
 metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
-ob_metaflow_extensions-1.1.156.dist-info/METADATA,sha256=G9c19j9g0v8dDQU5sP5Zaaub2fot__EMCJ6iBQBb4Qo,521
-ob_metaflow_extensions-1.1.156.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
-ob_metaflow_extensions-1.1.156.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
-ob_metaflow_extensions-1.1.156.dist-info/RECORD,,
+ob_metaflow_extensions-1.1.158.dist-info/METADATA,sha256=0t_P8-Uhi3I39xyeSGv2BpRQO5Upe1eIjs04e6Stjd8,521
+ob_metaflow_extensions-1.1.158.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+ob_metaflow_extensions-1.1.158.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
+ob_metaflow_extensions-1.1.158.dist-info/RECORD,,