orca-sdk 0.0.96__py3-none-any.whl → 0.0.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. orca_sdk/__init__.py +1 -5
  2. orca_sdk/_generated_api_client/api/__init__.py +28 -8
  3. orca_sdk/_generated_api_client/api/{datasource/create_datasource_datasource_post.py → auth/create_org_plan_auth_org_plan_post.py} +32 -31
  4. orca_sdk/_generated_api_client/api/auth/get_org_plan_auth_org_plan_get.py +122 -0
  5. orca_sdk/_generated_api_client/api/auth/update_org_plan_auth_org_plan_put.py +168 -0
  6. orca_sdk/_generated_api_client/api/classification_model/{create_classification_model_gpu_classification_model_post.py → create_classification_model_classification_model_post.py} +1 -1
  7. orca_sdk/_generated_api_client/api/datasource/create_datasource_from_content_datasource_post.py +224 -0
  8. orca_sdk/_generated_api_client/api/datasource/create_datasource_from_files_datasource_upload_post.py +229 -0
  9. orca_sdk/_generated_api_client/api/regression_model/{create_regression_model_gpu_regression_model_post.py → create_regression_model_regression_model_post.py} +1 -1
  10. orca_sdk/_generated_api_client/api/task/list_tasks_task_get.py +21 -26
  11. orca_sdk/_generated_api_client/api/telemetry/generate_memory_suggestions_telemetry_prediction_prediction_id_memory_suggestions_post.py +239 -0
  12. orca_sdk/_generated_api_client/api/telemetry/get_action_recommendation_telemetry_prediction_prediction_id_action_get.py +192 -0
  13. orca_sdk/_generated_api_client/models/__init__.py +54 -4
  14. orca_sdk/_generated_api_client/models/action_recommendation.py +82 -0
  15. orca_sdk/_generated_api_client/models/action_recommendation_action.py +11 -0
  16. orca_sdk/_generated_api_client/models/add_memory_recommendations.py +85 -0
  17. orca_sdk/_generated_api_client/models/add_memory_suggestion.py +79 -0
  18. orca_sdk/_generated_api_client/models/body_create_datasource_from_files_datasource_upload_post.py +145 -0
  19. orca_sdk/_generated_api_client/models/class_representatives.py +92 -0
  20. orca_sdk/_generated_api_client/models/classification_model_metadata.py +14 -0
  21. orca_sdk/_generated_api_client/models/clone_memoryset_request.py +40 -0
  22. orca_sdk/_generated_api_client/models/constraint_violation_error_response.py +8 -7
  23. orca_sdk/_generated_api_client/models/constraint_violation_error_response_status_code.py +8 -0
  24. orca_sdk/_generated_api_client/models/create_classification_model_request.py +40 -0
  25. orca_sdk/_generated_api_client/models/create_datasource_from_content_request.py +101 -0
  26. orca_sdk/_generated_api_client/models/create_memoryset_request.py +40 -0
  27. orca_sdk/_generated_api_client/models/create_org_plan_request.py +73 -0
  28. orca_sdk/_generated_api_client/models/create_org_plan_request_tier.py +11 -0
  29. orca_sdk/_generated_api_client/models/create_regression_model_request.py +20 -0
  30. orca_sdk/_generated_api_client/models/embed_request.py +20 -0
  31. orca_sdk/_generated_api_client/models/embedding_evaluation_payload.py +28 -10
  32. orca_sdk/_generated_api_client/models/embedding_evaluation_request.py +28 -10
  33. orca_sdk/_generated_api_client/models/embedding_model_result.py +9 -0
  34. orca_sdk/_generated_api_client/models/filter_item.py +31 -23
  35. orca_sdk/_generated_api_client/models/filter_item_field_type_1_item_type_0.py +8 -0
  36. orca_sdk/_generated_api_client/models/filter_item_field_type_2_item_type_0.py +8 -0
  37. orca_sdk/_generated_api_client/models/filter_item_field_type_2_item_type_1.py +2 -0
  38. orca_sdk/_generated_api_client/models/internal_server_error_response.py +8 -7
  39. orca_sdk/_generated_api_client/models/internal_server_error_response_status_code.py +8 -0
  40. orca_sdk/_generated_api_client/models/labeled_memory.py +5 -5
  41. orca_sdk/_generated_api_client/models/labeled_memory_update.py +16 -16
  42. orca_sdk/_generated_api_client/models/labeled_memory_with_feedback_metrics.py +5 -5
  43. orca_sdk/_generated_api_client/models/lookup_request.py +20 -0
  44. orca_sdk/_generated_api_client/models/memory_metrics.py +98 -0
  45. orca_sdk/_generated_api_client/models/memoryset_analysis_configs.py +33 -0
  46. orca_sdk/_generated_api_client/models/memoryset_class_patterns_analysis_config.py +79 -0
  47. orca_sdk/_generated_api_client/models/memoryset_class_patterns_metrics.py +138 -0
  48. orca_sdk/_generated_api_client/models/memoryset_metadata.py +42 -0
  49. orca_sdk/_generated_api_client/models/memoryset_metrics.py +33 -0
  50. orca_sdk/_generated_api_client/models/memoryset_update.py +20 -0
  51. orca_sdk/_generated_api_client/models/not_found_error_response.py +6 -7
  52. orca_sdk/_generated_api_client/models/not_found_error_response_resource_type_0.py +1 -0
  53. orca_sdk/_generated_api_client/models/not_found_error_response_status_code.py +8 -0
  54. orca_sdk/_generated_api_client/models/org_plan.py +99 -0
  55. orca_sdk/_generated_api_client/models/org_plan_tier.py +11 -0
  56. orca_sdk/_generated_api_client/models/paginated_task.py +108 -0
  57. orca_sdk/_generated_api_client/models/predictive_model_update.py +20 -0
  58. orca_sdk/_generated_api_client/models/pretrained_embedding_model_metadata.py +8 -0
  59. orca_sdk/_generated_api_client/models/regression_model_metadata.py +14 -0
  60. orca_sdk/_generated_api_client/models/scored_memory_update.py +9 -9
  61. orca_sdk/_generated_api_client/models/service_unavailable_error_response.py +8 -7
  62. orca_sdk/_generated_api_client/models/service_unavailable_error_response_status_code.py +8 -0
  63. orca_sdk/_generated_api_client/models/telemetry_field_type_0_item_type_0.py +8 -0
  64. orca_sdk/_generated_api_client/models/telemetry_field_type_1_item_type_0.py +8 -0
  65. orca_sdk/_generated_api_client/models/telemetry_field_type_1_item_type_1.py +8 -0
  66. orca_sdk/_generated_api_client/models/telemetry_filter_item.py +42 -30
  67. orca_sdk/_generated_api_client/models/telemetry_sort_options.py +42 -30
  68. orca_sdk/_generated_api_client/models/unauthenticated_error_response.py +8 -7
  69. orca_sdk/_generated_api_client/models/unauthenticated_error_response_status_code.py +8 -0
  70. orca_sdk/_generated_api_client/models/unauthorized_error_response.py +8 -7
  71. orca_sdk/_generated_api_client/models/unauthorized_error_response_status_code.py +8 -0
  72. orca_sdk/_generated_api_client/models/update_org_plan_request.py +73 -0
  73. orca_sdk/_generated_api_client/models/update_org_plan_request_tier.py +11 -0
  74. orca_sdk/_shared/metrics.py +1 -1
  75. orca_sdk/classification_model.py +2 -2
  76. orca_sdk/classification_model_test.py +53 -0
  77. orca_sdk/credentials.py +15 -1
  78. orca_sdk/datasource.py +180 -41
  79. orca_sdk/datasource_test.py +194 -0
  80. orca_sdk/embedding_model.py +51 -13
  81. orca_sdk/embedding_model_test.py +27 -0
  82. orca_sdk/job.py +15 -14
  83. orca_sdk/job_test.py +34 -0
  84. orca_sdk/memoryset.py +47 -7
  85. orca_sdk/regression_model.py +2 -2
  86. orca_sdk/telemetry.py +94 -3
  87. {orca_sdk-0.0.96.dist-info → orca_sdk-0.0.97.dist-info}/METADATA +18 -1
  88. {orca_sdk-0.0.96.dist-info → orca_sdk-0.0.97.dist-info}/RECORD +89 -58
  89. orca_sdk/_generated_api_client/models/body_create_datasource_datasource_post.py +0 -207
  90. orca_sdk/_generated_api_client/models/labeled_memory_metrics.py +0 -246
  91. {orca_sdk-0.0.96.dist-info → orca_sdk-0.0.97.dist-info}/WHEEL +0 -0
orca_sdk/datasource.py CHANGED
@@ -4,32 +4,105 @@ import logging
 import tempfile
 import zipfile
 from datetime import datetime
+from io import BytesIO
 from os import PathLike
 from pathlib import Path
-from typing import cast
+from typing import Union

 import pandas as pd
 import pyarrow as pa
-from datasets import Dataset
+from datasets import Dataset, DatasetDict
+from pyarrow import parquet
 from torch.utils.data import DataLoader as TorchDataLoader
 from torch.utils.data import Dataset as TorchDataset
 from tqdm.auto import tqdm

 from ._generated_api_client.api import (
+    create_datasource_from_content,
     delete_datasource,
     get_datasource,
     list_datasources,
 )
-from ._generated_api_client.api.datasource.create_datasource_datasource_post import (
+from ._generated_api_client.api.datasource.create_datasource_from_files_datasource_upload_post import (
     _parse_response as parse_create_response,
 )
 from ._generated_api_client.client import get_client
-from ._generated_api_client.models import ColumnType, DatasourceMetadata
+from ._generated_api_client.models import (
+    ColumnType,
+    CreateDatasourceFromContentRequest,
+    DatasourceMetadata,
+)
 from ._utils.common import CreateMode, DropMode
-from ._utils.data_parsing import hf_dataset_from_disk, hf_dataset_from_torch
+from ._utils.data_parsing import hf_dataset_from_torch
 from ._utils.tqdm_file_reader import TqdmFileReader


+def _upload_files_to_datasource(
+    name: str,
+    file_paths: list[Path],
+    description: str | None = None,
+) -> DatasourceMetadata:
+    """
+    Helper function to upload files to create a datasource using manual HTTP requests.
+
+    This bypasses the generated client because it doesn't handle file uploads properly.
+
+    Params:
+        name: Name for the datasource
+        file_paths: List of file paths to upload
+        description: Optional description for the datasource
+
+    Returns:
+        Metadata for the created datasource
+    """
+    client = get_client()
+    files = []
+
+    # Calculate total size for all files
+    total_size = sum(file_path.stat().st_size for file_path in file_paths)
+
+    with tqdm(total=total_size, unit="B", unit_scale=True, desc="Uploading") as pbar:
+        for file_path in file_paths:
+            buffered_reader = open(file_path, "rb")
+            tqdm_reader = TqdmFileReader(buffered_reader, pbar)
+            files.append(("files", (file_path.name, tqdm_reader)))
+
+        # Use manual HTTP request for file uploads
+        metadata = parse_create_response(
+            response=client.get_httpx_client().request(
+                method="post",
+                url="/datasource/upload",
+                files=files,
+                data={"name": name, "description": description},
+            )
+        )
+
+    return metadata
+
+
+def _handle_existing_datasource(name: str, if_exists: CreateMode) -> Union["Datasource", None]:
+    """
+    Helper function to handle the common pattern of checking if a datasource exists
+    and taking action based on the if_exists parameter.
+
+    Params:
+        name: Name of the datasource to check
+        if_exists: What to do if a datasource with the same name already exists
+
+    Returns:
+        Datasource instance if opening existing, None if should proceed with creation
+
+    Raises:
+        ValueError: If the datasource already exists and if_exists is "error"
+    """
+    if Datasource.exists(name):
+        if if_exists == "error":
+            raise ValueError(f"Dataset with name {name} already exists")
+        elif if_exists == "open":
+            return Datasource.open(name)
+    return None
+
+
 class Datasource:
     """
     A Handle to a datasource in the OrcaCloud
@@ -138,40 +211,54 @@ class Datasource:
         Raises:
             ValueError: If the datasource already exists and if_exists is `"error"`
         """
-        client = get_client()
-
-        if cls.exists(name):
-            if if_exists == "error":
-                raise ValueError(f"Dataset with name {name} already exists")
-            elif if_exists == "open":
-                return cls.open(name)
+        # Check if datasource already exists and handle accordingly
+        existing = _handle_existing_datasource(name, if_exists)
+        if existing is not None:
+            return existing

         with tempfile.TemporaryDirectory() as tmp_dir:
             dataset.save_to_disk(tmp_dir)
-            files = []

-            # Calculate total size for all files
+            # Get all file paths in the directory
             file_paths = list(Path(tmp_dir).iterdir())
-            total_size = sum(file_path.stat().st_size for file_path in file_paths)
-
-            with tqdm(total=total_size, unit="B", unit_scale=True, desc="Uploading") as pbar:
-                for file_path in file_paths:
-                    buffered_reader = open(file_path, "rb")
-                    tqdm_reader = TqdmFileReader(buffered_reader, pbar)
-                    files.append(("files", (file_path.name, tqdm_reader)))
-
-                # Do not use Generated client for this endpoint b/c it does not handle files properly
-                metadata = parse_create_response(
-                    response=client.get_httpx_client().request(
-                        method="post",
-                        url="/datasource/",
-                        files=files,
-                        data={"name": name, "description": description},
-                    )
-                )

+            # Use the helper function to upload files
+            metadata = _upload_files_to_datasource(name, file_paths, description)
         return cls(metadata=metadata)

+    @classmethod
+    def from_hf_dataset_dict(
+        cls,
+        name: str,
+        dataset_dict: DatasetDict,
+        if_exists: CreateMode = "error",
+        description: dict[str, str | None] | str | None = None,
+    ) -> dict[str, Datasource]:
+        """
+        Create datasources from a Hugging Face DatasetDict
+
+        Params:
+            name: Name prefix for the new datasources, will be suffixed with the dataset name
+            dataset_dict: The Hugging Face DatasetDict to create the datasources from
+            if_exists: What to do if a datasource with the same name already exists, defaults to
+                `"error"`. Other option is `"open"` to open the existing datasource.
+            description: Optional description for the datasources, can be a string or a dictionary of dataset names to descriptions
+
+        Returns:
+            A dictionary of datasource handles, keyed by the dataset name
+
+        Raises:
+            ValueError: If a datasource already exists and if_exists is `"error"`
+        """
+        if description is None or isinstance(description, str):
+            description = {dataset_name: description for dataset_name in dataset_dict.keys()}
+        return {
+            dataset_name: cls.from_hf_dataset(
+                f"{name}_{dataset_name}", dataset, if_exists=if_exists, description=description[dataset_name]
+            )
+            for dataset_name, dataset in dataset_dict.items()
+        }
+
     @classmethod
     def from_pytorch(
         cls,
@@ -225,8 +312,16 @@ class Datasource:
         Examples:
             >>> Datasource.from_list("my_datasource", [{"text": "Hello, world!", "label": 1}, {"text": "Goodbye", "label": 0}])
         """
-        hf_dataset = Dataset.from_list(data)
-        return cls.from_hf_dataset(name, hf_dataset, if_exists=if_exists, description=description)
+        # Check if datasource already exists and handle accordingly
+        existing = _handle_existing_datasource(name, if_exists)
+        if existing is not None:
+            return existing
+
+        # Use the generated API client function for content creation
+        body = CreateDatasourceFromContentRequest(name=name, description=description, content=data)
+
+        metadata = create_datasource_from_content(body=body)
+        return cls(metadata=metadata)

     @classmethod
     def from_dict(
@@ -251,8 +346,16 @@ class Datasource:
         Examples:
             >>> Datasource.from_dict("my_datasource", {"text": ["Hello, world!", "Goodbye"], "label": [1, 0]})
         """
-        hf_dataset = Dataset.from_dict(data)
-        return cls.from_hf_dataset(name, hf_dataset, if_exists=if_exists, description=description)
+        # Check if datasource already exists and handle accordingly
+        existing = _handle_existing_datasource(name, if_exists)
+        if existing is not None:
+            return existing
+
+        # Use the generated API client function for content creation
+        body = CreateDatasourceFromContentRequest(name=name, description=description, content=data)
+
+        metadata = create_datasource_from_content(body=body)
+        return cls(metadata=metadata)

     @classmethod
     def from_pandas(
@@ -274,8 +377,8 @@ class Datasource:
         Raises:
             ValueError: If the datasource already exists and if_exists is `"error"`
         """
-        hf_dataset = Dataset.from_pandas(dataframe)
-        return cls.from_hf_dataset(name, hf_dataset, if_exists=if_exists, description=description)
+        dataset = Dataset.from_pandas(dataframe)
+        return cls.from_hf_dataset(name, dataset, if_exists=if_exists, description=description)

     @classmethod
     def from_arrow(
@@ -297,8 +400,29 @@ class Datasource:
         Raises:
             ValueError: If the datasource already exists and if_exists is `"error"`
         """
-        hf_dataset = Dataset(pyarrow_table)
-        return cls.from_hf_dataset(name, hf_dataset, if_exists=if_exists, description=description)
+        # Check if datasource already exists and handle accordingly
+        existing = _handle_existing_datasource(name, if_exists)
+        if existing is not None:
+            return existing
+
+        # Write to bytes buffer
+        buffer = BytesIO()
+        parquet.write_table(pyarrow_table, buffer)
+        parquet_bytes = buffer.getvalue()
+
+        client = get_client()
+
+        # Use manual HTTP request for file uploads
+        metadata = parse_create_response(
+            response=client.get_httpx_client().request(
+                method="post",
+                url="/datasource/upload",
+                files=[("files", ("data.parquet", parquet_bytes))],
+                data={"name": name, "description": description},
+            )
+        )
+
+        return cls(metadata=metadata)

     @classmethod
     def from_disk(
@@ -328,8 +452,23 @@ class Datasource:
         Raises:
             ValueError: If the datasource already exists and if_exists is `"error"`
         """
-        hf_dataset = hf_dataset_from_disk(file_path)
-        return cls.from_hf_dataset(name, cast(Dataset, hf_dataset), if_exists=if_exists, description=description)
+        # Check if datasource already exists and handle accordingly
+        existing = _handle_existing_datasource(name, if_exists)
+        if existing is not None:
+            return existing
+
+        file_path = Path(file_path)
+
+        # For dataset directories, use the upload endpoint with multiple files
+        if file_path.is_dir():
+            return cls.from_hf_dataset(
+                name, Dataset.load_from_disk(file_path), if_exists=if_exists, description=description
+            )
+
+        # For single files, use the helper function to upload files
+        metadata = _upload_files_to_datasource(name, [file_path], description)
+
+        return cls(metadata=metadata)

     @classmethod
     def open(cls, name: str) -> Datasource:
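Taken together, the datasource.py changes split creation into two paths: in-memory rows and columns go through the generated create_datasource_from_content call with a JSON body, while files and saved Hugging Face datasets go through a manual multipart request to /datasource/upload. The following is a minimal usage sketch based on the docstrings above; the datasource names are illustrative and the calls assume an authenticated OrcaCloud client:

    from datasets import Dataset, DatasetDict
    from orca_sdk.datasource import Datasource

    # Rows are now sent as JSON content rather than uploaded as files
    reviews = Datasource.from_list("reviews", [{"text": "Hello, world!", "label": 1}])

    # A DatasetDict creates one datasource per split, suffixed with the split name
    splits = Datasource.from_hf_dataset_dict(
        "reviews", DatasetDict({"train": Dataset.from_dict({"text": ["Hello, world!"], "label": [1]})})
    )

    # Re-running with if_exists="open" returns the existing handle instead of raising
    same = Datasource.from_list("reviews", [{"text": "Hello, world!", "label": 1}], if_exists="open")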
orca_sdk/datasource_test.py CHANGED
@@ -2,6 +2,8 @@ import os
 import tempfile
 from uuid import uuid4

+import pandas as pd
+import pyarrow as pa
 import pytest

 from .datasource import Datasource
@@ -102,3 +104,195 @@ def test_download_datasource(datasource):
         output_path = os.path.join(temp_dir, "datasource.zip")
         datasource.download(output_path)
         assert os.path.exists(output_path)
+
+
+def test_from_list():
+    # Test creating datasource from list of dictionaries
+    data = [
+        {"column1": 1, "column2": "a"},
+        {"column1": 2, "column2": "b"},
+        {"column1": 3, "column2": "c"},
+    ]
+    datasource = Datasource.from_list(f"test_list_{uuid4()}", data)
+    assert datasource.name.startswith("test_list_")
+    assert datasource.length == 3
+    assert "column1" in datasource.columns
+    assert "column2" in datasource.columns
+
+
+def test_from_dict():
+    # Test creating datasource from dictionary of columns
+    data = {
+        "column1": [1, 2, 3],
+        "column2": ["a", "b", "c"],
+    }
+    datasource = Datasource.from_dict(f"test_dict_{uuid4()}", data)
+    assert datasource.name.startswith("test_dict_")
+    assert datasource.length == 3
+    assert "column1" in datasource.columns
+    assert "column2" in datasource.columns
+
+
+def test_from_pandas():
+    # Test creating datasource from pandas DataFrame
+    df = pd.DataFrame(
+        {
+            "column1": [1, 2, 3],
+            "column2": ["a", "b", "c"],
+        }
+    )
+    datasource = Datasource.from_pandas(f"test_pandas_{uuid4()}", df)
+    assert datasource.name.startswith("test_pandas_")
+    assert datasource.length == 3
+    assert "column1" in datasource.columns
+    assert "column2" in datasource.columns
+
+
+def test_from_arrow():
+    # Test creating datasource from pyarrow Table
+    table = pa.table(
+        {
+            "column1": [1, 2, 3],
+            "column2": ["a", "b", "c"],
+        }
+    )
+    datasource = Datasource.from_arrow(f"test_arrow_{uuid4()}", table)
+    assert datasource.name.startswith("test_arrow_")
+    assert datasource.length == 3
+    assert "column1" in datasource.columns
+    assert "column2" in datasource.columns
+
+
+def test_from_list_already_exists():
+    # Test the if_exists parameter with from_list
+    data = [{"column1": 1, "column2": "a"}]
+    name = f"test_list_exists_{uuid4()}"
+
+    # Create the first datasource
+    datasource1 = Datasource.from_list(name, data)
+    assert datasource1.length == 1
+
+    # Try to create again with if_exists="error" (should raise)
+    with pytest.raises(ValueError):
+        Datasource.from_list(name, data, if_exists="error")
+
+    # Try to create again with if_exists="open" (should return existing)
+    datasource2 = Datasource.from_list(name, data, if_exists="open")
+    assert datasource2.id == datasource1.id
+    assert datasource2.name == datasource1.name
+
+
+def test_from_dict_already_exists():
+    # Test the if_exists parameter with from_dict
+    data = {"column1": [1], "column2": ["a"]}
+    name = f"test_dict_exists_{uuid4()}"
+
+    # Create the first datasource
+    datasource1 = Datasource.from_dict(name, data)
+    assert datasource1.length == 1
+
+    # Try to create again with if_exists="error" (should raise)
+    with pytest.raises(ValueError):
+        Datasource.from_dict(name, data, if_exists="error")
+
+    # Try to create again with if_exists="open" (should return existing)
+    datasource2 = Datasource.from_dict(name, data, if_exists="open")
+    assert datasource2.id == datasource1.id
+    assert datasource2.name == datasource1.name
+
+
+def test_from_pandas_already_exists():
+    # Test the if_exists parameter with from_pandas
+    df = pd.DataFrame({"column1": [1], "column2": ["a"]})
+    name = f"test_pandas_exists_{uuid4()}"
+
+    # Create the first datasource
+    datasource1 = Datasource.from_pandas(name, df)
+    assert datasource1.length == 1
+
+    # Try to create again with if_exists="error" (should raise)
+    with pytest.raises(ValueError):
+        Datasource.from_pandas(name, df, if_exists="error")
+
+    # Try to create again with if_exists="open" (should return existing)
+    datasource2 = Datasource.from_pandas(name, df, if_exists="open")
+    assert datasource2.id == datasource1.id
+    assert datasource2.name == datasource1.name
+
+
+def test_from_arrow_already_exists():
+    # Test the if_exists parameter with from_arrow
+    table = pa.table({"column1": [1], "column2": ["a"]})
+    name = f"test_arrow_exists_{uuid4()}"
+
+    # Create the first datasource
+    datasource1 = Datasource.from_arrow(name, table)
+    assert datasource1.length == 1
+
+    # Try to create again with if_exists="error" (should raise)
+    with pytest.raises(ValueError):
+        Datasource.from_arrow(name, table, if_exists="error")
+
+    # Try to create again with if_exists="open" (should return existing)
+    datasource2 = Datasource.from_arrow(name, table, if_exists="open")
+    assert datasource2.id == datasource1.id
+    assert datasource2.name == datasource1.name
+
+
+def test_from_disk_csv():
+    # Test creating datasource from CSV file
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
+        f.write("column1,column2\n1,a\n2,b\n3,c")
+        f.flush()
+
+    try:
+        datasource = Datasource.from_disk(f"test_csv_{uuid4()}", f.name)
+        assert datasource.length == 3
+        assert "column1" in datasource.columns
+        assert "column2" in datasource.columns
+    finally:
+        os.unlink(f.name)
+
+
+def test_from_disk_json():
+    # Test creating datasource from JSON file
+    import json
+
+    data = [{"column1": 1, "column2": "a"}, {"column1": 2, "column2": "b"}]
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+        json.dump(data, f)
+        f.flush()
+
+    try:
+        datasource = Datasource.from_disk(f"test_json_{uuid4()}", f.name)
+        assert datasource.length == 2
+        assert "column1" in datasource.columns
+        assert "column2" in datasource.columns
+    finally:
+        os.unlink(f.name)
+
+
+def test_from_disk_already_exists():
+    # Test the if_exists parameter with from_disk
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
+        f.write("column1,column2\n1,a")
+        f.flush()
+
+    try:
+        name = f"test_disk_exists_{uuid4()}"
+
+        # Create the first datasource
+        datasource1 = Datasource.from_disk(name, f.name)
+        assert datasource1.length == 1
+
+        # Try to create again with if_exists="error" (should raise)
+        with pytest.raises(ValueError):
+            Datasource.from_disk(name, f.name, if_exists="error")
+
+        # Try to create again with if_exists="open" (should return existing)
+        datasource2 = Datasource.from_disk(name, f.name, if_exists="open")
+        assert datasource2.id == datasource1.id
+        assert datasource2.name == datasource1.name
+    finally:
+        os.unlink(f.name)
orca_sdk/embedding_model.py CHANGED
@@ -23,7 +23,7 @@ from ._generated_api_client.models import (
     PretrainedEmbeddingModelMetadata,
     PretrainedEmbeddingModelName,
 )
-from ._utils.common import CreateMode, DropMode
+from ._utils.common import UNSET, CreateMode, DropMode
 from .datasource import Datasource
 from .job import Job, Status

@@ -36,40 +36,58 @@ class _EmbeddingModel:
     embedding_dim: int
     max_seq_length: int
     uses_context: bool
+    supports_instructions: bool

-    def __init__(self, *, name: str, embedding_dim: int, max_seq_length: int, uses_context: bool):
+    def __init__(
+        self, *, name: str, embedding_dim: int, max_seq_length: int, uses_context: bool, supports_instructions: bool
+    ):
         self.name = name
         self.embedding_dim = embedding_dim
         self.max_seq_length = max_seq_length
         self.uses_context = uses_context
+        self.supports_instructions = supports_instructions

     @classmethod
     @abstractmethod
     def all(cls) -> Sequence[_EmbeddingModel]:
         pass

+    def _get_instruction_error_message(self) -> str:
+        """Get error message for instruction not supported"""
+        if isinstance(self, FinetunedEmbeddingModel):
+            return f"Model {self.name} does not support instructions. Instruction-following is only supported by models based on instruction-supporting models."
+        else:
+            return f"Model {self.name} does not support instructions. Instruction-following is only supported by instruction-supporting models."
+
     @overload
-    def embed(self, value: str, max_seq_length: int | None = None) -> list[float]:
+    def embed(self, value: str, max_seq_length: int | None = None, prompt: str | None = None) -> list[float]:
         pass

     @overload
-    def embed(self, value: list[str], max_seq_length: int | None = None) -> list[list[float]]:
+    def embed(
+        self, value: list[str], max_seq_length: int | None = None, prompt: str | None = None
+    ) -> list[list[float]]:
         pass

-    def embed(self, value: str | list[str], max_seq_length: int | None = None) -> list[float] | list[list[float]]:
+    def embed(
+        self, value: str | list[str], max_seq_length: int | None = None, prompt: str | None = None
+    ) -> list[float] | list[list[float]]:
         """
         Generate embeddings for a value or list of values

         Params:
             value: The value or list of values to embed
             max_seq_length: The maximum sequence length to truncate the input to
+            prompt: Optional prompt for prompt-following embedding models.

         Returns:
             A matrix of floats representing the embedding for each value if the input is a list of
             values, or a list of floats representing the embedding for the single value if the
             input is a single value
         """
-        request = EmbedRequest(values=value if isinstance(value, list) else [value], max_seq_length=max_seq_length)
+        request = EmbedRequest(
+            values=value if isinstance(value, list) else [value], max_seq_length=max_seq_length, prompt=prompt
+        )
         if isinstance(self, PretrainedEmbeddingModel):
             embeddings = embed_with_pretrained_model_gpu(self._model_name, body=request)
         elif isinstance(self, FinetunedEmbeddingModel):
@@ -152,17 +170,27 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
     - **`GIST_LARGE`**: GIST-Large embedding model from Hugging Face ([avsolatorio/GIST-large-Embedding-v0](https://huggingface.co/avsolatorio/GIST-large-Embedding-v0))
     - **`MXBAI_LARGE`**: Mixbreas's Large embedding model from Hugging Face ([mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1))
     - **`QWEN2_1_5B`**: Alibaba's Qwen2-1.5B instruction-tuned embedding model from Hugging Face ([Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct))
+    - **`BGE_BASE`**: BAAI's BGE-Base instruction-tuned embedding model from Hugging Face ([BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5))
+
+    **Instruction Support:**

+    Some models support instruction-following for better task-specific embeddings. You can check if a model supports instructions
+    using the `supports_instructions` attribute.

     Examples:
         >>> PretrainedEmbeddingModel.CDE_SMALL
         PretrainedEmbeddingModel({name: CDE_SMALL, embedding_dim: 768, max_seq_length: 512})

+        >>> # Using instruction with an instruction-supporting model
+        >>> model = PretrainedEmbeddingModel.E5_LARGE
+        >>> embeddings = model.embed("Hello world", prompt="Represent this sentence for retrieval:")
+
     Attributes:
         name: Name of the pretrained embedding model
         embedding_dim: Dimension of the embeddings that are generated by the model
         max_seq_length: Maximum input length (in tokens not characters) that this model can process. Inputs that are longer will be truncated during the embedding process
         uses_context: Whether the pretrained embedding model uses context
+        supports_instructions: Whether this model supports instruction-following
     """

     # Define descriptors for model access with IDE autocomplete
@@ -175,17 +203,22 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
     GIST_LARGE = _ModelDescriptor("GIST_LARGE")
     MXBAI_LARGE = _ModelDescriptor("MXBAI_LARGE")
     QWEN2_1_5B = _ModelDescriptor("QWEN2_1_5B")
+    BGE_BASE = _ModelDescriptor("BGE_BASE")

     _model_name: PretrainedEmbeddingModelName

     def __init__(self, metadata: PretrainedEmbeddingModelMetadata):
         # for internal use only, do not document
         self._model_name = metadata.name
+
         super().__init__(
             name=metadata.name.value,
             embedding_dim=metadata.embedding_dim,
             max_seq_length=metadata.max_seq_length,
             uses_context=metadata.uses_context,
+            supports_instructions=(
+                bool(metadata.supports_instructions) if metadata.supports_instructions is not UNSET else False
+            ),
         )

     def __eq__(self, other) -> bool:
@@ -209,9 +242,11 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
     @classmethod
     def _get(cls, name: PretrainedEmbeddingModelName | str) -> PretrainedEmbeddingModel:
         # for internal use only, do not document - we want people to use dot notation to get the model
-        if str(name) not in cls._instances:
-            cls._instances[str(name)] = cls(get_pretrained_embedding_model(cast(PretrainedEmbeddingModelName, name)))
-        return cls._instances[str(name)]
+        cache_key = str(name)
+        if cache_key not in cls._instances:
+            metadata = get_pretrained_embedding_model(cast(PretrainedEmbeddingModelName, name))
+            cls._instances[cache_key] = cls(metadata)
+        return cls._instances[cache_key]

     @classmethod
     def open(cls, name: str) -> PretrainedEmbeddingModel:
@@ -231,9 +266,9 @@ class PretrainedEmbeddingModel(_EmbeddingModel):
         >>> model = PretrainedEmbeddingModel.open("GTE_BASE")
         """
         try:
-            # Use getattr to access the descriptor which will initialize the model
-            return getattr(cls, name)
-        except AttributeError:
+            # Always use the _get method which handles caching properly
+            return cls._get(name)
+        except (KeyError, AttributeError):
             raise ValueError(f"Unknown model name: {name}")

     @classmethod
@@ -385,11 +420,13 @@ class FinetunedEmbeddingModel(_EmbeddingModel):
         self.updated_at = metadata.updated_at
         self.base_model_name = metadata.base_model
         self._status = Status(metadata.finetuning_status.value)
+
         super().__init__(
             name=metadata.name,
             embedding_dim=metadata.embedding_dim,
             max_seq_length=metadata.max_seq_length,
             uses_context=metadata.uses_context,
+            supports_instructions=self.base_model.supports_instructions,
         )

     def __eq__(self, other) -> bool:
@@ -434,7 +471,8 @@ class FinetunedEmbeddingModel(_EmbeddingModel):
         Raises:
             LookupError: If the finetuned embedding model does not exist
         """
-        return cls(get_finetuned_embedding_model(name))
+        metadata = get_finetuned_embedding_model(name)
+        return cls(metadata)

     @classmethod
     def exists(cls, name_or_id: str) -> bool:
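The embedding_model.py changes add an optional prompt argument to embed and a supports_instructions flag on both pretrained and finetuned models. A minimal sketch of how this might be used, following the docstring example above (the prompt string is illustrative and the calls assume an authenticated client):

    from orca_sdk.embedding_model import PretrainedEmbeddingModel

    model = PretrainedEmbeddingModel.open("GTE_BASE")
    if model.supports_instructions:
        # Instruction-supporting models accept a task-specific prompt
        embedding = model.embed("Hello world", prompt="Represent this sentence for retrieval:")
    else:
        embedding = model.embed("Hello world")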