PyPI - orca-sdk - Versions diffs - 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

orca-sdk 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

orca_sdk/_shared/metrics.py +8 -4
orca_sdk/async_client.py +38 -14
orca_sdk/classification_model.py +2 -4
orca_sdk/client.py +38 -14
orca_sdk/embedding_model.py +0 -2
{orca_sdk-0.1.5.dist-info → orca_sdk-0.1.6.dist-info}/METADATA +1 -1
{orca_sdk-0.1.5.dist-info → orca_sdk-0.1.6.dist-info}/RECORD +8 -8
{orca_sdk-0.1.5.dist-info → orca_sdk-0.1.6.dist-info}/WHEEL +0 -0

orca_sdk/_shared/metrics.py CHANGED Viewed

@@ -20,7 +20,9 @@ from numpy.typing import NDArray
 def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
     shifted = logits - np.max(logits, axis=axis, keepdims=True)
     exps = np.exp(shifted)
-    return exps / np.sum(exps, axis=axis, keepdims=True)
+    sums = np.sum(exps, axis=axis, keepdims=True)
+    # Guard against division by zero (can happen if all logits are -inf or NaN)
+    return exps / np.where(sums > 0, sums, 1.0)
 # We don't want to depend on transformers just for the eval_pred type in orca_sdk
@@ -300,7 +302,9 @@ def convert_logits_to_probabilities(logits: NDArray[np.float32]) -> NDArray[np.f
             probabilities = cast(NDArray[np.float32], softmax(logits))
         elif not np.allclose(logits.sum(-1, keepdims=True), 1.0):
             # Rows don't sum to 1: normalize to probabilities
-            probabilities = cast(NDArray[np.float32], logits / logits.sum(-1, keepdims=True))
+            row_sums = logits.sum(-1, keepdims=True)
+            # Guard against division by zero (can happen if all values in a row are 0 or NaN)
+            probabilities = cast(NDArray[np.float32], logits / np.where(row_sums > 0, row_sums, 1.0))
         else:
             # Already normalized probabilities
             probabilities = logits
@@ -349,7 +353,7 @@ def calculate_classification_metrics(
     num_classes_references = len(set(references))
     num_classes_predictions = len(set(predictions))
     num_none_predictions = np.isnan(probabilities).all(axis=-1).sum()
-    coverage = 1 - num_none_predictions / len(probabilities)
+    coverage = 1 - (num_none_predictions / len(probabilities) if len(probabilities) > 0 else 0)
     if average is None:
         average = "binary" if num_classes_references == 2 and num_none_predictions == 0 else "weighted"
@@ -503,7 +507,7 @@ def calculate_regression_metrics(
     # Filter out NaN values from predictions (expected_scores are already validated to be non-NaN)
     valid_mask = ~np.isnan(predictions)
     num_none_predictions = (~valid_mask).sum()
-    coverage = 1 - num_none_predictions / len(predictions)
+    coverage = 1 - (num_none_predictions / len(predictions) if len(predictions) > 0 else 0)
     if num_none_predictions > 0:
         references = references[valid_mask]
         predictions = predictions[valid_mask]

orca_sdk/async_client.py CHANGED Viewed

@@ -555,16 +555,7 @@ class PredictiveModelUpdate(TypedDict):
 PretrainedEmbeddingModelName = Literal[
-    "CLIP_BASE",
-    "GTE_BASE",
-    "CDE_SMALL",
-    "DISTILBERT",
-    "GTE_SMALL",
-    "MXBAI_LARGE",
-    "E5_LARGE",
-    "QWEN2_1_5B",
-    "BGE_BASE",
-    "GIST_LARGE",
+    "CLIP_BASE", "GTE_BASE", "CDE_SMALL", "DISTILBERT", "GTE_SMALL", "MXBAI_LARGE", "E5_LARGE", "BGE_BASE", "GIST_LARGE"
 ]
@@ -1175,7 +1166,14 @@ class BootstrapClassificationModelRequest(TypedDict):
     num_examples_per_label: NotRequired[int]
-class BootstrapClassificationModelResult(TypedDict):
+class BootstrapLabeledMemoryDataInput(TypedDict):
+    model_description: str
+    label_names: list[str]
+    initial_examples: NotRequired[list[LabeledExample]]
+    num_examples_per_label: NotRequired[int]
+class BootstrapLabeledMemoryDataResult(TypedDict):
     model_description: str
     label_names: list[str]
     model_name: str
@@ -1680,7 +1678,7 @@ class BootstrapClassificationModelMeta(TypedDict):
     datasource_meta: DatasourceMetadata
     memoryset_meta: MemorysetMetadata
     model_meta: ClassificationModelMetadata
-    agent_output: BootstrapClassificationModelResult
+    agent_output: BootstrapLabeledMemoryDataResult
 class BootstrapClassificationModelResponse(TypedDict):
@@ -2570,7 +2568,7 @@ class OrcaAsyncClient(AsyncClient):
         timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
         extensions: RequestExtensions | None = None,
     ) -> BootstrapClassificationModelResponse:
-        """Get the status of a bootstrap classification model job"""
+        """Get the status of a bootstrap labeled memory data job"""
         pass
     async def GET(
@@ -3292,6 +3290,32 @@ class OrcaAsyncClient(AsyncClient):
         """Get row count from a specific datasource with optional filtering."""
         pass
+    @overload
+    async def POST(
+        self,
+        path: Literal["/datasource/bootstrap_memory_data"],
+        *,
+        params: None = None,
+        json: BootstrapLabeledMemoryDataInput,
+        data: None = None,
+        files: None = None,
+        content: None = None,
+        parse_as: Literal["json"] = "json",
+        headers: HeaderTypes | None = None,
+        cookies: CookieTypes | None = None,
+        auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
+        timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        extensions: RequestExtensions | None = None,
+    ) -> BootstrapLabeledMemoryDataResult:
+        """
+        Bootstrap memory data using an AI agent.
+        This endpoint uses the bootstrap labeled memory data agent to generate
+        high-quality, diverse training examples for a classification model.
+        """
+        pass
     @overload
     async def POST(
         self,
@@ -3540,7 +3564,7 @@ class OrcaAsyncClient(AsyncClient):
         """
         Bootstrap a classification model by creating a memoryset with generated memories and a classification model.
-        This endpoint uses the bootstrap_classification_model agent to generate:
+        This endpoint uses the bootstrap_labeled_memory_data agent to generate:
         1. Memoryset configuration with appropriate settings
         2. Model configuration with optimal parameters
         3. High-quality training memories for each label

orca_sdk/classification_model.py CHANGED Viewed

@@ -12,12 +12,10 @@ from ._utils.common import UNSET, CreateMode, DropMode
 from .async_client import OrcaAsyncClient
 from .client import (
     BootstrapClassificationModelMeta,
-    BootstrapClassificationModelResult,
-    ClassificationEvaluationRequest,
+    BootstrapLabeledMemoryDataResult,
     ClassificationModelMetadata,
     ClassificationPredictionRequest,
     OrcaClient,
-    PostClassificationModelByModelNameOrIdEvaluationParams,
     PredictiveModelUpdate,
     RACHeadType,
 )
@@ -43,7 +41,7 @@ class BootstrappedClassificationModel:
     datasource: Datasource | None
     memoryset: LabeledMemoryset | None
     classification_model: ClassificationModel | None
-    agent_output: BootstrapClassificationModelResult | None
+    agent_output: BootstrapLabeledMemoryDataResult | None
     def __init__(self, metadata: BootstrapClassificationModelMeta):
         self.datasource = Datasource.open(metadata["datasource_meta"]["id"])

orca_sdk/client.py CHANGED Viewed

@@ -553,16 +553,7 @@ class PredictiveModelUpdate(TypedDict):
 PretrainedEmbeddingModelName = Literal[
-    "CLIP_BASE",
-    "GTE_BASE",
-    "CDE_SMALL",
-    "DISTILBERT",
-    "GTE_SMALL",
-    "MXBAI_LARGE",
-    "E5_LARGE",
-    "QWEN2_1_5B",
-    "BGE_BASE",
-    "GIST_LARGE",
+    "CLIP_BASE", "GTE_BASE", "CDE_SMALL", "DISTILBERT", "GTE_SMALL", "MXBAI_LARGE", "E5_LARGE", "BGE_BASE", "GIST_LARGE"
 ]
@@ -1173,7 +1164,14 @@ class BootstrapClassificationModelRequest(TypedDict):
     num_examples_per_label: NotRequired[int]
-class BootstrapClassificationModelResult(TypedDict):
+class BootstrapLabeledMemoryDataInput(TypedDict):
+    model_description: str
+    label_names: list[str]
+    initial_examples: NotRequired[list[LabeledExample]]
+    num_examples_per_label: NotRequired[int]
+class BootstrapLabeledMemoryDataResult(TypedDict):
     model_description: str
     label_names: list[str]
     model_name: str
@@ -1678,7 +1676,7 @@ class BootstrapClassificationModelMeta(TypedDict):
     datasource_meta: DatasourceMetadata
     memoryset_meta: MemorysetMetadata
     model_meta: ClassificationModelMetadata
-    agent_output: BootstrapClassificationModelResult
+    agent_output: BootstrapLabeledMemoryDataResult
 class BootstrapClassificationModelResponse(TypedDict):
@@ -2568,7 +2566,7 @@ class OrcaClient(Client):
         timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
         extensions: RequestExtensions | None = None,
     ) -> BootstrapClassificationModelResponse:
-        """Get the status of a bootstrap classification model job"""
+        """Get the status of a bootstrap labeled memory data job"""
         pass
     def GET(
@@ -3290,6 +3288,32 @@ class OrcaClient(Client):
         """Get row count from a specific datasource with optional filtering."""
         pass
+    @overload
+    def POST(
+        self,
+        path: Literal["/datasource/bootstrap_memory_data"],
+        *,
+        params: None = None,
+        json: BootstrapLabeledMemoryDataInput,
+        data: None = None,
+        files: None = None,
+        content: None = None,
+        parse_as: Literal["json"] = "json",
+        headers: HeaderTypes | None = None,
+        cookies: CookieTypes | None = None,
+        auth: AuthTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        follow_redirects: bool | UseClientDefault = USE_CLIENT_DEFAULT,
+        timeout: TimeoutTypes | UseClientDefault = USE_CLIENT_DEFAULT,
+        extensions: RequestExtensions | None = None,
+    ) -> BootstrapLabeledMemoryDataResult:
+        """
+        Bootstrap memory data using an AI agent.
+        This endpoint uses the bootstrap labeled memory data agent to generate
+        high-quality, diverse training examples for a classification model.
+        """
+        pass
     @overload
     def POST(
         self,
@@ -3538,7 +3562,7 @@ class OrcaClient(Client):
         """
         Bootstrap a classification model by creating a memoryset with generated memories and a classification model.
-        This endpoint uses the bootstrap_classification_model agent to generate:
+        This endpoint uses the bootstrap_labeled_memory_data agent to generate:
         1. Memoryset configuration with appropriate settings
         2. Model configuration with optimal parameters
         3. High-quality training memories for each label

orca_sdk/embedding_model.py CHANGED Viewed

@@ -340,7 +340,6 @@ class PretrainedEmbeddingModel(EmbeddingModelBase):
     - **`E5_LARGE`**: E5-Large instruction-tuned embedding model from Hugging Face ([intfloat/multilingual-e5-large-instruct](https://huggingface.co/intfloat/multilingual-e5-large-instruct))
     - **`GIST_LARGE`**: GIST-Large embedding model from Hugging Face ([avsolatorio/GIST-large-Embedding-v0](https://huggingface.co/avsolatorio/GIST-large-Embedding-v0))
     - **`MXBAI_LARGE`**: Mixbreas's Large embedding model from Hugging Face ([mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1))
-    - **`QWEN2_1_5B`**: Alibaba's Qwen2-1.5B instruction-tuned embedding model from Hugging Face ([Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct))
     - **`BGE_BASE`**: BAAI's BGE-Base instruction-tuned embedding model from Hugging Face ([BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5))
     **Instruction Support:**
@@ -373,7 +372,6 @@ class PretrainedEmbeddingModel(EmbeddingModelBase):
     E5_LARGE = _ModelDescriptor("E5_LARGE")
     GIST_LARGE = _ModelDescriptor("GIST_LARGE")
     MXBAI_LARGE = _ModelDescriptor("MXBAI_LARGE")
-    QWEN2_1_5B = _ModelDescriptor("QWEN2_1_5B")
     BGE_BASE = _ModelDescriptor("BGE_BASE")
     name: PretrainedEmbeddingModelName

{orca_sdk-0.1.5.dist-info → orca_sdk-0.1.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: orca_sdk
-Version: 0.1.5
+Version: 0.1.6
 Summary: SDK for interacting with Orca Services
 License-Expression: Apache-2.0
 Author: Orca DB Inc.

{orca_sdk-0.1.5.dist-info → orca_sdk-0.1.6.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 orca_sdk/__init__.py,sha256=xyjNwkLQXaX8A-UYgGwYDjv2btOXArT_yiMTfmW7KA8,1003
 orca_sdk/_shared/__init__.py,sha256=3Kt0Hu3QLI5FEp9nqGTxqAm3hAoBJKcagfaGQZ-lbJQ,223
-orca_sdk/_shared/metrics.py,sha256=m-d2-AsHI12REWev1WeniOcQRhF5cXxNjUgC4skM2o4,19412
+orca_sdk/_shared/metrics.py,sha256=a_FdsPGDjR3CMOEBaEhEBqMfWUg7sqz9Jeh26XzAeg0,19756
 orca_sdk/_shared/metrics_test.py,sha256=n7eEAT8e6RqbI94ftEDljTBzOuh-YkFpXfF3DOoZA10,12905
 orca_sdk/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 orca_sdk/_utils/analysis_ui.py,sha256=nT-M_YcNRCVPQzvuqYNFKnNHhYkADYBvq1GlIUePrWw,9232
@@ -17,16 +17,16 @@ orca_sdk/_utils/prediction_result_ui.py,sha256=Ur_FY7dz3oWNmtPiP3Wl3yRlEMgK8q9Uf
 orca_sdk/_utils/tqdm_file_reader.py,sha256=Lw7Cg1UgNuRUoN6jjqZb-IlV00H-kbRcrZLdudr1GxE,324
 orca_sdk/_utils/value_parser.py,sha256=c3qMABCCDQcIjn9N1orYYnlRwDW9JWdGwW_2TDZPLdI,1286
 orca_sdk/_utils/value_parser_test.py,sha256=OybsiC-Obi32RRi9NIuwrVBRAnlyPMV1xVAaevSrb7M,1079
-orca_sdk/async_client.py,sha256=V16wWwdJFvAzmKd5zHsFo3ny_-7B34UrONl80bZzKKs,131628
-orca_sdk/classification_model.py,sha256=90r-PfJ3ZW7ZJ7jrZPTbhuXRds46f7Ooe8FTp-iUJgg,46350
+orca_sdk/async_client.py,sha256=y2D3fPQZmbwmtYWAk5acJ45atSZen9MNfjP2tKjpP6Q,132737
+orca_sdk/classification_model.py,sha256=4AcQvAm0EN7w0qx0WpgEs7VUoIIPTqIVE86wtkaIAYs,46249
 orca_sdk/classification_model_test.py,sha256=vBn7KBb9-ACuJEdzW50n54Fn6Mh9iEYbn1197lE8-yI,36997
-orca_sdk/client.py,sha256=J3Od1sWO7YK2M5afcRNeJcjzNEgZ4zt6e7vLJdk6Nbs,130695
+orca_sdk/client.py,sha256=oQd8Lm0agetLyAdVRP8IZqe6S5mjxhFSnbVHqhT7dmI,131798
 orca_sdk/conftest.py,sha256=0O1VY-SPKNAvi9fBLdY1RMnYVgZvMjP92y99bNAqqiw,12461
 orca_sdk/credentials.py,sha256=80_1r8n5jruEvN_E629SaRrRhKvF_NhWUEZyZzPXkqQ,6620
 orca_sdk/credentials_test.py,sha256=TLbXJMz3IlThvtSrHeLM7jRsKnrncA_ahOTpHg15Ei4,4089
 orca_sdk/datasource.py,sha256=6QaccghiyFEUSFcqnwjIJzpgIh9Id0snJk2EqViqPsU,22356
 orca_sdk/datasource_test.py,sha256=sCk3IcQJbDut5oN4Wf7PXhTxyMwalxMuCXJekSxy9wk,16665
-orca_sdk/embedding_model.py,sha256=bZhbNJBimWc9Ryklza3q9HS0MRWsiH5Lhn6p7pff0RI,28165
+orca_sdk/embedding_model.py,sha256=4xxfo26b5X_YJtU8KyqoMmJQ6VgfHEcYftVSz-RfDng,27920
 orca_sdk/embedding_model_test.py,sha256=-NItbNb3tTVj5jAvSi3WjV3FP448q08lmT5iObg9vwA,8133
 orca_sdk/job.py,sha256=wHwVt-s7i-v8udhLGybB-90Kp4dwOLrY806bE4Tam5Q,13092
 orca_sdk/job_test.py,sha256=nRSWxd_1UIfrj9oMVvrXjt6OBkBpddYAjb2y6P-DTUg,4327
@@ -36,6 +36,6 @@ orca_sdk/regression_model.py,sha256=vXdY2Fbfc0MyECUR3fa_IR-nETPrDN7VFAdjvsgHPrs,
 orca_sdk/regression_model_test.py,sha256=DfWLkqxB835jjwM-sj1uxQ6Yz_ZBMnt8EHjdfnHsRnU,25103
 orca_sdk/telemetry.py,sha256=ZyCMiyyo_SchjadWZH55TlLrC4Ucq5S316NbW26LL4Y,27834
 orca_sdk/telemetry_test.py,sha256=eT66C5lFdNg-pQdo2I__BP7Tn5fTc9aTkVo9ZhWwhU0,5519
-orca_sdk-0.1.5.dist-info/METADATA,sha256=NsXoCiKQ-frqwZeydk_OzvK-QqD1_SnGdRuERXM1ILc,3659
-orca_sdk-0.1.5.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-orca_sdk-0.1.5.dist-info/RECORD,,
+orca_sdk-0.1.6.dist-info/METADATA,sha256=85QDZDP9Uxda4oZ3BMPP_kI5T4GPy1mFMYtWh1-nI54,3659
+orca_sdk-0.1.6.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+orca_sdk-0.1.6.dist-info/RECORD,,

{orca_sdk-0.1.5.dist-info → orca_sdk-0.1.6.dist-info}/WHEEL RENAMED Viewed

File without changes

orca-sdk 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

orca-sdk 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl