openaivec 0.12.5__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/{di.py → _di.py} +36 -12
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1496 -318
  18. openaivec/spark.py +485 -183
  19. openaivec/task/__init__.py +9 -7
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +17 -15
  22. openaivec/task/customer_support/inquiry_classification.py +23 -22
  23. openaivec/task/customer_support/inquiry_summary.py +14 -13
  24. openaivec/task/customer_support/intent_analysis.py +21 -19
  25. openaivec/task/customer_support/response_suggestion.py +16 -16
  26. openaivec/task/customer_support/urgency_analysis.py +24 -25
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +10 -12
  29. openaivec/task/nlp/keyword_extraction.py +11 -14
  30. openaivec/task/nlp/morphological_analysis.py +12 -14
  31. openaivec/task/nlp/named_entity_recognition.py +16 -18
  32. openaivec/task/nlp/sentiment_analysis.py +14 -11
  33. openaivec/task/nlp/translation.py +6 -9
  34. openaivec/task/table/__init__.py +2 -2
  35. openaivec/task/table/fillna.py +11 -11
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/model.py +0 -67
  41. openaivec/provider.py +0 -45
  42. openaivec/responses.py +0 -393
  43. openaivec/serialize.py +0 -225
  44. openaivec-0.12.5.dist-info/METADATA +0 -696
  45. openaivec-0.12.5.dist-info/RECORD +0 -33
  46. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
openaivec/{di.py → _di.py} RENAMED
@@ -1,6 +1,9 @@
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from threading import RLock
-from typing import Any, Callable, Dict, Set, Type, TypeVar
+from typing import Any, TypeVar
+
+__all__ = []
 
 """Simple dependency injection container with singleton lifecycle management.
 
@@ -11,14 +14,14 @@ are created once and reused across multiple resolve calls.
 Example:
     ```python
     from openaivec.di import Container
-
+
     class DatabaseService:
         def __init__(self):
            self.connection = "database://localhost"
-
+
    container = Container()
    container.register(DatabaseService, lambda: DatabaseService())
-
+
    db1 = container.resolve(DatabaseService)
    db2 = container.resolve(DatabaseService)
    print(db1 is db2)  # True - same instance
@@ -117,12 +120,12 @@ class Container:
     ```
     """
 
-    _instances: Dict[Type[Any], Any] = field(default_factory=dict)
-    _providers: Dict[Type[Any], Provider[Any]] = field(default_factory=dict)
+    _instances: dict[type[Any], Any] = field(default_factory=dict)
+    _providers: dict[type[Any], Provider[Any]] = field(default_factory=dict)
     _lock: RLock = field(default_factory=RLock)
-    _resolving: Set[Type[Any]] = field(default_factory=set)
+    _resolving: set[type[Any]] = field(default_factory=set)
 
-    def register(self, cls: Type[T], provider: Provider[T]) -> None:
+    def register(self, cls: type[T], provider: Provider[T]) -> None:
         """Register a provider function for a service type.
 
         The provider function will be called once to create the singleton instance
@@ -148,7 +151,7 @@ class Container:
 
         self._providers[cls] = provider
 
-    def register_instance(self, cls: Type[T], instance: T) -> None:
+    def register_instance(self, cls: type[T], instance: T) -> None:
         """Register a pre-created instance for a service type.
 
         The provided instance will be stored directly in the container and returned
@@ -176,7 +179,7 @@ class Container:
         self._instances[cls] = instance
         self._providers[cls] = lambda: instance
 
-    def resolve(self, cls: Type[T]) -> T:
+    def resolve(self, cls: type[T]) -> T:
         """Resolve a service instance, creating it if necessary.
 
         Returns the singleton instance for the requested service type. If this is
@@ -230,7 +233,7 @@ class Container:
         finally:
             self._resolving.discard(cls)
 
-    def is_registered(self, cls: Type[Any]) -> bool:
+    def is_registered(self, cls: type[Any]) -> bool:
         """Check if a service type is registered in the container.
 
         Args:
@@ -250,7 +253,7 @@ class Container:
         with self._lock:
             return cls in self._providers
 
-    def unregister(self, cls: Type[Any]) -> None:
+    def unregister(self, cls: type[Any]) -> None:
         """Unregister a service type from the container.
 
         Removes the provider function and any cached singleton instance for
@@ -300,3 +303,24 @@ class Container:
         self._providers.clear()
         self._instances.clear()
         self._resolving.clear()
+
+    def clear_singletons(self) -> None:
+        """Clear all cached singleton instances from the container.
+
+        Removes all cached singleton instances while keeping the registered
+        providers intact. After calling this method, the next resolve call
+        for any service will create a new instance using the provider function.
+
+        Example:
+            ```python
+            container = Container()
+            container.register(str, lambda: "Hello")
+            instance1 = container.resolve(str)
+            container.clear_singletons()
+            instance2 = container.resolve(str)
+            print(instance1 is instance2)
+            # False - different instances after clearing singletons
+            ```
+        """
+        with self._lock:
+            self._instances.clear()
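
The `_resolving: set[type[Any]]` field and the `finally: self._resolving.discard(cls)` cleanup visible above suggest a re-entrancy guard around provider construction. A minimal sketch of that pattern, assuming (this diff does not show the full `resolve` body) that a provider chain looping back to a type under construction raises; names here are illustrative, not the library's code:

```python
from threading import RLock


class MiniContainer:
    """Illustrative sketch of a singleton container with a resolve guard."""

    def __init__(self):
        self._providers = {}     # type -> zero-arg factory
        self._instances = {}     # type -> cached singleton
        self._resolving = set()  # types whose providers are mid-construction
        self._lock = RLock()     # RLock so a provider may call resolve() re-entrantly

    def register(self, cls, provider):
        with self._lock:
            self._providers[cls] = provider

    def resolve(self, cls):
        with self._lock:
            if cls in self._instances:  # singleton already built
                return self._instances[cls]
            if cls in self._resolving:  # provider looped back to the same type
                raise RuntimeError(f"circular dependency while resolving {cls.__name__}")
            self._resolving.add(cls)
            try:
                instance = self._providers[cls]()
                self._instances[cls] = instance
                return instance
            finally:
                self._resolving.discard(cls)  # mirrors the finally block in the hunk
```

Without the `finally`, a provider that raises would leave its type stranded in `_resolving`, so every later `resolve` for that type would report a spurious circular dependency.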
openaivec/_embeddings.py ADDED
@@ -0,0 +1,203 @@
+from dataclasses import dataclass, field
+from logging import Logger, getLogger
+
+import numpy as np
+from numpy.typing import NDArray
+from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
+
+from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec._log import observe
+from openaivec._util import backoff, backoff_async
+
+__all__ = [
+    "BatchEmbeddings",
+    "AsyncBatchEmbeddings",
+]
+
+_LOGGER: Logger = getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class BatchEmbeddings:
+    """Thin wrapper around the OpenAI embeddings endpoint (synchronous).
+
+    Attributes:
+        client (OpenAI): Configured OpenAI client.
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
+            (e.g., ``"text-embedding-3-small"``).
+        cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
+        api_kwargs (dict[str, Any]): Additional OpenAI API parameters stored at initialization.
+    """
+
+    client: OpenAI
+    model_name: str
+    cache: BatchingMapProxy[str, NDArray[np.float32]] = field(default_factory=lambda: BatchingMapProxy(batch_size=None))
+    api_kwargs: dict[str, int | float | str | bool] = field(default_factory=dict)
+
+    @classmethod
+    def of(cls, client: OpenAI, model_name: str, batch_size: int | None = None, **api_kwargs) -> "BatchEmbeddings":
+        """Factory constructor.
+
+        Args:
+            client (OpenAI): OpenAI client.
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
+            batch_size (int | None, optional): Max unique inputs per API call. Defaults to None
+                (automatic batch size optimization). Set to a positive integer for fixed batch size.
+            **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
+
+        Returns:
+            BatchEmbeddings: Configured instance backed by a batching proxy.
+        """
+        return cls(
+            client=client,
+            model_name=model_name,
+            cache=BatchingMapProxy(batch_size=batch_size),
+            api_kwargs=api_kwargs,
+        )
+
+    @observe(_LOGGER)
+    @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
+    def _embed_chunk(self, inputs: list[str]) -> list[NDArray[np.float32]]:
+        """Embed one minibatch of strings.
+
+        This private helper is the unit of work used by the map/parallel
+        utilities. Exponential back-off is applied automatically when
+        ``openai.RateLimitError`` is raised.
+
+        Args:
+            inputs (list[str]): Input strings to be embedded. Duplicates allowed.
+
+        Returns:
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
+        """
+        responses = self.client.embeddings.create(input=inputs, model=self.model_name, **self.api_kwargs)
+        return [np.array(d.embedding, dtype=np.float32) for d in responses.data]
+
+    @observe(_LOGGER)
+    def create(self, inputs: list[str]) -> list[NDArray[np.float32]]:
+        """Generate embeddings for inputs using cached, ordered batching.
+
+        Args:
+            inputs (list[str]): Input strings. Duplicates allowed.
+
+        Returns:
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
+        """
+        return self.cache.map(inputs, self._embed_chunk)
+
+
+@dataclass(frozen=True)
+class AsyncBatchEmbeddings:
+    """Thin wrapper around the OpenAI embeddings endpoint (asynchronous).
+
+    This class provides an asynchronous interface for generating embeddings using
+    OpenAI models. It manages concurrency, handles rate limits automatically,
+    and efficiently processes batches of inputs, including de-duplication.
+
+    Example:
+        ```python
+        import asyncio
+        import numpy as np
+        from openai import AsyncOpenAI
+        from openaivec import AsyncBatchEmbeddings
+
+        # Assuming openai_async_client is an initialized AsyncOpenAI client
+        openai_async_client = AsyncOpenAI()  # Replace with your actual client initialization
+
+        embedder = AsyncBatchEmbeddings.of(
+            client=openai_async_client,
+            model_name="text-embedding-3-small",
+            batch_size=128,
+            max_concurrency=8,
+        )
+        texts = ["This is the first document.", "This is the second document.", "This is the first document."]
+
+        # Asynchronous call
+        async def main():
+            embeddings = await embedder.create(texts)
+            # embeddings will be a list of numpy arrays (float32)
+            # The embedding for the third text will be identical to the first
+            # due to automatic de-duplication.
+            print(f"Generated {len(embeddings)} embeddings.")
+            print(f"Shape of first embedding: {embeddings[0].shape}")
+            assert np.array_equal(embeddings[0], embeddings[2])
+
+        # Run the async function
+        asyncio.run(main())
+        ```
+
+    Attributes:
+        client (AsyncOpenAI): Configured OpenAI async client.
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
+        cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
+        api_kwargs (dict): Additional OpenAI API parameters stored at initialization.
+    """
+
+    client: AsyncOpenAI
+    model_name: str
+    cache: AsyncBatchingMapProxy[str, NDArray[np.float32]] = field(
+        default_factory=lambda: AsyncBatchingMapProxy(batch_size=None, max_concurrency=8)
+    )
+    api_kwargs: dict[str, int | float | str | bool] = field(default_factory=dict)
+
+    @classmethod
+    def of(
+        cls,
+        client: AsyncOpenAI,
+        model_name: str,
+        batch_size: int | None = None,
+        max_concurrency: int = 8,
+        **api_kwargs,
+    ) -> "AsyncBatchEmbeddings":
+        """Factory constructor.
+
+        Args:
+            client (AsyncOpenAI): OpenAI async client.
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
+            batch_size (int | None, optional): Max unique inputs per API call. Defaults to None
+                (automatic batch size optimization). Set to a positive integer for fixed batch size.
+            max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
+            **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
+
+        Returns:
+            AsyncBatchEmbeddings: Configured instance with an async batching proxy.
+        """
+        return cls(
+            client=client,
+            model_name=model_name,
+            cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
+            api_kwargs=api_kwargs,
+        )
+
+    @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
+    @observe(_LOGGER)
+    async def _embed_chunk(self, inputs: list[str]) -> list[NDArray[np.float32]]:
+        """Embed one minibatch of strings asynchronously.
+
+        This private helper handles the actual API call for a batch of inputs.
+        Exponential back-off is applied automatically when ``openai.RateLimitError``
+        is raised.
+
+        Args:
+            inputs (list[str]): Input strings to be embedded. Duplicates allowed.
+
+        Returns:
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
+
+        Raises:
+            RateLimitError: Propagated if retries are exhausted.
+        """
+        responses = await self.client.embeddings.create(input=inputs, model=self.model_name, **self.api_kwargs)
+        return [np.array(d.embedding, dtype=np.float32) for d in responses.data]
+
+    @observe(_LOGGER)
+    async def create(self, inputs: list[str]) -> list[NDArray[np.float32]]:
+        """Generate embeddings for inputs using proxy batching (async).
+
+        Args:
+            inputs (list[str]): Input strings. Duplicates allowed.
+
+        Returns:
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
+        """
+        return await self.cache.map(inputs, self._embed_chunk)  # type: ignore[arg-type]
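
Unlike its async sibling, `BatchEmbeddings` ships no usage example in its docstring. A minimal sketch built from the `of` signature above; the model name and the `dimensions` kwarg are illustrative, and an `OPENAI_API_KEY` environment variable is assumed:

```python
import numpy as np
from openai import OpenAI

from openaivec import BatchEmbeddings  # assumed re-export, mirroring the async docstring

client = OpenAI()  # reads OPENAI_API_KEY from the environment

embedder = BatchEmbeddings.of(
    client=client,
    model_name="text-embedding-3-small",
    batch_size=128,  # fixed batch size; None enables automatic optimization
    dimensions=256,  # forwarded to embeddings.create via **api_kwargs
)

texts = ["first document", "second document", "first document"]
vectors = embedder.create(texts)

# Outputs are aligned to inputs; the duplicate third input is served from the
# cache, so its vector is identical to the first.
assert np.array_equal(vectors[0], vectors[2])
print(len(vectors), vectors[0].shape)  # 3 (256,)
```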
openaivec/{log.py → _log.py} RENAMED
@@ -2,10 +2,10 @@ import functools
 import json
 import time
 import uuid
+from collections.abc import Callable
 from logging import Logger
-from typing import Callable
 
-__all__ = ["observe"]
+__all__ = []
 
 
 def observe(logger: Logger):
openaivec/_model.py ADDED
@@ -0,0 +1,113 @@
+from dataclasses import dataclass
+from typing import Generic, TypeVar
+
+__all__ = [
+    "PreparedTask",
+]
+
+ResponseFormat = TypeVar("ResponseFormat")
+
+
+@dataclass(frozen=True)
+class PreparedTask(Generic[ResponseFormat]):
+    """A data class representing a complete task configuration for OpenAI API calls.
+
+    This class encapsulates the instructions and expected response format for
+    executing a task against the OpenAI Responses API.
+
+    Attributes:
+        instructions (str): The prompt or instructions to send to the OpenAI model.
+            This should contain clear, specific directions for the task.
+        response_format (type[ResponseFormat]): A Pydantic model class or str type that defines the expected
+            structure of the response. Can be either a BaseModel subclass or str.
+
+    Example:
+        Creating a custom task:
+
+        ```python
+        from pydantic import BaseModel
+
+        class TranslationResponse(BaseModel):
+            translated_text: str
+            source_language: str
+            target_language: str
+
+        custom_task = PreparedTask(
+            instructions="Translate the following text to French:",
+            response_format=TranslationResponse,
+        )
+        ```
+
+    Note:
+        This class is frozen (immutable) to ensure task configurations
+        cannot be accidentally modified after creation.
+    """
+
+    instructions: str
+    response_format: type[ResponseFormat]
+
+
+@dataclass(frozen=True)
+class ResponsesModelName:
+    """Container for responses model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI responses API.
+    """
+
+    value: str
+
+
+@dataclass(frozen=True)
+class EmbeddingsModelName:
+    """Container for embeddings model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI embeddings API.
+    """
+
+    value: str
+
+
+@dataclass(frozen=True)
+class OpenAIAPIKey:
+    """Container for OpenAI API key configuration.
+
+    Attributes:
+        value (str | None): The API key for OpenAI services.
+    """
+
+    value: str | None
+
+
+@dataclass(frozen=True)
+class AzureOpenAIAPIKey:
+    """Container for Azure OpenAI API key configuration.
+
+    Attributes:
+        value (str | None): The API key for Azure OpenAI services.
+    """
+
+    value: str | None
+
+
+@dataclass(frozen=True)
+class AzureOpenAIBaseURL:
+    """Container for Azure OpenAI base URL configuration.
+
+    Attributes:
+        value (str | None): The base URL for Azure OpenAI services.
+    """
+
+    value: str | None
+
+
+@dataclass(frozen=True)
+class AzureOpenAIAPIVersion:
+    """Container for Azure OpenAI API version configuration.
+
+    Attributes:
+        value (str): The API version for Azure OpenAI services.
+    """
+
+    value: str
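
The single-field frozen dataclasses after `PreparedTask` read like typed configuration keys: wrapping a plain `str` in a distinct type lets the `Container` from `_di.py` hold several string-valued settings without collisions. A hypothetical wiring sketch using only the `register_instance`/`resolve` API shown earlier; the real registration presumably lives in the new `openaivec/_provider.py`, which this diff does not display, and the model names are placeholders:

```python
from openaivec._di import Container  # private module; the public surface may differ
from openaivec._model import EmbeddingsModelName, ResponsesModelName

container = Container()

# register_instance stores pre-built values directly (see the _di.py hunk above);
# each wrapper type doubles as its own lookup key.
container.register_instance(ResponsesModelName, ResponsesModelName("gpt-4.1-mini"))
container.register_instance(EmbeddingsModelName, EmbeddingsModelName("text-embedding-3-small"))

print(container.resolve(ResponsesModelName).value)   # gpt-4.1-mini
print(container.resolve(EmbeddingsModelName).value)  # text-embedding-3-small
```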