openaivec 0.14.7__py3-none-any.whl → 0.14.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/_di.py +10 -9
- openaivec/_embeddings.py +12 -13
- openaivec/_log.py +1 -1
- openaivec/_model.py +3 -3
- openaivec/_optimize.py +3 -4
- openaivec/_prompt.py +4 -5
- openaivec/_proxy.py +34 -35
- openaivec/_responses.py +29 -29
- openaivec/_schema.py +56 -18
- openaivec/_serialize.py +19 -15
- openaivec/_util.py +9 -8
- openaivec/pandas_ext.py +20 -19
- openaivec/spark.py +11 -10
- openaivec/task/customer_support/customer_sentiment.py +2 -2
- openaivec/task/customer_support/inquiry_classification.py +8 -8
- openaivec/task/customer_support/inquiry_summary.py +4 -4
- openaivec/task/customer_support/intent_analysis.py +5 -5
- openaivec/task/customer_support/response_suggestion.py +4 -4
- openaivec/task/customer_support/urgency_analysis.py +9 -9
- openaivec/task/nlp/dependency_parsing.py +2 -4
- openaivec/task/nlp/keyword_extraction.py +3 -5
- openaivec/task/nlp/morphological_analysis.py +4 -6
- openaivec/task/nlp/named_entity_recognition.py +7 -9
- openaivec/task/nlp/sentiment_analysis.py +3 -3
- openaivec/task/nlp/translation.py +1 -2
- openaivec/task/table/fillna.py +2 -3
- {openaivec-0.14.7.dist-info → openaivec-0.14.8.dist-info}/METADATA +1 -1
- openaivec-0.14.8.dist-info/RECORD +36 -0
- openaivec-0.14.7.dist-info/RECORD +0 -36
- {openaivec-0.14.7.dist-info → openaivec-0.14.8.dist-info}/WHEEL +0 -0
- {openaivec-0.14.7.dist-info → openaivec-0.14.8.dist-info}/licenses/LICENSE +0 -0
openaivec/_di.py
CHANGED
@@ -1,6 +1,7 @@
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from threading import RLock
-from typing import Any, …
+from typing import Any, TypeVar

 __all__ = []

@@ -119,12 +120,12 @@ class Container:
     ```
     """

-    _instances: …
-    _providers: …
+    _instances: dict[type[Any], Any] = field(default_factory=dict)
+    _providers: dict[type[Any], Provider[Any]] = field(default_factory=dict)
     _lock: RLock = field(default_factory=RLock)
-    _resolving: …
+    _resolving: set[type[Any]] = field(default_factory=set)

-    def register(self, cls: …
+    def register(self, cls: type[T], provider: Provider[T]) -> None:
         """Register a provider function for a service type.

         The provider function will be called once to create the singleton instance
@@ -150,7 +151,7 @@ class Container:

         self._providers[cls] = provider

-    def register_instance(self, cls: …
+    def register_instance(self, cls: type[T], instance: T) -> None:
         """Register a pre-created instance for a service type.

         The provided instance will be stored directly in the container and returned
@@ -178,7 +179,7 @@ class Container:
         self._instances[cls] = instance
         self._providers[cls] = lambda: instance

-    def resolve(self, cls: …
+    def resolve(self, cls: type[T]) -> T:
         """Resolve a service instance, creating it if necessary.

         Returns the singleton instance for the requested service type. If this is
@@ -232,7 +233,7 @@ class Container:
         finally:
             self._resolving.discard(cls)

-    def is_registered(self, cls: …
+    def is_registered(self, cls: type[Any]) -> bool:
         """Check if a service type is registered in the container.

         Args:
@@ -252,7 +253,7 @@ class Container:
         with self._lock:
             return cls in self._providers

-    def unregister(self, cls: …
+    def unregister(self, cls: type[Any]) -> None:
         """Unregister a service type from the container.

         Removes the provider function and any cached singleton instance for
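The retyped `Container` API keeps its documented singleton contract: a provider registered for a type is called once, and later `resolve` calls return the cached instance. A minimal sketch of that behavior (illustrative only: `_di` is a private module, and `AppConfig` is a made-up example class, not part of openaivec):

```python
from openaivec._di import Container  # private module; import shown for illustration only

class AppConfig:  # hypothetical service type, not part of openaivec
    def __init__(self) -> None:
        self.model_name = "gpt-4.1-mini"

container = Container()
container.register(AppConfig, AppConfig)  # provider is invoked lazily, once

first = container.resolve(AppConfig)      # provider runs here
second = container.resolve(AppConfig)     # cached singleton is returned
assert first is second

container.unregister(AppConfig)           # drops provider and cached instance
assert not container.is_registered(AppConfig)
```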
openaivec/_embeddings.py
CHANGED
@@ -1,6 +1,5 @@
 from dataclasses import dataclass, field
 from logging import Logger, getLogger
-from typing import List

 import numpy as np
 from numpy.typing import NDArray
@@ -50,7 +49,7 @@ class BatchEmbeddings:

     @observe(_LOGGER)
     @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
-    def _embed_chunk(self, inputs: …
+    def _embed_chunk(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Embed one minibatch of strings.

         This private helper is the unit of work used by the map/parallel
@@ -58,23 +57,23 @@ class BatchEmbeddings:
         ``openai.RateLimitError`` is raised.

         Args:
-            inputs (…
+            inputs (list[str]): Input strings to be embedded. Duplicates allowed.

         Returns:
-            …
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         responses = self.client.embeddings.create(input=inputs, model=self.model_name)
         return [np.array(d.embedding, dtype=np.float32) for d in responses.data]

     @observe(_LOGGER)
-    def create(self, inputs: …
+    def create(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Generate embeddings for inputs using cached, ordered batching.

         Args:
-            inputs (…
+            inputs (list[str]): Input strings. Duplicates allowed.

         Returns:
-            …
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         return self.cache.map(inputs, self._embed_chunk)

@@ -159,7 +158,7 @@ class AsyncBatchEmbeddings:

     @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     @observe(_LOGGER)
-    async def _embed_chunk(self, inputs: …
+    async def _embed_chunk(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Embed one minibatch of strings asynchronously.

         This private helper handles the actual API call for a batch of inputs.
@@ -167,10 +166,10 @@ class AsyncBatchEmbeddings:
         is raised.

         Args:
-            inputs (…
+            inputs (list[str]): Input strings to be embedded. Duplicates allowed.

         Returns:
-            …
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.

         Raises:
             RateLimitError: Propagated if retries are exhausted.
@@ -179,13 +178,13 @@ class AsyncBatchEmbeddings:
         return [np.array(d.embedding, dtype=np.float32) for d in responses.data]

     @observe(_LOGGER)
-    async def create(self, inputs: …
+    async def create(self, inputs: list[str]) -> list[NDArray[np.float32]]:
         """Generate embeddings for inputs using proxy batching (async).

         Args:
-            inputs (…
+            inputs (list[str]): Input strings. Duplicates allowed.

         Returns:
-            …
+            list[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         return await self.cache.map(inputs, self._embed_chunk)  # type: ignore[arg-type]
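Both `create` variants document the same contract: output order matches input order, and duplicate inputs are served from the cache rather than re-sent to the API. A sketch of how that reads at a call site (the constructor arguments here are inferred from the attributes used in the diff, `self.client` and `self.model_name`, and should be checked against the class definition):

```python
import numpy as np
from openai import OpenAI
from openaivec._embeddings import BatchEmbeddings  # private module; illustrative import

# Constructor signature assumed from the fields referenced above; verify before use.
embedder = BatchEmbeddings(client=OpenAI(), model_name="text-embedding-3-small")

vectors = embedder.create(["cat", "dog", "cat"])   # duplicates are allowed
assert len(vectors) == 3                           # aligned to input order
assert np.array_equal(vectors[0], vectors[2])      # duplicate served from cache
```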
openaivec/_log.py
CHANGED
openaivec/_model.py
CHANGED
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Generic, …
+from typing import Generic, TypeVar

 __all__ = [
     "PreparedTask",
@@ -20,7 +20,7 @@ class PreparedTask(Generic[ResponseFormat]):
     Attributes:
         instructions (str): The prompt or instructions to send to the OpenAI model.
             This should contain clear, specific directions for the task.
-        response_format (…
+        response_format (type[ResponseFormat]): A Pydantic model class or str type that defines the expected
             structure of the response. Can be either a BaseModel subclass or str.
         temperature (float): Controls randomness in the model's output.
             Range: 0.0 to 1.0. Lower values make output more deterministic.
@@ -54,7 +54,7 @@ class PreparedTask(Generic[ResponseFormat]):
     """

     instructions: str
-    response_format: …
+    response_format: type[ResponseFormat]
     temperature: float = 0.0
     top_p: float = 1.0

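Since `response_format` is now annotated as `type[ResponseFormat]`, a task is declared by passing the response model class itself, not an instance. A minimal construction sketch (illustrative; `_model` is a private module, and `Sentiment` is a hypothetical schema for this example):

```python
from pydantic import BaseModel
from openaivec._model import PreparedTask  # private module; illustrative import

class Sentiment(BaseModel):  # hypothetical response schema
    label: str
    score: float

task = PreparedTask(
    instructions="Classify the sentiment of the given text.",
    response_format=Sentiment,  # the class object satisfies type[ResponseFormat]
    temperature=0.0,
    top_p=1.0,
)
```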
openaivec/_optimize.py
CHANGED
@@ -3,7 +3,6 @@ import time
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
-from typing import List

 __all__ = []

@@ -24,7 +23,7 @@ class BatchSizeSuggester:
     max_duration: float = 60.0
     step_ratio: float = 0.2
     sample_size: int = 4
-    _history: …
+    _history: list[PerformanceMetric] = field(default_factory=list)
     _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
     _batch_size_changed_at: datetime | None = field(default=None, init=False)

@@ -65,9 +64,9 @@ class BatchSizeSuggester:
         )

     @property
-    def samples(self) -> …
+    def samples(self) -> list[PerformanceMetric]:
         with self._lock:
-            selected: …
+            selected: list[PerformanceMetric] = []
             for metric in reversed(self._history):
                 if metric.exception is not None:
                     continue
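The hunk above shows `samples` walking `_history` newest-first and skipping any metric that recorded an exception; the return and the `sample_size` cap fall outside the shown context. A standalone sketch of that selection logic (the cap is an assumption based on the `sample_size` field, and the real `PerformanceMetric` may carry different fields):

```python
from dataclasses import dataclass

@dataclass
class PerformanceMetric:  # stand-in with assumed fields, for illustration
    batch_size: int
    duration: float
    exception: Exception | None = None

def recent_successes(
    history: list[PerformanceMetric], sample_size: int = 4
) -> list[PerformanceMetric]:
    selected: list[PerformanceMetric] = []
    for metric in reversed(history):      # newest entries first
        if metric.exception is not None:  # failed runs never inform sizing
            continue
        selected.append(metric)
        if len(selected) >= sample_size:  # assumed cap, not shown in the hunk
            break
    return selected
```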
openaivec/_prompt.py
CHANGED
@@ -44,7 +44,6 @@ this will produce an XML string that looks like this:

 import difflib
 import logging
-from typing import List
 from xml.etree import ElementTree

 from openai import OpenAI
@@ -90,8 +89,8 @@ class FewShotPrompt(BaseModel):
     """

     purpose: str
-    cautions: …
-    examples: …
+    cautions: list[str]
+    examples: list[Example]


 class Step(BaseModel):
@@ -116,7 +115,7 @@ class Request(BaseModel):


 class Response(BaseModel):
-    iterations: …
+    iterations: list[Step]


 _PROMPT: str = """
@@ -358,7 +357,7 @@ class FewShotPromptBuilder:
     """

     _prompt: FewShotPrompt
-    _steps: …
+    _steps: list[Step]

     def __init__(self):
         """Initialize an empty FewShotPromptBuilder.
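`FewShotPrompt` is a plain Pydantic model, so the retyped `cautions` and `examples` fields validate exactly as before. A minimal construction sketch (illustrative; the field names of `Example` are assumed here to pair an input with an output and may differ in the source):

```python
from openaivec._prompt import Example, FewShotPrompt  # private module; illustrative import

prompt = FewShotPrompt(
    purpose="Normalize product names to English.",
    cautions=["Keep brand names unchanged."],
    examples=[
        # Example's field names are an assumption for illustration.
        Example(input="りんごジュース", output="apple juice"),
    ],
)
```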
openaivec/_proxy.py
CHANGED
@@ -1,8 +1,8 @@
 import asyncio
 import threading
-from collections.abc import Hashable
+from collections.abc import Awaitable, Callable, Hashable
 from dataclasses import dataclass, field
-from typing import Any, …
+from typing import Any, Generic, TypeVar

 from openaivec._optimize import BatchSizeSuggester

@@ -130,7 +130,7 @@ class ProxyBase(Generic[S, T]):
         progress_bar.close()

     @staticmethod
-    def _unique_in_order(seq: …
+    def _unique_in_order(seq: list[S]) -> list[S]:
         """Return unique items preserving their first-occurrence order.

         Args:
@@ -141,7 +141,7 @@ class ProxyBase(Generic[S, T]):
             once, in the order of their first occurrence.
         """
         seen: set[S] = set()
-        out: …
+        out: list[S] = []
         for x in seq:
             if x not in seen:
                 seen.add(x)
@@ -186,9 +186,8 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
     performance (targeting 30-60 seconds per batch).

     Example:
-        >>> from typing import List
         >>> p = BatchingMapProxy[int, str](batch_size=3)
-        >>> def f(xs: …
+        >>> def f(xs: list[int]) -> list[str]:
         ...     return [f"v:{x}" for x in xs]
         >>> p.map([1, 2, 2, 3, 4], f)
         ['v:1', 'v:2', 'v:2', 'v:3', 'v:4']
@@ -204,11 +203,11 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
     suggester: BatchSizeSuggester = field(default_factory=BatchSizeSuggester, repr=False)

     # internals
-    _cache: …
+    _cache: dict[S, T] = field(default_factory=dict)
     _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
-    _inflight: …
+    _inflight: dict[S, threading.Event] = field(default_factory=dict, repr=False)

-    def __all_cached(self, items: …
+    def __all_cached(self, items: list[S]) -> bool:
         """Check whether all items are present in the cache.

         This method acquires the internal lock to perform a consistent check.
@@ -222,7 +221,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         with self._lock:
             return all(x in self._cache for x in items)

-    def __values(self, items: …
+    def __values(self, items: list[S]) -> list[T]:
         """Fetch cached values for ``items`` preserving the given order.

         This method acquires the internal lock while reading the cache.
@@ -237,7 +236,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         with self._lock:
             return [self._cache[x] for x in items]

-    def __acquire_ownership(self, items: …
+    def __acquire_ownership(self, items: list[S]) -> tuple[list[S], list[S]]:
         """Acquire ownership for missing items and identify keys to wait for.

         For each unique item, if it's already cached, it is ignored. If it's
@@ -253,8 +252,8 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         - ``owned`` are items this thread is responsible for computing.
         - ``wait_for`` are items that another thread is already computing.
         """
-        owned: …
-        wait_for: …
+        owned: list[S] = []
+        wait_for: list[S] = []
         with self._lock:
             for x in items:
                 if x in self._cache:
@@ -266,7 +265,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
                 owned.append(x)
             return owned, wait_for

-    def __finalize_success(self, to_call: …
+    def __finalize_success(self, to_call: list[S], results: list[T]) -> None:
         """Populate cache with results and signal completion events.

         Args:
@@ -285,7 +284,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
                 if ev:
                     ev.set()

-    def __finalize_failure(self, to_call: …
+    def __finalize_failure(self, to_call: list[S]) -> None:
         """Release in-flight events on failure to avoid deadlocks.

         Args:
@@ -316,7 +315,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         """Alias for clear()."""
         self.clear()

-    def __process_owned(self, owned: …
+    def __process_owned(self, owned: list[S], map_func: Callable[[list[S]], list[T]]) -> None:
         """Process owned items in mini-batches and fill the cache.

         Before calling ``map_func`` for each batch, the cache is re-checked
@@ -339,7 +338,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         progress_bar = self._create_progress_bar(len(owned))

         # Accumulate uncached items to maximize batch size utilization
-        pending_to_call: …
+        pending_to_call: list[S] = []

         i = 0
         while i < len(owned):
@@ -395,7 +394,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         # Close progress bar
         self._close_progress_bar(progress_bar)

-    def __wait_for(self, keys: …
+    def __wait_for(self, keys: list[S], map_func: Callable[[list[S]], list[T]]) -> None:
         """Wait for other threads to complete computations for the given keys.

         If a key is neither cached nor in-flight, this method now claims ownership
@@ -407,7 +406,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         Args:
             keys (list[S]): Items whose computations are owned by other threads.
         """
-        rescued: …
+        rescued: list[S] = []  # keys we claim to batch-process
         for x in keys:
             while True:
                 with self._lock:
@@ -431,7 +430,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
                     raise

     # ---- public API ------------------------------------------------------
-    def map(self, items: …
+    def map(self, items: list[S], map_func: Callable[[list[S]], list[T]]) -> list[T]:
         """Map ``items`` to values using caching and optional mini-batching.

         This method is thread-safe. It deduplicates inputs while preserving order,
@@ -494,7 +493,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         >>> import asyncio
         >>> from typing import List
         >>> p = AsyncBatchingMapProxy[int, str](batch_size=2)
-        >>> async def af(xs: …
+        >>> async def af(xs: list[int]) -> list[str]:
         ...     await asyncio.sleep(0)
         ...     return [f"v:{x}" for x in xs]
         >>> async def run():
@@ -514,9 +513,9 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
     suggester: BatchSizeSuggester = field(default_factory=BatchSizeSuggester, repr=False)

     # internals
-    _cache: …
+    _cache: dict[S, T] = field(default_factory=dict, repr=False)
     _lock: asyncio.Lock = field(default_factory=asyncio.Lock, repr=False)
-    _inflight: …
+    _inflight: dict[S, asyncio.Event] = field(default_factory=dict, repr=False)
     __sema: asyncio.Semaphore | None = field(default=None, init=False, repr=False)

     def __post_init__(self) -> None:
@@ -537,7 +536,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         else:
             self.__sema = None

-    async def __all_cached(self, items: …
+    async def __all_cached(self, items: list[S]) -> bool:
         """Check whether all items are present in the cache.

         This method acquires the internal asyncio lock for a consistent view
@@ -552,7 +551,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         async with self._lock:
             return all(x in self._cache for x in items)

-    async def __values(self, items: …
+    async def __values(self, items: list[S]) -> list[T]:
         """Get cached values for ``items`` preserving their given order.

         The internal asyncio lock is held while reading the cache to preserve
@@ -567,7 +566,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         async with self._lock:
             return [self._cache[x] for x in items]

-    async def __acquire_ownership(self, items: …
+    async def __acquire_ownership(self, items: list[S]) -> tuple[list[S], list[S]]:
         """Acquire ownership for missing keys and identify keys to wait for.

         Args:
@@ -578,8 +577,8 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
             keys this coroutine should compute, and wait_for are keys currently
             being computed elsewhere.
         """
-        owned: …
-        wait_for: …
+        owned: list[S] = []
+        wait_for: list[S] = []
         async with self._lock:
             for x in items:
                 if x in self._cache:
@@ -591,7 +590,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
                 owned.append(x)
             return owned, wait_for

-    async def __finalize_success(self, to_call: …
+    async def __finalize_success(self, to_call: list[S], results: list[T]) -> None:
         """Populate cache and signal completion for successfully computed keys.

         Args:
@@ -609,7 +608,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
                 if ev:
                     ev.set()

-    async def __finalize_failure(self, to_call: …
+    async def __finalize_failure(self, to_call: list[S]) -> None:
         """Release in-flight events on failure to avoid deadlocks.

         Args:
@@ -640,7 +639,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         """Alias for clear()."""
         await self.clear()

-    async def __process_owned(self, owned: …
+    async def __process_owned(self, owned: list[S], map_func: Callable[[list[S]], Awaitable[list[T]]]) -> None:
         """Process owned keys using Producer-Consumer pattern with dynamic batch sizing.

         Args:
@@ -681,7 +680,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         self._close_progress_bar(progress_bar)

     async def __process_single_batch(
-        self, to_call: …
+        self, to_call: list[S], map_func: Callable[[list[S]], Awaitable[list[T]]], progress_bar
     ) -> None:
         """Process a single batch with semaphore control."""
         acquired = False
@@ -703,7 +702,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         # Update progress bar
         self._update_progress_bar(progress_bar, len(to_call))

-    async def __wait_for(self, keys: …
+    async def __wait_for(self, keys: list[S], map_func: Callable[[list[S]], Awaitable[list[T]]]) -> None:
         """Wait for computations owned by other coroutines to complete.

         If a key is neither cached nor in-flight, this method now claims ownership
@@ -715,7 +714,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         Args:
             keys (list[S]): Items whose computations are owned by other coroutines.
         """
-        rescued: …
+        rescued: list[S] = []  # keys we claim to batch-process
         for x in keys:
             while True:
                 async with self._lock:
@@ -738,7 +737,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
                     raise

     # ---- public API ------------------------------------------------------
-    async def map(self, items: …
+    async def map(self, items: list[S], map_func: Callable[[list[S]], Awaitable[list[T]]]) -> list[T]:
         """Async map with caching, de-duplication, and optional mini-batching.

         Args:
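Beyond the doctests embedded above, the ownership and wait-for machinery means concurrent `map` calls share both the cache and in-flight computations: a key being computed by one caller is awaited by others instead of being recomputed. A sketch of that guarantee (illustrative; exact scheduling can vary, but each unique key should reach the backend once):

```python
import asyncio
from openaivec._proxy import AsyncBatchingMapProxy  # private module; illustrative import

seen: list[list[int]] = []

async def backend(xs: list[int]) -> list[str]:
    seen.append(xs)            # record which keys actually get computed
    await asyncio.sleep(0.01)  # simulate an API round trip
    return [f"v:{x}" for x in xs]

async def main() -> None:
    proxy = AsyncBatchingMapProxy[int, str](batch_size=8)
    # Overlapping key sets: keys 2 and 3 are in flight when the second call arrives.
    r1, r2 = await asyncio.gather(
        proxy.map([1, 2, 3], backend),
        proxy.map([2, 3, 4], backend),
    )
    print(r1)  # ['v:1', 'v:2', 'v:3']
    print(r2)  # ['v:2', 'v:3', 'v:4']
    # Expected: 4 unique keys computed in total, thanks to in-flight dedup.
    print(sum(len(batch) for batch in seen))

asyncio.run(main())
```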