openaivec 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/embeddings.py CHANGED
@@ -1,14 +1,14 @@
-import asyncio
 from dataclasses import dataclass, field
 from logging import Logger, getLogger
 from typing import List

 import numpy as np
 from numpy.typing import NDArray
-from openai import AsyncOpenAI, OpenAI, RateLimitError
+from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError

 from .log import observe
-from .util import backoff, backoff_async, map, map_async
+from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from .util import backoff, backoff_async

 __all__ = [
     "BatchEmbeddings",
@@ -20,61 +20,66 @@ _LOGGER: Logger = getLogger(__name__)

 @dataclass(frozen=True)
 class BatchEmbeddings:
-    """Thin wrapper around the OpenAI /embeddings endpoint.
+    """Thin wrapper around the OpenAI embeddings endpoint (synchronous).

     Attributes:
-        client: An already‑configured ``openai.OpenAI`` client.
-        model_name: The model identifier, e.g. ``"text-embedding-3-small"``.
+        client (OpenAI): Configured OpenAI client.
+        model_name (str): Model identifier (e.g., ``"text-embedding-3-small"``).
+        cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
     """

     client: OpenAI
     model_name: str
+    cache: BatchingMapProxy[str, NDArray[np.float32]] = field(default_factory=lambda: BatchingMapProxy(batch_size=128))
+
+    @classmethod
+    def of(cls, client: OpenAI, model_name: str, batch_size: int = 128) -> "BatchEmbeddings":
+        """Factory constructor.
+
+        Args:
+            client (OpenAI): OpenAI client.
+            model_name (str): Embeddings model name.
+            batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
+
+        Returns:
+            BatchEmbeddings: Configured instance backed by a batching proxy.
+        """
+        return cls(client=client, model_name=model_name, cache=BatchingMapProxy(batch_size=batch_size))

     @observe(_LOGGER)
-    @backoff(exception=RateLimitError, scale=15, max_retries=8)
+    @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     def _embed_chunk(self, inputs: List[str]) -> List[NDArray[np.float32]]:
-        """Embed one minibatch of sentences.
+        """Embed one minibatch of strings.

         This private helper is the unit of work used by the map/parallel
         utilities. Exponential back‑off is applied automatically when
         ``openai.RateLimitError`` is raised.

         Args:
-            inputs (List[str]): Input strings to be embedded. Duplicates are allowed; the
-                implementation may decide to de‑duplicate internally.
+            inputs (List[str]): Input strings to be embedded. Duplicates allowed.

         Returns:
-            List of embedding vectors with the same ordering as *sentences*.
+            List[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
         responses = self.client.embeddings.create(input=inputs, model=self.model_name)
         return [np.array(d.embedding, dtype=np.float32) for d in responses.data]

     @observe(_LOGGER)
-    def create(self, inputs: List[str], batch_size: int) -> List[NDArray[np.float32]]:
-        """See ``VectorizedEmbeddings.create`` for contract details.
-
-        The call is internally delegated to either ``map_unique_minibatch`` or
-        its parallel counterpart depending on *is_parallel*.
+    def create(self, inputs: List[str]) -> List[NDArray[np.float32]]:
+        """Generate embeddings for inputs using cached, ordered batching.

         Args:
-            inputs (List[str]): A list of input strings. Duplicates are allowed; the
-                implementation may decide to de‑duplicate internally.
-            batch_size (int): Maximum number of sentences to be sent to the underlying
-                model in one request.
+            inputs (List[str]): Input strings. Duplicates allowed.

         Returns:
-            A list of ``np.ndarray`` objects (dtype ``float32``) where each entry
-            is the embedding of the corresponding sentence in *sentences*.
-
-        Raises:
-            openai.RateLimitError: Propagated if retries are exhausted.
+            List[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
-        return map(inputs, self._embed_chunk, batch_size)
+        return self.cache.map(inputs, self._embed_chunk)


 @dataclass(frozen=True)
 class AsyncBatchEmbeddings:
-    """Thin wrapper around the OpenAI /embeddings endpoint using async operations.
+    """Thin wrapper around the OpenAI embeddings endpoint (asynchronous).

     This class provides an asynchronous interface for generating embeddings using
     OpenAI models. It manages concurrency, handles rate limits automatically,
@@ -85,21 +90,22 @@ class AsyncBatchEmbeddings:
         import asyncio
         import numpy as np
         from openai import AsyncOpenAI
-        from openaivec.aio.embeddings import AsyncBatchEmbeddings
+        from openaivec import AsyncBatchEmbeddings

         # Assuming openai_async_client is an initialized AsyncOpenAI client
         openai_async_client = AsyncOpenAI() # Replace with your actual client initialization

-        embedder = AsyncBatchEmbeddings(
+        embedder = AsyncBatchEmbeddings.of(
             client=openai_async_client,
             model_name="text-embedding-3-small",
-            max_concurrency=8 # Limit concurrent requests
+            batch_size=128,
+            max_concurrency=8,
         )
         texts = ["This is the first document.", "This is the second document.", "This is the first document."]

         # Asynchronous call
         async def main():
-            embeddings = await embedder.create(texts, batch_size=128)
+            embeddings = await embedder.create(texts)
             # embeddings will be a list of numpy arrays (float32)
             # The embedding for the third text will be identical to the first
             # due to automatic de-duplication.
@@ -112,61 +118,71 @@ class AsyncBatchEmbeddings:
         ```

     Attributes:
-        client: An already‑configured ``openai.AsyncOpenAI`` client.
-        model_name: The model identifier, e.g. ``"text-embedding-3-small"``.
-        max_concurrency: Maximum number of concurrent requests to the OpenAI API.
+        client (AsyncOpenAI): Configured OpenAI async client.
+        model_name (str): Embeddings model name.
+        cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
     """

     client: AsyncOpenAI
     model_name: str
-    max_concurrency: int = 8 # Default concurrency limit
-    _semaphore: asyncio.Semaphore = field(init=False, repr=False)
+    cache: AsyncBatchingMapProxy[str, NDArray[np.float32]] = field(
+        default_factory=lambda: AsyncBatchingMapProxy(batch_size=128, max_concurrency=8)
+    )
+
+    @classmethod
+    def of(
+        cls,
+        client: AsyncOpenAI,
+        model_name: str,
+        batch_size: int = 128,
+        max_concurrency: int = 8,
+    ) -> "AsyncBatchEmbeddings":
+        """Factory constructor.

-    def __post_init__(self):
-        # Initialize the semaphore after the object is created
-        # Use object.__setattr__ because the dataclass is frozen
-        object.__setattr__(self, "_semaphore", asyncio.Semaphore(self.max_concurrency))
+        Args:
+            client (AsyncOpenAI): OpenAI async client.
+            model_name (str): Embeddings model name.
+            batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
+            max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
+
+        Returns:
+            AsyncBatchEmbeddings: Configured instance with an async batching proxy.
+        """
+        return cls(
+            client=client,
+            model_name=model_name,
+            cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
+        )

     @observe(_LOGGER)
-    @backoff_async(exception=RateLimitError, scale=15, max_retries=8)
+    @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     async def _embed_chunk(self, inputs: List[str]) -> List[NDArray[np.float32]]:
-        """Embed one minibatch of sentences asynchronously, respecting concurrency limits.
+        """Embed one minibatch of strings asynchronously.

         This private helper handles the actual API call for a batch of inputs.
         Exponential back-off is applied automatically when ``openai.RateLimitError``
         is raised.

         Args:
-            inputs (List[str]): Input strings to be embedded. Duplicates are allowed.
+            inputs (List[str]): Input strings to be embedded. Duplicates allowed.

         Returns:
-            List of embedding vectors (``np.ndarray`` with dtype ``float32``)
-            in the same order as *inputs*.
+            List[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.

         Raises:
-            openai.RateLimitError: Propagated if retries are exhausted.
+            RateLimitError: Propagated if retries are exhausted.
         """
-        # Acquire semaphore before making the API call
-        async with self._semaphore:
-            responses = await self.client.embeddings.create(input=inputs, model=self.model_name)
-            return [np.array(d.embedding, dtype=np.float32) for d in responses.data]
+        responses = await self.client.embeddings.create(input=inputs, model=self.model_name)
+        return [np.array(d.embedding, dtype=np.float32) for d in responses.data]

     @observe(_LOGGER)
-    async def create(self, inputs: List[str], batch_size: int) -> List[NDArray[np.float32]]:
-        """Asynchronous public API: generate embeddings for a list of inputs.
-
-        Uses ``openaivec.util.map_async`` to efficiently handle batching and de-duplication.
+    async def create(self, inputs: List[str]) -> List[NDArray[np.float32]]:
+        """Generate embeddings for inputs using proxy batching (async).

         Args:
-            inputs (List[str]): A list of input strings. Duplicates are handled efficiently.
-            batch_size (int): Maximum number of unique inputs per API call.
+            inputs (List[str]): Input strings. Duplicates allowed.

         Returns:
-            A list of ``np.ndarray`` objects (dtype ``float32``) where each entry
-            is the embedding of the corresponding string in *inputs*.
-
-        Raises:
-            openai.RateLimitError: Propagated if retries are exhausted during API calls.
+            List[NDArray[np.float32]]: Embedding vectors aligned to ``inputs``.
         """
-
-        return await map_async(inputs, self._embed_chunk, batch_size)
+        return await self.cache.map(inputs, self._embed_chunk)
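For reference, a minimal sketch of how the reworked synchronous API is called after this change, based only on the signatures visible in the diff: `batch_size` moves from `create()` into the `of()` factory (and its `BatchingMapProxy`), and `create()` takes just the inputs. The top-level `from openaivec import BatchEmbeddings` import is an assumption, mirroring the `from openaivec import AsyncBatchEmbeddings` line shown in the updated docstring example.

```python
# Sketch of 0.13.x synchronous usage implied by this diff.
# Assumption: BatchEmbeddings is exported from the package root like AsyncBatchEmbeddings.
from openai import OpenAI

from openaivec import BatchEmbeddings

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# batch_size now lives on the factory / batching proxy, not on create().
embedder = BatchEmbeddings.of(
    client=client,
    model_name="text-embedding-3-small",
    batch_size=128,
)

texts = ["first document", "second document", "first document"]
embeddings = embedder.create(texts)  # no batch_size argument in 0.13.x
# embeddings: List[NDArray[np.float32]] aligned with `texts`; the duplicate third
# entry is served from the proxy cache rather than re-embedded.
```

A 0.12.x call site that passed `batch_size` to `create()` would move that argument to `of()` (or rely on the proxy default of 128 shown in the field definitions above).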