openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/__init__.py +13 -4
- openaivec/_cache/__init__.py +12 -0
- openaivec/_cache/optimize.py +109 -0
- openaivec/_cache/proxy.py +806 -0
- openaivec/_di.py +326 -0
- openaivec/_embeddings.py +203 -0
- openaivec/{log.py → _log.py} +2 -2
- openaivec/_model.py +113 -0
- openaivec/{prompt.py → _prompt.py} +95 -28
- openaivec/_provider.py +207 -0
- openaivec/_responses.py +511 -0
- openaivec/_schema/__init__.py +9 -0
- openaivec/_schema/infer.py +340 -0
- openaivec/_schema/spec.py +350 -0
- openaivec/_serialize.py +234 -0
- openaivec/{util.py → _util.py} +25 -85
- openaivec/pandas_ext.py +1635 -425
- openaivec/spark.py +604 -335
- openaivec/task/__init__.py +27 -29
- openaivec/task/customer_support/__init__.py +9 -15
- openaivec/task/customer_support/customer_sentiment.py +51 -41
- openaivec/task/customer_support/inquiry_classification.py +86 -61
- openaivec/task/customer_support/inquiry_summary.py +44 -45
- openaivec/task/customer_support/intent_analysis.py +56 -41
- openaivec/task/customer_support/response_suggestion.py +49 -43
- openaivec/task/customer_support/urgency_analysis.py +76 -71
- openaivec/task/nlp/__init__.py +4 -4
- openaivec/task/nlp/dependency_parsing.py +19 -20
- openaivec/task/nlp/keyword_extraction.py +22 -24
- openaivec/task/nlp/morphological_analysis.py +25 -25
- openaivec/task/nlp/named_entity_recognition.py +26 -28
- openaivec/task/nlp/sentiment_analysis.py +29 -21
- openaivec/task/nlp/translation.py +24 -30
- openaivec/task/table/__init__.py +3 -0
- openaivec/task/table/fillna.py +183 -0
- openaivec-1.0.10.dist-info/METADATA +399 -0
- openaivec-1.0.10.dist-info/RECORD +39 -0
- {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
- openaivec/embeddings.py +0 -172
- openaivec/responses.py +0 -392
- openaivec/serialize.py +0 -225
- openaivec/task/model.py +0 -84
- openaivec-0.10.0.dist-info/METADATA +0 -546
- openaivec-0.10.0.dist-info/RECORD +0 -29
- {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
openaivec/_responses.py
ADDED
@@ -0,0 +1,511 @@
import warnings
from dataclasses import dataclass, field
from logging import Logger, getLogger
from typing import Generic, cast

from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, RateLimitError
from openai.types.responses import ParsedResponse
from pydantic import BaseModel

from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
from openaivec._log import observe
from openaivec._model import PreparedTask, ResponseFormat
from openaivec._util import backoff, backoff_async

__all__ = [
    "BatchResponses",
    "AsyncBatchResponses",
]

_LOGGER: Logger = getLogger(__name__)


def _handle_temperature_error(error: BadRequestError, model_name: str, temperature: float) -> None:
    """Handle temperature-related errors for reasoning models.

    Detects when a model doesn't support temperature parameter and provides guidance.

    Args:
        error (BadRequestError): The OpenAI API error.
        model_name (str): The model that caused the error.
        temperature (float): The temperature value that was rejected.
    """
    error_message = str(error)
    if "temperature" in error_message.lower() and "not supported" in error_message.lower():
        guidance_message = (
            f"🔧 Model '{model_name}' rejected temperature parameter (value: {temperature}). "
            f"This typically happens with reasoning models (o1-preview, o1-mini, o3, etc.). "
            f"To fix this, you MUST explicitly set temperature=None:\n"
            f"• For pandas: df.col.ai.responses('prompt', temperature=None)\n"
            f"• For Spark UDFs: responses_udf('prompt', temperature=None)\n"
            f"• For direct API: BatchResponses.of(client, model, temperature=None)\n"
            f"• Original error: {error_message}\n"
            f"See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning"
        )
        warnings.warn(guidance_message, UserWarning, stacklevel=5)

        # Re-raise with enhanced message
        enhanced_message = f"{error_message}\n\nSUGGESTION: Set temperature=None to resolve this error."
        raise BadRequestError(message=enhanced_message, response=error.response, body=error.body)


def _vectorize_system_message(system_message: str) -> str:
    """Build a system prompt that instructs the model to work on batched inputs.

    The returned XML‐ish prompt explains two things to the LLM:

    1. The *general* system instruction coming from the caller (`system_message`)
       is preserved verbatim.
    2. Extra instructions describe how the model should treat the incoming JSON
       that contains multiple user messages and how it must shape its output.

    Args:
        system_message (str): Single instance system instruction the caller would
            normally send to the model.

    Returns:
        str: Composite system prompt with embedded examples for the JSON‑mode
            endpoint (to be supplied via the ``instructions=`` field).
    """
    return f"""
    <SystemMessage>
        <ElementInstructions>
            <Instruction>{system_message}</Instruction>
        </ElementInstructions>
        <BatchInstructions>
            <Instruction>
                You will receive multiple user messages at once.
                Please provide an appropriate response to each message individually.
            </Instruction>
        </BatchInstructions>
        <Examples>
            <Example>
                <Input>
                    {{
                        "user_messages": [
                            {{
                                "id": 1,
                                "body": "{{user_message_1}}"
                            }},
                            {{
                                "id": 2,
                                "body": "{{user_message_2}}"
                            }}
                        ]
                    }}
                </Input>
                <Output>
                    {{
                        "assistant_messages": [
                            {{
                                "id": 1,
                                "body": "{{assistant_response_1}}"
                            }},
                            {{
                                "id": 2,
                                "body": "{{assistant_response_2}}"
                            }}
                        ]
                    }}
                </Output>
            </Example>
        </Examples>
    </SystemMessage>
    """


class Message(BaseModel, Generic[ResponseFormat]):
    id: int
    body: ResponseFormat


class Request(BaseModel):
    user_messages: list[Message[str]]


class Response(BaseModel, Generic[ResponseFormat]):
    assistant_messages: list[Message[ResponseFormat]]


@dataclass(frozen=True)
class BatchResponses(Generic[ResponseFormat]):
    """Stateless façade that turns OpenAI's JSON‑mode API into a batched API.

    This wrapper allows you to submit *multiple* user prompts in one JSON‑mode
    request and receive the answers in the original order.

    Example:
        ```python
        vector_llm = BatchResponses(
            client=openai_client,
            model_name="gpt‑4o‑mini",
            system_message="You are a helpful assistant."
        )
        answers = vector_llm.parse(questions)
        ```

    Attributes:
        client (OpenAI): Initialised OpenAI client.
        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
        system_message (str): System prompt prepended to every request.
        response_format (type[ResponseFormat]): Expected Pydantic model class or ``str`` for each assistant message.
        cache (BatchingMapProxy[str, ResponseFormat]): Order‑preserving batching proxy with de‑duplication and caching.

    Notes:
        Internally the work is delegated to two helpers:

        * ``_predict_chunk`` – fragments the workload and restores ordering.
        * ``_request_llm`` – performs a single OpenAI API call.
    """

    client: OpenAI
    model_name: str  # For Azure: deployment name, for OpenAI: model name
    system_message: str
    response_format: type[ResponseFormat] = str  # type: ignore[assignment]
    cache: BatchingMapProxy[str, ResponseFormat] = field(default_factory=lambda: BatchingMapProxy(batch_size=None))
    api_kwargs: dict[str, int | float | str | bool] = field(default_factory=dict)
    _vectorized_system_message: str = field(init=False)
    _model_json_schema: dict = field(init=False)

    @classmethod
    def of(
        cls,
        client: OpenAI,
        model_name: str,
        system_message: str,
        response_format: type[ResponseFormat] = str,
        batch_size: int | None = None,
        **api_kwargs,
    ) -> "BatchResponses":
        """Factory constructor.

        Args:
            client (OpenAI): OpenAI client.
            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            system_message (str): System prompt for the model.
            response_format (type[ResponseFormat], optional): Expected output type. Defaults to ``str``.
            batch_size (int | None, optional): Max unique prompts per API call. Defaults to None
                (automatic batch size optimization). Set to a positive integer for fixed batch size.
            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).

        Returns:
            BatchResponses: Configured instance backed by a batching proxy.
        """
        return cls(
            client=client,
            model_name=model_name,
            system_message=system_message,
            response_format=response_format,
            cache=BatchingMapProxy(batch_size=batch_size),
            api_kwargs=api_kwargs,
        )

    @classmethod
    def of_task(
        cls,
        client: OpenAI,
        model_name: str,
        task: PreparedTask[ResponseFormat],
        batch_size: int | None = None,
        **api_kwargs,
    ) -> "BatchResponses":
        """Factory from a PreparedTask.

        Args:
            client (OpenAI): OpenAI client.
            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            task (PreparedTask): Prepared task with instructions and response format.
            batch_size (int | None, optional): Max unique prompts per API call. Defaults to None
                (automatic batch size optimization). Set to a positive integer for fixed batch size.
            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.

        Returns:
            BatchResponses: Configured instance backed by a batching proxy.
        """
        return cls(
            client=client,
            model_name=model_name,
            system_message=task.instructions,
            response_format=task.response_format,
            cache=BatchingMapProxy(batch_size=batch_size),
            api_kwargs=api_kwargs,
        )

    def __post_init__(self):
        object.__setattr__(
            self,
            "_vectorized_system_message",
            _vectorize_system_message(self.system_message),
        )

    @observe(_LOGGER)
    @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
    def _request_llm(self, user_messages: list[Message[str]]) -> ParsedResponse[Response[ResponseFormat]]:
        """Make a single call to the OpenAI JSON‑mode endpoint.

        Args:
            user_messages (list[Message[str]]): Sequence of ``Message[str]`` representing the
                prompts for this minibatch. Each message carries a unique `id`
                so we can restore ordering later.

        Returns:
            ParsedResponse[Response[ResponseFormat]]: Parsed response containing assistant messages (arbitrary order).

        Raises:
            openai.RateLimitError: Transparently re‑raised after the
                exponential back‑off decorator exhausts all retries.
        """
        response_format = self.response_format

        class MessageT(BaseModel):
            id: int
            body: response_format  # type: ignore

        class ResponseT(BaseModel):
            assistant_messages: list[MessageT]

        try:
            response: ParsedResponse[ResponseT] = self.client.responses.parse(
                instructions=self._vectorized_system_message,
                model=self.model_name,
                input=Request(user_messages=user_messages).model_dump_json(),
                text_format=ResponseT,
                **self.api_kwargs,
            )
        except BadRequestError as e:
            _handle_temperature_error(e, self.model_name, self.api_kwargs.get("temperature", 0.0))
            raise  # Re-raise if it wasn't a temperature error

        return cast(ParsedResponse[Response[ResponseFormat]], response)

    @observe(_LOGGER)
    def _predict_chunk(self, user_messages: list[str]) -> list[ResponseFormat | None]:
        """Helper executed for every unique minibatch.

        This method:
        1. Converts plain strings into `Message[str]` with stable indices.
        2. Delegates the request to `_request_llm`.
        3. Reorders the responses so they match the original indices.

        The function is pure – it has no side‑effects and the result depends
        only on its arguments – which allows safe reuse.
        """
        messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
        responses: ParsedResponse[Response[ResponseFormat]] = self._request_llm(messages)
        if not responses.output_parsed:
            return [None] * len(messages)
        response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
        sorted_responses: list[ResponseFormat | None] = [response_dict.get(m.id, None) for m in messages]
        return sorted_responses

    @observe(_LOGGER)
    def parse(self, inputs: list[str]) -> list[ResponseFormat | None]:
        """Batched predict.

        Args:
            inputs (list[str]): Prompts that require responses. Duplicates are de‑duplicated.

        Returns:
            list[ResponseFormat | None]: Assistant responses aligned to ``inputs``.
        """
        return self.cache.map(inputs, self._predict_chunk)  # type: ignore[return-value]


@dataclass(frozen=True)
class AsyncBatchResponses(Generic[ResponseFormat]):
    """Stateless façade that turns OpenAI's JSON-mode API into a batched API (Async version).

    This wrapper allows you to submit *multiple* user prompts in one JSON-mode
    request and receive the answers in the original order asynchronously. It also
    controls the maximum number of concurrent requests to the OpenAI API.

    Example:
        ```python
        import asyncio
        from openai import AsyncOpenAI
        from openaivec import AsyncBatchResponses

        openai_async_client = AsyncOpenAI()  # initialize your client

        vector_llm = AsyncBatchResponses.of(
            client=openai_async_client,
            model_name="gpt-4.1-mini",
            system_message="You are a helpful assistant.",
            batch_size=64,
            max_concurrency=5,
        )
        questions = [
            "What is the capital of France?",
            "Explain quantum physics simply.",
        ]

        async def main():
            answers = await vector_llm.parse(questions)
            print(answers)

        asyncio.run(main())
        ```

    Attributes:
        client (AsyncOpenAI): Initialised OpenAI async client.
        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
        system_message (str): System prompt prepended to every request.
        response_format (type[ResponseFormat]): Expected Pydantic model class or ``str`` for each assistant message.
        cache (AsyncBatchingMapProxy[str, ResponseFormat]): Async batching proxy with de‑duplication
            and concurrency control.
    """

    client: AsyncOpenAI
    model_name: str  # For Azure: deployment name, for OpenAI: model name
    system_message: str
    response_format: type[ResponseFormat] = str  # type: ignore[assignment]
    cache: AsyncBatchingMapProxy[str, ResponseFormat] = field(
        default_factory=lambda: AsyncBatchingMapProxy(batch_size=None, max_concurrency=8)
    )
    api_kwargs: dict[str, int | float | str | bool] = field(default_factory=dict)
    _vectorized_system_message: str = field(init=False)
    _model_json_schema: dict = field(init=False)

    @classmethod
    def of(
        cls,
        client: AsyncOpenAI,
        model_name: str,
        system_message: str,
        response_format: type[ResponseFormat] = str,
        batch_size: int | None = None,
        max_concurrency: int = 8,
        **api_kwargs,
    ) -> "AsyncBatchResponses":
        """Factory constructor.

        Args:
            client (AsyncOpenAI): OpenAI async client.
            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            system_message (str): System prompt.
            response_format (type[ResponseFormat], optional): Expected output type. Defaults to ``str``.
            batch_size (int | None, optional): Max unique prompts per API call. Defaults to None
                (automatic batch size optimization). Set to a positive integer for fixed batch size.
            max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).

        Returns:
            AsyncBatchResponses: Configured instance backed by an async batching proxy.
        """
        return cls(
            client=client,
            model_name=model_name,
            system_message=system_message,
            response_format=response_format,
            cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
            api_kwargs=api_kwargs,
        )

    @classmethod
    def of_task(
        cls,
        client: AsyncOpenAI,
        model_name: str,
        task: PreparedTask[ResponseFormat],
        batch_size: int | None = None,
        max_concurrency: int = 8,
        **api_kwargs,
    ) -> "AsyncBatchResponses":
        """Factory from a PreparedTask.

        Args:
            client (AsyncOpenAI): OpenAI async client.
            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            task (PreparedTask): Prepared task with instructions and response format.
            batch_size (int | None, optional): Max unique prompts per API call. Defaults to None
                (automatic batch size optimization). Set to a positive integer for fixed batch size.
            max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.

        Returns:
            AsyncBatchResponses: Configured instance backed by an async batching proxy.
        """
        return cls(
            client=client,
            model_name=model_name,
            system_message=task.instructions,
            response_format=task.response_format,
            cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
            api_kwargs=api_kwargs,
        )

    def __post_init__(self):
        object.__setattr__(
            self,
            "_vectorized_system_message",
            _vectorize_system_message(self.system_message),
        )

    @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
    @observe(_LOGGER)
    async def _request_llm(self, user_messages: list[Message[str]]) -> ParsedResponse[Response[ResponseFormat]]:
        """Make a single async call to the OpenAI JSON‑mode endpoint.

        Args:
            user_messages (list[Message[str]]): Sequence of ``Message[str]`` representing the minibatch prompts.

        Returns:
            ParsedResponse[Response[ResponseFormat]]: Parsed response with assistant messages (arbitrary order).

        Raises:
            RateLimitError: Re‑raised after back‑off retries are exhausted.
        """
        response_format = self.response_format

        class MessageT(BaseModel):
            id: int
            body: response_format  # type: ignore

        class ResponseT(BaseModel):
            assistant_messages: list[MessageT]

        try:
            response: ParsedResponse[ResponseT] = await self.client.responses.parse(
                instructions=self._vectorized_system_message,
                model=self.model_name,
                input=Request(user_messages=user_messages).model_dump_json(),
                text_format=ResponseT,
                **self.api_kwargs,
            )
        except BadRequestError as e:
            _handle_temperature_error(e, self.model_name, self.api_kwargs.get("temperature", 0.0))
            raise  # Re-raise if it wasn't a temperature error

        return cast(ParsedResponse[Response[ResponseFormat]], response)

    @observe(_LOGGER)
    async def _predict_chunk(self, user_messages: list[str]) -> list[ResponseFormat | None]:
        """Async helper executed for every unique minibatch.

        This method:
        1. Converts plain strings into `Message[str]` with stable indices.
        2. Delegates the request to `_request_llm`.
        3. Reorders the responses so they match the original indices.

        The function is pure – it has no side‑effects and the result depends only on its arguments.
        """
        messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
        responses: ParsedResponse[Response[ResponseFormat]] = await self._request_llm(messages)
        if not responses.output_parsed:
            return [None] * len(messages)
        response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
        # Ensure proper handling for missing IDs - this shouldn't happen in normal operation
        sorted_responses: list[ResponseFormat | None] = [response_dict.get(m.id, None) for m in messages]
        return sorted_responses

    @observe(_LOGGER)
    async def parse(self, inputs: list[str]) -> list[ResponseFormat | None]:
        """Batched predict (async).

        Args:
            inputs (list[str]): Prompts that require responses. Duplicates are de‑duplicated.

        Returns:
            list[ResponseFormat | None]: Assistant responses aligned to ``inputs``.
        """
        return await self.cache.map(inputs, self._predict_chunk)  # type: ignore[return-value]
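The docstring examples above only show the default plain-string output. As a minimal usage sketch of the structured-output path that `response_format` enables (the `Sentiment` model, the prompts, the model name, and the top-level `from openaivec import BatchResponses` import are illustrative assumptions, not taken from this diff):

from openai import OpenAI
from pydantic import BaseModel

# Assumption: the package re-exports BatchResponses at the top level,
# mirroring the AsyncBatchResponses import shown in the docstring above.
from openaivec import BatchResponses


class Sentiment(BaseModel):
    # Illustrative response schema; any Pydantic model class can be passed
    # as response_format so each assistant message is parsed into it.
    label: str
    score: float


client = OpenAI()
llm = BatchResponses.of(
    client=client,
    model_name="gpt-4.1-mini",
    system_message="Classify the sentiment of each message.",
    response_format=Sentiment,
    batch_size=32,
)

# The batching proxy de-duplicates identical prompts; results stay aligned
# with the input order, so the returned list still has three entries.
reviews = ["Great product!", "Terrible support.", "Great product!"]
results = llm.parse(reviews)  # list[Sentiment | None]

Because `cache` de-duplicates unique prompts before calling `_predict_chunk`, the repeated review above should cost only one LLM-side answer while the output remains aligned to the inputs.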
openaivec/_schema/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""Schema inference package.

Internal helpers now live in :mod:`openaivec._schema.infer`; this module simply
re-exports the main entry points so ``from openaivec._schema import ...`` still
behaves the same."""

from .infer import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer

__all__ = ["SchemaInferenceOutput", "SchemaInferenceInput", "SchemaInferer"]