openaivec 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

openaivec/_proxy.py CHANGED
@@ -460,7 +460,20 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         self.__process_owned(owned, map_func)
         self.__wait_for(wait_for, map_func)
 
-        return self.__values(items)
+        # Fetch results before purging None entries
+        results = self.__values(items)
+
+        # Remove None values from cache so they are recomputed on future calls
+        with self._lock:
+            if self._cache:  # micro-optimization
+                for k in set(items):
+                    try:
+                        if self._cache.get(k, object()) is None:
+                            del self._cache[k]
+                    except KeyError:
+                        pass
+
+        return results
 
 
 @dataclass
@@ -745,4 +758,13 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         await self.__process_owned(owned, map_func)
         await self.__wait_for(wait_for, map_func)
 
-        return await self.__values(items)
+        results = await self.__values(items)
+
+        # Remove None values from cache after retrieval to avoid persisting incomplete results
+        async with self._lock:
+            if self._cache:
+                for k in set(items):
+                    if self._cache.get(k, object()) is None:
+                        self._cache.pop(k, None)
+
+        return results
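
In effect, a `None` produced by the mapper is returned to the current caller but never survives in the cache, so the next `map` over the same keys retries it. A minimal sketch of that behaviour, assuming `BatchingMapProxy.map(items, fn)` works as it is used by `_responses.py` (batch the uncached items, call `fn`, return values aligned to `items`); the module is private, so the import is for illustration only:

```python
from openaivec._proxy import BatchingMapProxy  # private module, imported only to illustrate the change

calls: list[list[str]] = []

def flaky_upper(items: list[str]) -> list[str | None]:
    # Simulate a partial failure: "b" yields None on the first batch only.
    calls.append(items)
    return [None if (item == "b" and len(calls) == 1) else item.upper() for item in items]

proxy: BatchingMapProxy[str, str] = BatchingMapProxy(batch_size=None)
print(proxy.map(["a", "b"], flaky_upper))  # ['A', None]  -> the None is returned once...
print(proxy.map(["a", "b"], flaky_upper))  # ['A', 'B']   -> ...but not cached; only "b" is recomputed
```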
openaivec/_responses.py CHANGED
@@ -1,7 +1,7 @@
 import warnings
 from dataclasses import dataclass, field
 from logging import Logger, getLogger
-from typing import Generic, List, Type, cast
+from typing import Any, Generic, List, Type, cast
 
 from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, RateLimitError
 from openai.types.responses import ParsedResponse
@@ -163,7 +163,7 @@ class BatchResponses(Generic[ResponseFormat]):
     client: OpenAI
     model_name: str  # For Azure: deployment name, for OpenAI: model name
     system_message: str
-    temperature: float | None = 0.0
+    temperature: float | None = None
     top_p: float = 1.0
     response_format: Type[ResponseFormat] = str  # type: ignore[assignment]
     cache: BatchingMapProxy[str, ResponseFormat] = field(default_factory=lambda: BatchingMapProxy(batch_size=None))
@@ -241,7 +241,9 @@ class BatchResponses(Generic[ResponseFormat]):
 
     @observe(_LOGGER)
     @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
-    def _request_llm(self, user_messages: List[Message[str]]) -> ParsedResponse[Response[ResponseFormat]]:
+    def _request_llm(
+        self, user_messages: List[Message[str]], **extra_api_params: Any
+    ) -> ParsedResponse[Response[ResponseFormat]]:
         """Make a single call to the OpenAI JSON‑mode endpoint.
 
         Args:
@@ -265,16 +267,29 @@ class BatchResponses(Generic[ResponseFormat]):
         class ResponseT(BaseModel):
             assistant_messages: List[MessageT]
 
-        # Prepare API parameters, excluding temperature if None (for reasoning models)
-        api_params = {
+        # Build base API parameters (cannot be overridden by caller)
+        api_params: dict[str, Any] = {
             "model": self.model_name,
             "instructions": self._vectorized_system_message,
             "input": Request(user_messages=user_messages).model_dump_json(),
-            "top_p": self.top_p,
             "text_format": ResponseT,
         }
-        if self.temperature is not None:
-            api_params["temperature"] = self.temperature
+
+        # Resolve nucleus sampling (caller can override)
+        top_p = extra_api_params.pop("top_p", self.top_p)
+        if top_p is not None:
+            api_params["top_p"] = top_p
+
+        # Resolve temperature (caller can override). If None, omit entirely for reasoning models.
+        temperature = extra_api_params.pop("temperature", self.temperature)
+        if temperature is not None:
+            api_params["temperature"] = temperature
+
+        # Merge remaining user supplied params, excluding protected keys
+        for k, v in extra_api_params.items():
+            if k in {"model", "instructions", "input", "text_format"}:
+                continue  # ignore attempts to override core batching contract
+            api_params[k] = v
 
         try:
             completion: ParsedResponse[ResponseT] = self.client.responses.parse(**api_params)
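
The resulting precedence is: the protected keys `model`, `instructions`, `input`, and `text_format` always come from the instance; `top_p` and `temperature` come from the caller when supplied, otherwise from the instance, and are omitted entirely when they resolve to `None` (reasoning models); everything else is passed through untouched. A standalone sketch of that resolution order (the helper name and the sample values are hypothetical, not part of the package):

```python
from typing import Any

_PROTECTED = {"model", "instructions", "input", "text_format"}

def resolve_api_params(base: dict[str, Any], defaults: dict[str, Any], **extra: Any) -> dict[str, Any]:
    """Mirror the merge order used in _request_llm: base keys win, sampling
    parameters fall back to instance defaults, and None means 'omit'."""
    params = dict(base)
    for key in ("top_p", "temperature"):
        value = extra.pop(key, defaults.get(key))
        if value is not None:
            params[key] = value
    for k, v in extra.items():
        if k not in _PROTECTED:  # callers cannot override the batching contract
            params[k] = v
    return params

# Instance configured for a reasoning model (temperature=None); caller overrides top_p:
print(resolve_api_params({"model": "my-deployment"}, {"temperature": None, "top_p": 1.0},
                         top_p=0.9, max_output_tokens=256))
# {'model': 'my-deployment', 'top_p': 0.9, 'max_output_tokens': 256}
```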
@@ -285,7 +300,7 @@ class BatchResponses(Generic[ResponseFormat]):
         return cast(ParsedResponse[Response[ResponseFormat]], completion)
 
     @observe(_LOGGER)
-    def _predict_chunk(self, user_messages: List[str]) -> List[ResponseFormat | None]:
+    def _predict_chunk(self, user_messages: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
         """Helper executed for every unique minibatch.
 
         This method:
@@ -297,7 +312,7 @@ class BatchResponses(Generic[ResponseFormat]):
         only on its arguments – which allows safe reuse.
         """
         messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
-        responses: ParsedResponse[Response[ResponseFormat]] = self._request_llm(messages)
+        responses: ParsedResponse[Response[ResponseFormat]] = self._request_llm(messages, **api_kwargs)
         if not responses.output_parsed:
             return [None] * len(messages)
         response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
@@ -305,17 +320,28 @@ class BatchResponses(Generic[ResponseFormat]):
         return sorted_responses
 
     @observe(_LOGGER)
-    def parse(self, inputs: List[str]) -> List[ResponseFormat | None]:
+    def parse(self, inputs: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
         """Batched predict.
 
+        Accepts arbitrary keyword arguments that are forwarded to the underlying
+        ``OpenAI.responses.parse`` call for future‑proofing (e.g., ``max_output_tokens``,
+        penalties, etc.). ``top_p`` and ``temperature`` default to the instance's
+        configured values but can be overridden explicitly.
+
         Args:
             inputs (List[str]): Prompts that require responses. Duplicates are de‑duplicated.
+            **api_kwargs: Extra keyword args forwarded to the OpenAI Responses API.
 
         Returns:
             List[ResponseFormat | None]: Assistant responses aligned to ``inputs``.
         """
-        result = self.cache.map(inputs, self._predict_chunk)
-        return result  # type: ignore[return-value]
+        if not api_kwargs:
+            return self.cache.map(inputs, self._predict_chunk)  # type: ignore[return-value]
+
+        def _predict_with(xs: List[str]) -> List[ResponseFormat | None]:
+            return self._predict_chunk(xs, **api_kwargs)
+
+        return self.cache.map(inputs, _predict_with)  # type: ignore[return-value]
 
 
 @dataclass(frozen=True)
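
A rough usage sketch of the widened `parse` signature (the client, deployment name, and system message below are placeholders, and the class is constructed directly as the dataclass its field list above suggests):

```python
from openai import OpenAI
from openaivec._responses import BatchResponses  # private module, imported only for illustration

responses = BatchResponses(
    client=OpenAI(),
    model_name="my-deployment",  # placeholder: Azure deployment name or OpenAI model name
    system_message="Translate the user text into French.",
)

# Instance defaults apply: temperature=None is simply omitted, top_p=1.0 is sent.
print(responses.parse(["good morning", "thank you"]))

# Per-call overrides are forwarded to OpenAI.responses.parse.
print(responses.parse(["good morning"], temperature=0.2, max_output_tokens=128))
```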
@@ -382,7 +408,7 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
         client: AsyncOpenAI,
         model_name: str,
         system_message: str,
-        temperature: float | None = 0.0,
+        temperature: float | None = None,
         top_p: float = 1.0,
         response_format: Type[ResponseFormat] = str,
         batch_size: int | None = None,
@@ -455,7 +481,9 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
 
     @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     @observe(_LOGGER)
-    async def _request_llm(self, user_messages: List[Message[str]]) -> ParsedResponse[Response[ResponseFormat]]:
+    async def _request_llm(
+        self, user_messages: List[Message[str]], **extra_api_params: Any
+    ) -> ParsedResponse[Response[ResponseFormat]]:
         """Make a single async call to the OpenAI JSON‑mode endpoint.
 
         Args:
@@ -476,16 +504,29 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
         class ResponseT(BaseModel):
             assistant_messages: List[MessageT]
 
-        # Prepare API parameters, excluding temperature if None (for reasoning models)
-        api_params = {
+        # Build base API parameters (cannot be overridden by caller)
+        api_params: dict[str, Any] = {
             "model": self.model_name,
             "instructions": self._vectorized_system_message,
             "input": Request(user_messages=user_messages).model_dump_json(),
-            "top_p": self.top_p,
             "text_format": ResponseT,
         }
-        if self.temperature is not None:
-            api_params["temperature"] = self.temperature
+
+        # Resolve nucleus sampling (caller can override)
+        top_p = extra_api_params.pop("top_p", self.top_p)
+        if top_p is not None:
+            api_params["top_p"] = top_p
+
+        # Resolve temperature (caller can override). If None, omit entirely for reasoning models.
+        temperature = extra_api_params.pop("temperature", self.temperature)
+        if temperature is not None:
+            api_params["temperature"] = temperature
+
+        # Merge remaining user supplied params, excluding protected keys
+        for k, v in extra_api_params.items():
+            if k in {"model", "instructions", "input", "text_format"}:
+                continue
+            api_params[k] = v
 
         try:
             completion: ParsedResponse[ResponseT] = await self.client.responses.parse(**api_params)
@@ -496,7 +537,7 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
         return cast(ParsedResponse[Response[ResponseFormat]], completion)
 
     @observe(_LOGGER)
-    async def _predict_chunk(self, user_messages: List[str]) -> List[ResponseFormat | None]:
+    async def _predict_chunk(self, user_messages: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
         """Async helper executed for every unique minibatch.
 
         This method:
@@ -507,7 +548,7 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
         The function is pure – it has no side‑effects and the result depends only on its arguments.
         """
         messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
-        responses: ParsedResponse[Response[ResponseFormat]] = await self._request_llm(messages)  # type: ignore[call-issue]
+        responses: ParsedResponse[Response[ResponseFormat]] = await self._request_llm(messages, **api_kwargs)  # type: ignore[call-issue]
         if not responses.output_parsed:
             return [None] * len(messages)
         response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
@@ -516,14 +557,25 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
         return sorted_responses
 
     @observe(_LOGGER)
-    async def parse(self, inputs: List[str]) -> List[ResponseFormat | None]:
+    async def parse(self, inputs: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
         """Batched predict (async).
 
+        Accepts arbitrary keyword arguments forwarded to ``AsyncOpenAI.responses.parse``.
+        ``top_p`` and ``temperature`` default to instance configuration but can be
+        overridden per call. This prepares for future API parameters without
+        changing the public surface again.
+
         Args:
             inputs (List[str]): Prompts that require responses. Duplicates are de‑duplicated.
+            **api_kwargs: Extra keyword args for the OpenAI Responses API.
 
         Returns:
             List[ResponseFormat | None]: Assistant responses aligned to ``inputs``.
         """
-        result = await self.cache.map(inputs, self._predict_chunk)
-        return result  # type: ignore[return-value]
+        if not api_kwargs:
+            return await self.cache.map(inputs, self._predict_chunk)  # type: ignore[return-value]
+
+        async def _predict_with(xs: List[str]) -> List[ResponseFormat | None]:
+            return await self._predict_chunk(xs, **api_kwargs)
+
+        return await self.cache.map(inputs, _predict_with)  # type: ignore[return-value]
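
The async class mirrors this; a sketch under the assumption that `AsyncBatchResponses` can be constructed with the same field names as its synchronous counterpart (the hunks above show only part of its signature, so the construction below is an assumption):

```python
import asyncio

from openai import AsyncOpenAI
from openaivec._responses import AsyncBatchResponses  # private module, imported only for illustration

async def main() -> None:
    # Assumed construction: field names mirror the synchronous BatchResponses dataclass.
    responses = AsyncBatchResponses(
        client=AsyncOpenAI(),
        model_name="my-deployment",  # placeholder
        system_message="Summarize the user text in one sentence.",
    )

    # Instance defaults apply; a None temperature is omitted from the request.
    summaries = await responses.parse(["first document text", "second document text"])

    # Per-call overrides are forwarded to AsyncOpenAI.responses.parse.
    short = await responses.parse(["first document text"], max_output_tokens=64)
    print(summaries, short)

asyncio.run(main())
```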