openaivec 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/pandas_ext.py CHANGED
@@ -74,6 +74,21 @@ __all__ = [
74
74
  _LOGGER = logging.getLogger(__name__)
75
75
 
76
76
 
77
+ # ---------------------------------------------------------------------------
78
+ # Internal helpers (not exported)
79
+ # ---------------------------------------------------------------------------
80
+ def _df_rows_to_json_series(df: pd.DataFrame) -> pd.Series:
81
+ """Return a Series of JSON strings (UTF-8, no ASCII escaping) representing DataFrame rows.
82
+
83
+ Each element is the JSON serialisation of the corresponding row as a dict. Index and
84
+ name are preserved so downstream operations retain alignment. This consolidates the
85
+ previously duplicated inline pipeline used by responses*/task* DataFrame helpers.
86
+ """
87
+ return pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
88
+ lambda x: json.dumps(x, ensure_ascii=False)
89
+ )
90
+
91
+
77
92
  T = TypeVar("T") # For pipe function return type
78
93
 
79
94
 
@@ -165,6 +180,7 @@ class OpenAIVecSeriesAccessor:
165
180
  response_format: Type[ResponseFormat] = str,
166
181
  temperature: float | None = 0.0,
167
182
  top_p: float = 1.0,
183
+ **api_kwargs,
168
184
  ) -> pd.Series:
169
185
  client: BatchResponses = BatchResponses(
170
186
  client=CONTAINER.resolve(OpenAI),
@@ -176,7 +192,8 @@ class OpenAIVecSeriesAccessor:
176
192
  top_p=top_p,
177
193
  )
178
194
 
179
- return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
195
+ # Forward any extra kwargs to the underlying Responses API.
196
+ return pd.Series(client.parse(self._obj.tolist(), **api_kwargs), index=self._obj.index, name=self._obj.name)
180
197
 
181
198
  def embeddings_with_cache(
182
199
  self,
@@ -229,6 +246,7 @@ class OpenAIVecSeriesAccessor:
229
246
  temperature: float | None = 0.0,
230
247
  top_p: float = 1.0,
231
248
  show_progress: bool = False,
249
+ **api_kwargs,
232
250
  ) -> pd.Series:
233
251
  """Call an LLM once for every Series element.
234
252
 
@@ -246,10 +264,6 @@ class OpenAIVecSeriesAccessor:
246
264
  show_progress=True
247
265
  )
248
266
  ```
249
- This method returns a Series of strings, each containing the
250
- assistant's response to the corresponding input.
251
- The model used is set by the `responses_model` function.
252
- The default model is `gpt-4.1-mini`.
253
267
 
254
268
  Args:
255
269
  instructions (str): System prompt prepended to every user message.
@@ -271,46 +285,41 @@ class OpenAIVecSeriesAccessor:
271
285
  response_format=response_format,
272
286
  temperature=temperature,
273
287
  top_p=top_p,
288
+ **api_kwargs,
274
289
  )
275
290
 
276
291
  def task_with_cache(
277
292
  self,
278
293
  task: PreparedTask[ResponseFormat],
279
294
  cache: BatchingMapProxy[str, ResponseFormat],
295
+ **api_kwargs,
280
296
  ) -> pd.Series:
281
297
  """Execute a prepared task on every Series element using a provided cache.
282
298
 
283
- This method allows external control over caching behavior by accepting
284
- a pre-configured BatchingMapProxy instance, enabling cache sharing
285
- across multiple operations or custom batch size management.
299
+ This mirrors ``responses_with_cache`` but uses the task's stored instructions,
300
+ response format, temperature and top_p. A supplied ``BatchingMapProxy`` enables
301
+ cross‑operation deduplicated reuse and external batch size / progress control.
286
302
 
287
303
  Args:
288
- task (PreparedTask): A pre-configured task containing instructions,
289
- response format, and other parameters for processing the inputs.
290
- cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
291
- instance for managing API call batching and deduplication.
292
- Set cache.batch_size=None to enable automatic batch size optimization.
304
+ task (PreparedTask): Prepared task (instructions + response_format + sampling params).
305
+ cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
306
+
307
+ Additional Keyword Args:
308
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
309
+ ``seed``, etc.) forwarded verbatim to the underlying client. Core routing keys
310
+ (``model``, system instructions, user input) are managed internally and cannot be overridden.
293
311
 
294
312
  Returns:
295
- pandas.Series: Series whose values are instances of the task's
296
- response format, aligned with the original Series index.
313
+ pandas.Series: Task results aligned with the original Series index.
297
314
 
298
315
  Example:
299
316
  ```python
300
- from openaivec._model import PreparedTask
301
317
  from openaivec._proxy import BatchingMapProxy
302
-
303
- # Create a shared cache with custom batch size
304
318
  shared_cache = BatchingMapProxy(batch_size=64)
305
-
306
- # Assume you have a prepared task for sentiment analysis
307
- sentiment_task = PreparedTask(...)
308
-
309
- reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
310
- results = reviews.ai.task_with_cache(sentiment_task, cache=shared_cache)
319
+ reviews.ai.task_with_cache(sentiment_task, cache=shared_cache)
311
320
  ```
312
321
  """
313
- client = BatchResponses(
322
+ client: BatchResponses = BatchResponses(
314
323
  client=CONTAINER.resolve(OpenAI),
315
324
  model_name=CONTAINER.resolve(ResponsesModelName).value,
316
325
  system_message=task.instructions,
@@ -319,15 +328,17 @@ class OpenAIVecSeriesAccessor:
319
328
  temperature=task.temperature,
320
329
  top_p=task.top_p,
321
330
  )
322
- return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
331
+ return pd.Series(client.parse(self._obj.tolist(), **api_kwargs), index=self._obj.index, name=self._obj.name)
323
332
 
324
- def task(self, task: PreparedTask, batch_size: int | None = None, show_progress: bool = False) -> pd.Series:
333
+ def task(
334
+ self,
335
+ task: PreparedTask,
336
+ batch_size: int | None = None,
337
+ show_progress: bool = False,
338
+ **api_kwargs,
339
+ ) -> pd.Series:
325
340
  """Execute a prepared task on every Series element.
326
341
 
327
- This method applies a pre-configured task to each element in the Series,
328
- using the task's instructions and response format to generate structured
329
- responses from the language model.
330
-
331
342
  Example:
332
343
  ```python
333
344
  from openaivec._model import PreparedTask
@@ -347,8 +358,6 @@ class OpenAIVecSeriesAccessor:
347
358
  show_progress=True
348
359
  )
349
360
  ```
350
- This method returns a Series containing the task results for each
351
- corresponding input element, following the task's defined structure.
352
361
 
353
362
  Args:
354
363
  task (PreparedTask): A pre-configured task containing instructions,
@@ -358,13 +367,19 @@ class OpenAIVecSeriesAccessor:
358
367
  optimization based on execution time). Set to a positive integer for fixed batch size.
359
368
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
360
369
 
370
+ Additional Keyword Args:
371
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
372
+ ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
373
+ keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
374
+ library and cannot be overridden.
375
+
361
376
  Returns:
362
- pandas.Series: Series whose values are instances of the task's
363
- response format, aligned with the original Series index.
377
+ pandas.Series: Series whose values are instances of the task's response format.
364
378
  """
365
379
  return self.task_with_cache(
366
380
  task=task,
367
381
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
382
+ **api_kwargs,
368
383
  )
369
384
 
370
385
  def embeddings(self, batch_size: int | None = None, show_progress: bool = False) -> pd.Series:
@@ -383,10 +398,6 @@ class OpenAIVecSeriesAccessor:
383
398
  show_progress=True
384
399
  )
385
400
  ```
386
- This method returns a Series of numpy arrays, each containing the
387
- embedding vector for the corresponding input.
388
- The embedding model is set by the `embeddings_model` function.
389
- The default embedding model is `text-embedding-3-small`.
390
401
 
391
402
  Args:
392
403
  batch_size (int | None, optional): Number of inputs grouped into a
@@ -495,6 +506,7 @@ class OpenAIVecDataFrameAccessor:
495
506
  response_format: Type[ResponseFormat] = str,
496
507
  temperature: float | None = 0.0,
497
508
  top_p: float = 1.0,
509
+ **api_kwargs,
498
510
  ) -> pd.Series:
499
511
  """Generate a response for each row after serialising it to JSON using a provided cache.
500
512
 
@@ -533,18 +545,13 @@ class OpenAIVecDataFrameAccessor:
533
545
  )
534
546
  ```
535
547
  """
536
- return self._obj.pipe(
537
- lambda df: (
538
- df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
539
- .map(lambda x: json.dumps(x, ensure_ascii=False))
540
- .ai.responses_with_cache(
541
- instructions=instructions,
542
- cache=cache,
543
- response_format=response_format,
544
- temperature=temperature,
545
- top_p=top_p,
546
- )
547
- )
548
+ return _df_rows_to_json_series(self._obj).ai.responses_with_cache(
549
+ instructions=instructions,
550
+ cache=cache,
551
+ response_format=response_format,
552
+ temperature=temperature,
553
+ top_p=top_p,
554
+ **api_kwargs,
548
555
  )
549
556
 
550
557
  def responses(
@@ -555,6 +562,7 @@ class OpenAIVecDataFrameAccessor:
555
562
  temperature: float | None = 0.0,
556
563
  top_p: float = 1.0,
557
564
  show_progress: bool = False,
565
+ **api_kwargs,
558
566
  ) -> pd.Series:
559
567
  """Generate a response for each row after serialising it to JSON.
560
568
 
@@ -576,11 +584,6 @@ class OpenAIVecDataFrameAccessor:
576
584
  show_progress=True
577
585
  )
578
586
  ```
579
- This method returns a Series of strings, each containing the
580
- assistant's response to the corresponding input.
581
- Each row is serialised to JSON before being sent to the assistant.
582
- The model used is set by the `responses_model` function.
583
- The default model is `gpt-4.1-mini`.
584
587
 
585
588
  Args:
586
589
  instructions (str): System prompt for the assistant.
@@ -602,16 +605,18 @@ class OpenAIVecDataFrameAccessor:
602
605
  response_format=response_format,
603
606
  temperature=temperature,
604
607
  top_p=top_p,
608
+ **api_kwargs,
605
609
  )
606
610
 
607
- def task(self, task: PreparedTask, batch_size: int | None = None, show_progress: bool = False) -> pd.Series:
611
+ def task(
612
+ self,
613
+ task: PreparedTask,
614
+ batch_size: int | None = None,
615
+ show_progress: bool = False,
616
+ **api_kwargs,
617
+ ) -> pd.Series:
608
618
  """Execute a prepared task on each DataFrame row after serialising it to JSON.
609
619
 
610
- This method applies a pre-configured task to each row in the DataFrame,
611
- using the task's instructions and response format to generate structured
612
- responses from the language model. Each row is serialised to JSON before
613
- being processed by the task.
614
-
615
620
  Example:
616
621
  ```python
617
622
  from openaivec._model import PreparedTask
@@ -624,10 +629,17 @@ class OpenAIVecDataFrameAccessor:
624
629
  {"name": "dog", "legs": 4},
625
630
  {"name": "elephant", "legs": 4},
626
631
  ])
632
+ # Basic usage
627
633
  results = df.ai.task(analysis_task)
634
+
635
+ # With progress bar for large datasets
636
+ large_df = pd.DataFrame({"id": list(range(1000))})
637
+ results = large_df.ai.task(
638
+ analysis_task,
639
+ batch_size=50,
640
+ show_progress=True
641
+ )
628
642
  ```
629
- This method returns a Series containing the task results for each
630
- corresponding row, following the task's defined structure.
631
643
 
632
644
  Args:
633
645
  task (PreparedTask): A pre-configured task containing instructions,
@@ -637,19 +649,56 @@ class OpenAIVecDataFrameAccessor:
637
649
  optimization based on execution time). Set to a positive integer for fixed batch size.
638
650
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
639
651
 
652
+ Additional Keyword Args:
653
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
654
+ ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
655
+ keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
656
+ library and cannot be overridden.
657
+
640
658
  Returns:
641
659
  pandas.Series: Series whose values are instances of the task's
642
660
  response format, aligned with the DataFrame's original index.
643
661
  """
644
- return self._obj.pipe(
645
- lambda df: (
646
- df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
647
- .map(lambda x: json.dumps(x, ensure_ascii=False))
648
- .ai.task(task=task, batch_size=batch_size, show_progress=show_progress)
649
- )
662
+ return _df_rows_to_json_series(self._obj).ai.task(
663
+ task=task,
664
+ batch_size=batch_size,
665
+ show_progress=show_progress,
666
+ **api_kwargs,
667
+ )
668
+
669
+ def task_with_cache(
670
+ self,
671
+ task: PreparedTask[ResponseFormat],
672
+ cache: BatchingMapProxy[str, ResponseFormat],
673
+ **api_kwargs,
674
+ ) -> pd.Series:
675
+ """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache.
676
+
677
+ Args:
678
+ task (PreparedTask): Prepared task (instructions + response_format + sampling params).
679
+ cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
680
+
681
+ Additional Keyword Args:
682
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
683
+ ``seed``) forwarded verbatim. Core routing keys are managed internally.
684
+
685
+ Returns:
686
+ pandas.Series: Task results aligned with the DataFrame's original index.
687
+ """
688
+ return _df_rows_to_json_series(self._obj).ai.task_with_cache(
689
+ task=task,
690
+ cache=cache,
691
+ **api_kwargs,
650
692
  )
651
693
 
652
- def fillna(self, target_column_name: str, max_examples: int = 500, batch_size: int | None = None) -> pd.DataFrame:
694
+ def fillna(
695
+ self,
696
+ target_column_name: str,
697
+ max_examples: int = 500,
698
+ batch_size: int | None = None,
699
+ show_progress: bool = False,
700
+ **api_kwargs,
701
+ ) -> pd.DataFrame:
653
702
  """Fill missing values in a DataFrame column using AI-powered inference.
654
703
 
655
704
  This method uses machine learning to intelligently fill missing (NaN) values
@@ -666,6 +715,11 @@ class OpenAIVecDataFrameAccessor:
666
715
  batch_size (int | None, optional): Number of requests sent in one batch
667
716
  to optimize API usage. Defaults to ``None`` (automatic batch size
668
717
  optimization based on execution time). Set to a positive integer for fixed batch size.
718
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
719
+
720
+ Additional Keyword Args:
721
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
722
+ ``seed``, etc.) are forwarded verbatim to the underlying task execution.
669
723
 
670
724
  Returns:
671
725
  pandas.DataFrame: A new DataFrame with missing values filled in the target
@@ -681,6 +735,10 @@ class OpenAIVecDataFrameAccessor:
681
735
 
682
736
  # Fill missing values in the 'name' column
683
737
  filled_df = df.ai.fillna('name')
738
+
739
+ # With progress bar for large datasets
740
+ large_df = pd.DataFrame({'name': [None] * 1000, 'age': list(range(1000))})
741
+ filled_df = large_df.ai.fillna('name', batch_size=32, show_progress=True)
684
742
  ```
685
743
 
686
744
  Note:
@@ -693,7 +751,9 @@ class OpenAIVecDataFrameAccessor:
693
751
  if missing_rows.empty:
694
752
  return self._obj
695
753
 
696
- filled_values: List[FillNaResponse] = missing_rows.ai.task(task=task, batch_size=batch_size)
754
+ filled_values: List[FillNaResponse] = missing_rows.ai.task(
755
+ task=task, batch_size=batch_size, show_progress=show_progress, **api_kwargs
756
+ )
697
757
 
698
758
  # get deep copy of the DataFrame to avoid modifying the original
699
759
  df = self._obj.copy()
@@ -754,6 +814,7 @@ class AsyncOpenAIVecSeriesAccessor:
754
814
  response_format: Type[ResponseFormat] = str,
755
815
  temperature: float | None = 0.0,
756
816
  top_p: float = 1.0,
817
+ **api_kwargs,
757
818
  ) -> pd.Series:
758
819
  """Call an LLM once for every Series element using a provided cache (asynchronously).
759
820
 
@@ -769,24 +830,24 @@ class AsyncOpenAIVecSeriesAccessor:
769
830
  Set cache.batch_size=None to enable automatic batch size optimization.
770
831
  response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
771
832
  type the assistant should return. Defaults to ``str``.
772
- temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
833
+ temperature (float | None, optional): Sampling temperature. ``None`` omits the
834
+ parameter (recommended for reasoning models). Defaults to ``0.0``.
773
835
  top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
836
+ **api_kwargs: Additional keyword arguments forwarded verbatim to
837
+ ``AsyncOpenAI.responses.parse`` (e.g. ``max_output_tokens``, penalties,
838
+ future parameters). Core batching keys (model, instructions, input,
839
+ text_format) are protected and silently ignored if provided.
774
840
 
775
841
  Returns:
776
842
  pandas.Series: Series whose values are instances of ``response_format``.
777
843
 
778
844
  Example:
779
845
  ```python
780
- from openaivec._proxy import AsyncBatchingMapProxy
781
-
782
- # Create a shared cache with custom batch size and concurrency
783
- shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
784
-
785
- animals = pd.Series(["cat", "dog", "elephant"])
786
- # Must be awaited
787
- result = await animals.aio.responses_with_cache(
788
- "translate to French",
789
- cache=shared_cache
846
+ result = await series.aio.responses_with_cache(
847
+ "classify",
848
+ cache=shared,
849
+ max_output_tokens=256,
850
+ frequency_penalty=0.2,
790
851
  )
791
852
  ```
792
853
 
@@ -802,9 +863,7 @@ class AsyncOpenAIVecSeriesAccessor:
802
863
  temperature=temperature,
803
864
  top_p=top_p,
804
865
  )
805
- # Await the async operation
806
- results = await client.parse(self._obj.tolist())
807
-
866
+ results = await client.parse(self._obj.tolist(), **api_kwargs)
808
867
  return pd.Series(results, index=self._obj.index, name=self._obj.name)
809
868
 
810
869
  async def embeddings_with_cache(
@@ -864,6 +923,7 @@ class AsyncOpenAIVecSeriesAccessor:
864
923
  self,
865
924
  task: PreparedTask[ResponseFormat],
866
925
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
926
+ **api_kwargs,
867
927
  ) -> pd.Series:
868
928
  """Execute a prepared task on every Series element using a provided cache (asynchronously).
869
929
 
@@ -879,6 +939,12 @@ class AsyncOpenAIVecSeriesAccessor:
879
939
  instance for managing API call batching and deduplication.
880
940
  Set cache.batch_size=None to enable automatic batch size optimization.
881
941
 
942
+ Additional Keyword Args:
943
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
944
+ ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
945
+ keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
946
+ library and cannot be overridden.
947
+
882
948
  Returns:
883
949
  pandas.Series: Series whose values are instances of the task's
884
950
  response format, aligned with the original Series index.
@@ -911,9 +977,8 @@ class AsyncOpenAIVecSeriesAccessor:
911
977
  temperature=task.temperature,
912
978
  top_p=task.top_p,
913
979
  )
914
-
915
980
  # Await the async operation
916
- results = await client.parse(self._obj.tolist())
981
+ results = await client.parse(self._obj.tolist(), **api_kwargs)
917
982
 
918
983
  return pd.Series(results, index=self._obj.index, name=self._obj.name)
919
984
 
@@ -926,6 +991,7 @@ class AsyncOpenAIVecSeriesAccessor:
926
991
  top_p: float = 1.0,
927
992
  max_concurrency: int = 8,
928
993
  show_progress: bool = False,
994
+ **api_kwargs,
929
995
  ) -> pd.Series:
930
996
  """Call an LLM once for every Series element (asynchronously).
931
997
 
@@ -944,10 +1010,6 @@ class AsyncOpenAIVecSeriesAccessor:
944
1010
  show_progress=True
945
1011
  )
946
1012
  ```
947
- This method returns a Series of strings, each containing the
948
- assistant's response to the corresponding input.
949
- The model used is set by the `responses_model` function.
950
- The default model is `gpt-4.1-mini`.
951
1013
 
952
1014
  Args:
953
1015
  instructions (str): System prompt prepended to every user message.
@@ -976,6 +1038,7 @@ class AsyncOpenAIVecSeriesAccessor:
976
1038
  response_format=response_format,
977
1039
  temperature=temperature,
978
1040
  top_p=top_p,
1041
+ **api_kwargs,
979
1042
  )
980
1043
 
981
1044
  async def embeddings(
@@ -997,10 +1060,6 @@ class AsyncOpenAIVecSeriesAccessor:
997
1060
  show_progress=True
998
1061
  )
999
1062
  ```
1000
- This method returns a Series of numpy arrays, each containing the
1001
- embedding vector for the corresponding input.
1002
- The embedding model is set by the `embeddings_model` function.
1003
- The default embedding model is `text-embedding-3-small`.
1004
1063
 
1005
1064
  Args:
1006
1065
  batch_size (int | None, optional): Number of inputs grouped into a
@@ -1024,14 +1083,15 @@ class AsyncOpenAIVecSeriesAccessor:
1024
1083
  )
1025
1084
 
1026
1085
  async def task(
1027
- self, task: PreparedTask, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = False
1086
+ self,
1087
+ task: PreparedTask,
1088
+ batch_size: int | None = None,
1089
+ max_concurrency: int = 8,
1090
+ show_progress: bool = False,
1091
+ **api_kwargs,
1028
1092
  ) -> pd.Series:
1029
1093
  """Execute a prepared task on every Series element (asynchronously).
1030
1094
 
1031
- This method applies a pre-configured task to each element in the Series,
1032
- using the task's instructions and response format to generate structured
1033
- responses from the language model.
1034
-
1035
1095
  Example:
1036
1096
  ```python
1037
1097
  from openaivec._model import PreparedTask
@@ -1052,8 +1112,6 @@ class AsyncOpenAIVecSeriesAccessor:
1052
1112
  show_progress=True
1053
1113
  )
1054
1114
  ```
1055
- This method returns a Series containing the task results for each
1056
- corresponding input element, following the task's defined structure.
1057
1115
 
1058
1116
  Args:
1059
1117
  task (PreparedTask): A pre-configured task containing instructions,
@@ -1065,6 +1123,12 @@ class AsyncOpenAIVecSeriesAccessor:
1065
1123
  requests. Defaults to 8.
1066
1124
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1067
1125
 
1126
+ Additional Keyword Args:
1127
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
1128
+ ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
1129
+ keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
1130
+ library and cannot be overridden.
1131
+
1068
1132
  Returns:
1069
1133
  pandas.Series: Series whose values are instances of the task's
1070
1134
  response format, aligned with the original Series index.
@@ -1077,6 +1141,7 @@ class AsyncOpenAIVecSeriesAccessor:
1077
1141
  cache=AsyncBatchingMapProxy(
1078
1142
  batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
1079
1143
  ),
1144
+ **api_kwargs,
1080
1145
  )
1081
1146
 
1082
1147
 
@@ -1094,6 +1159,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1094
1159
  response_format: Type[ResponseFormat] = str,
1095
1160
  temperature: float | None = 0.0,
1096
1161
  top_p: float = 1.0,
1162
+ **api_kwargs,
1097
1163
  ) -> pd.Series:
1098
1164
  """Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
1099
1165
 
@@ -1137,20 +1203,14 @@ class AsyncOpenAIVecDataFrameAccessor:
1137
1203
  Note:
1138
1204
  This is an asynchronous method and must be awaited.
1139
1205
  """
1140
- series_of_json = self._obj.pipe(
1141
- lambda df: (
1142
- pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
1143
- lambda x: json.dumps(x, ensure_ascii=False)
1144
- )
1145
- )
1146
- )
1147
1206
  # Await the call to the async Series method using .aio
1148
- return await series_of_json.aio.responses_with_cache(
1207
+ return await _df_rows_to_json_series(self._obj).aio.responses_with_cache(
1149
1208
  instructions=instructions,
1150
1209
  cache=cache,
1151
1210
  response_format=response_format,
1152
1211
  temperature=temperature,
1153
1212
  top_p=top_p,
1213
+ **api_kwargs,
1154
1214
  )
1155
1215
 
1156
1216
  async def responses(
@@ -1162,33 +1222,29 @@ class AsyncOpenAIVecDataFrameAccessor:
1162
1222
  top_p: float = 1.0,
1163
1223
  max_concurrency: int = 8,
1164
1224
  show_progress: bool = False,
1225
+ **api_kwargs,
1165
1226
  ) -> pd.Series:
1166
1227
  """Generate a response for each row after serialising it to JSON (asynchronously).
1167
1228
 
1168
1229
  Example:
1169
1230
  ```python
1170
1231
  df = pd.DataFrame([
1171
- {\"name\": \"cat\", \"legs\": 4},
1172
- {\"name\": \"dog\", \"legs\": 4},
1173
- {\"name\": \"elephant\", \"legs\": 4},
1232
+ {"name": "cat", "legs": 4},
1233
+ {"name": "dog", "legs": 4},
1234
+ {"name": "elephant", "legs": 4},
1174
1235
  ])
1175
1236
  # Must be awaited
1176
- results = await df.aio.responses(\"what is the animal\'s name?\")
1237
+ results = await df.aio.responses("what is the animal's name?")
1177
1238
 
1178
1239
  # With progress bar for large datasets
1179
- large_df = pd.DataFrame({\"id\": list(range(1000))})
1240
+ large_df = pd.DataFrame({"id": list(range(1000))})
1180
1241
  results = await large_df.aio.responses(
1181
- \"generate a name for this ID\",
1242
+ "generate a name for this ID",
1182
1243
  batch_size=20,
1183
1244
  max_concurrency=4,
1184
1245
  show_progress=True
1185
1246
  )
1186
1247
  ```
1187
- This method returns a Series of strings, each containing the
1188
- assistant's response to the corresponding input.
1189
- Each row is serialised to JSON before being sent to the assistant.
1190
- The model used is set by the `responses_model` function.
1191
- The default model is `gpt-4.1-mini`.
1192
1248
 
1193
1249
  Args:
1194
1250
  instructions (str): System prompt for the assistant.
@@ -1217,18 +1273,19 @@ class AsyncOpenAIVecDataFrameAccessor:
1217
1273
  response_format=response_format,
1218
1274
  temperature=temperature,
1219
1275
  top_p=top_p,
1276
+ **api_kwargs,
1220
1277
  )
1221
1278
 
1222
1279
  async def task(
1223
- self, task: PreparedTask, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = False
1280
+ self,
1281
+ task: PreparedTask,
1282
+ batch_size: int | None = None,
1283
+ max_concurrency: int = 8,
1284
+ show_progress: bool = False,
1285
+ **api_kwargs,
1224
1286
  ) -> pd.Series:
1225
1287
  """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
1226
1288
 
1227
- This method applies a pre-configured task to each row in the DataFrame,
1228
- using the task's instructions and response format to generate structured
1229
- responses from the language model. Each row is serialised to JSON before
1230
- being processed by the task.
1231
-
1232
1289
  Example:
1233
1290
  ```python
1234
1291
  from openaivec._model import PreparedTask
@@ -1253,8 +1310,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1253
1310
  show_progress=True
1254
1311
  )
1255
1312
  ```
1256
- This method returns a Series containing the task results for each
1257
- corresponding row, following the task's defined structure.
1258
1313
 
1259
1314
  Args:
1260
1315
  task (PreparedTask): A pre-configured task containing instructions,
@@ -1266,6 +1321,12 @@ class AsyncOpenAIVecDataFrameAccessor:
1266
1321
  requests. Defaults to 8.
1267
1322
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1268
1323
 
1324
+ Additional Keyword Args:
1325
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
1326
+ ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
1327
+ keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
1328
+ library and cannot be overridden.
1329
+
1269
1330
  Returns:
1270
1331
  pandas.Series: Series whose values are instances of the task's
1271
1332
  response format, aligned with the DataFrame's original index.
@@ -1273,19 +1334,40 @@ class AsyncOpenAIVecDataFrameAccessor:
1273
1334
  Note:
1274
1335
  This is an asynchronous method and must be awaited.
1275
1336
  """
1276
- series_of_json = self._obj.pipe(
1277
- lambda df: (
1278
- pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
1279
- lambda x: json.dumps(x, ensure_ascii=False)
1280
- )
1281
- )
1282
- )
1283
1337
  # Await the call to the async Series method using .aio
1284
- return await series_of_json.aio.task(
1338
+ return await _df_rows_to_json_series(self._obj).aio.task(
1285
1339
  task=task,
1286
1340
  batch_size=batch_size,
1287
1341
  max_concurrency=max_concurrency,
1288
1342
  show_progress=show_progress,
1343
+ **api_kwargs,
1344
+ )
1345
+
1346
+ async def task_with_cache(
1347
+ self,
1348
+ task: PreparedTask[ResponseFormat],
1349
+ cache: AsyncBatchingMapProxy[str, ResponseFormat],
1350
+ **api_kwargs,
1351
+ ) -> pd.Series:
1352
+ """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache (async).
1353
+
1354
+ Args:
1355
+ task (PreparedTask): Prepared task (instructions + response_format + sampling params).
1356
+ cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre‑configured async cache instance.
1357
+
1358
+ Additional Keyword Args:
1359
+ Arbitrary OpenAI Responses API parameters forwarded verbatim. Core routing keys are protected.
1360
+
1361
+ Returns:
1362
+ pandas.Series: Task results aligned with the DataFrame's original index.
1363
+
1364
+ Note:
1365
+ This is an asynchronous method and must be awaited.
1366
+ """
1367
+ return await _df_rows_to_json_series(self._obj).aio.task_with_cache(
1368
+ task=task,
1369
+ cache=cache,
1370
+ **api_kwargs,
1289
1371
  )
1290
1372
 
1291
1373
  async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
@@ -1371,7 +1453,13 @@ class AsyncOpenAIVecDataFrameAccessor:
1371
1453
  return df_current
1372
1454
 
1373
1455
  async def fillna(
1374
- self, target_column_name: str, max_examples: int = 500, batch_size: int | None = None, max_concurrency: int = 8
1456
+ self,
1457
+ target_column_name: str,
1458
+ max_examples: int = 500,
1459
+ batch_size: int | None = None,
1460
+ max_concurrency: int = 8,
1461
+ show_progress: bool = False,
1462
+ **api_kwargs,
1375
1463
  ) -> pd.DataFrame:
1376
1464
  """Fill missing values in a DataFrame column using AI-powered inference (asynchronously).
1377
1465
 
@@ -1391,6 +1479,11 @@ class AsyncOpenAIVecDataFrameAccessor:
1391
1479
  optimization based on execution time). Set to a positive integer for fixed batch size.
1392
1480
  max_concurrency (int, optional): Maximum number of concurrent
1393
1481
  requests. Defaults to 8.
1482
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1483
+
1484
+ Additional Keyword Args:
1485
+ Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
1486
+ ``seed``, etc.) are forwarded verbatim to the underlying task execution.
1394
1487
 
1395
1488
  Returns:
1396
1489
  pandas.DataFrame: A new DataFrame with missing values filled in the target
@@ -1406,6 +1499,15 @@ class AsyncOpenAIVecDataFrameAccessor:
1406
1499
 
1407
1500
  # Fill missing values in the 'name' column (must be awaited)
1408
1501
  filled_df = await df.aio.fillna('name')
1502
+
1503
+ # With progress bar for large datasets
1504
+ large_df = pd.DataFrame({'name': [None] * 1000, 'age': list(range(1000))})
1505
+ filled_df = await large_df.aio.fillna(
1506
+ 'name',
1507
+ batch_size=32,
1508
+ max_concurrency=4,
1509
+ show_progress=True
1510
+ )
1409
1511
  ```
1410
1512
 
1411
1513
  Note:
@@ -1420,7 +1522,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1420
1522
  return self._obj
1421
1523
 
1422
1524
  filled_values: List[FillNaResponse] = await missing_rows.aio.task(
1423
- task=task, batch_size=batch_size, max_concurrency=max_concurrency
1525
+ task=task, batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress, **api_kwargs
1424
1526
  )
1425
1527
 
1426
1528
  # get deep copy of the DataFrame to avoid modifying the original