openaivec 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/__init__.py CHANGED
@@ -1,5 +1,5 @@
- from .embeddings import BatchEmbeddings, AsyncBatchEmbeddings
- from .responses import BatchResponses, AsyncBatchResponses
+ from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+ from .responses import AsyncBatchResponses, BatchResponses
  
  __all__ = [
      "BatchResponses",
openaivec/di.py CHANGED
@@ -11,14 +11,14 @@ are created once and reused across multiple resolve calls.
  Example:
      ```python
      from openaivec.di import Container
-
+
      class DatabaseService:
          def __init__(self):
              self.connection = "database://localhost"
-
+
      container = Container()
      container.register(DatabaseService, lambda: DatabaseService())
-
+
      db1 = container.resolve(DatabaseService)
      db2 = container.resolve(DatabaseService)
      print(db1 is db2)  # True - same instance
openaivec/embeddings.py CHANGED
@@ -6,9 +6,9 @@ import numpy as np
  from numpy.typing import NDArray
  from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
  
- from .log import observe
- from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
- from .util import backoff, backoff_async
+ from openaivec.log import observe
+ from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+ from openaivec.util import backoff, backoff_async
  
  __all__ = [
      "BatchEmbeddings",
@@ -24,7 +24,8 @@ class BatchEmbeddings:
  
      Attributes:
          client (OpenAI): Configured OpenAI client.
-         model_name (str): Model identifier (e.g., ``"text-embedding-3-small"``).
+         model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
+             (e.g., ``"text-embedding-3-small"``).
          cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
      """
  
@@ -38,7 +39,7 @@ class BatchEmbeddings:
  
          Args:
              client (OpenAI): OpenAI client.
-             model_name (str): Embeddings model name.
+             model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
              batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
  
          Returns:
@@ -90,7 +91,7 @@ class AsyncBatchEmbeddings:
          import asyncio
          import numpy as np
          from openai import AsyncOpenAI
-         from openaivec import AsyncBatchEmbeddings
+         from openaivec import AsyncBatchEmbeddings
  
          # Assuming openai_async_client is an initialized AsyncOpenAI client
          openai_async_client = AsyncOpenAI()  # Replace with your actual client initialization
@@ -119,7 +120,7 @@ class AsyncBatchEmbeddings:
  
      Attributes:
          client (AsyncOpenAI): Configured OpenAI async client.
-         model_name (str): Embeddings model name.
+         model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
          cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
      """
  
@@ -141,7 +142,7 @@ class AsyncBatchEmbeddings:
  
          Args:
              client (AsyncOpenAI): OpenAI async client.
-             model_name (str): Embeddings model name.
+             model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
              batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
              max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
  
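Note: the docstrings above only show the `client` / `model_name` / `batch_size` arguments. A minimal usage sketch under that assumption — the constructor form and the `create` method name are not confirmed by this diff:

```python
# Hedged sketch: only client, model_name, and batch_size appear in the
# docstrings above; the constructor call and create() are assumptions.
from openai import OpenAI
from openaivec import BatchEmbeddings

client = OpenAI()  # reads OPENAI_API_KEY from the environment
embedder = BatchEmbeddings(client=client, model_name="text-embedding-3-small", batch_size=128)
vectors = embedder.create(["cat", "dog", "elephant"])  # one float32 vector per input
```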
openaivec/model.py CHANGED
@@ -59,29 +59,65 @@ class PreparedTask:
  
  @dataclass(frozen=True)
  class ResponsesModelName:
+     """Container for responses model name configuration.
+ 
+     Attributes:
+         value (str): The model name for OpenAI responses API.
+     """
+ 
      value: str
  
  
  @dataclass(frozen=True)
  class EmbeddingsModelName:
+     """Container for embeddings model name configuration.
+ 
+     Attributes:
+         value (str): The model name for OpenAI embeddings API.
+     """
+ 
      value: str
  
  
  @dataclass(frozen=True)
  class OpenAIAPIKey:
+     """Container for OpenAI API key configuration.
+ 
+     Attributes:
+         value (str): The API key for OpenAI services.
+     """
+ 
      value: str
  
  
  @dataclass(frozen=True)
  class AzureOpenAIAPIKey:
+     """Container for Azure OpenAI API key configuration.
+ 
+     Attributes:
+         value (str): The API key for Azure OpenAI services.
+     """
+ 
      value: str
  
  
  @dataclass(frozen=True)
- class AzureOpenAIEndpoint:
+ class AzureOpenAIBaseURL:
+     """Container for Azure OpenAI base URL configuration.
+ 
+     Attributes:
+         value (str): The base URL for Azure OpenAI services.
+     """
+ 
      value: str
  
  
  @dataclass(frozen=True)
  class AzureOpenAIAPIVersion:
+     """Container for Azure OpenAI API version configuration.
+ 
+     Attributes:
+         value (str): The API version for Azure OpenAI services.
+     """
+ 
      value: str
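These frozen dataclasses wrap plain strings so that each configuration value is a distinct type the dependency container can key on. A minimal sketch using the `Container` API from openaivec/di.py above; the model names are the defaults shown in the pandas_ext docstring:

```python
# Registering the typed configuration containers, mirroring how
# pandas_ext calls CONTAINER.register(ResponsesModelName, ...) below.
from openaivec.di import Container
from openaivec.model import EmbeddingsModelName, ResponsesModelName

container = Container()
container.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
container.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))

print(container.resolve(ResponsesModelName).value)  # gpt-4.1-mini
```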
openaivec/pandas_ext.py CHANGED
@@ -7,7 +7,7 @@ from openaivec import pandas_ext
  
  # Option 1: Use environment variables (automatic detection)
  # Set OPENAI_API_KEY or Azure OpenAI environment variables
- # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_API_ENDPOINT, AZURE_OPENAI_API_VERSION)
+ # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
  # No explicit setup needed - clients are automatically created
  
  # Option 2: Use an existing OpenAI client instance
@@ -17,14 +17,18 @@ pandas_ext.use(client)
  # Option 3: Use an existing Azure OpenAI client instance
  azure_client = AzureOpenAI(
      api_key="your-azure-key",
-     azure_endpoint="https://<your-resource-name>.services.ai.azure.com",
-     api_version="2025-04-01-preview"
+     base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+     api_version="preview"
  )
  pandas_ext.use(azure_client)
  
- # Option 4: Use async clients
- async_client = AsyncOpenAI(api_key="your-api-key")
- pandas_ext.use_async(async_client)
+ # Option 4: Use async Azure OpenAI client instance
+ async_azure_client = AsyncAzureOpenAI(
+     api_key="your-azure-key",
+     base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+     api_version="preview"
+ )
+ pandas_ext.use_async(async_azure_client)
  
  # Set up model names (optional, defaults shown)
  pandas_ext.responses_model("gpt-4.1-mini")
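The renamed `AZURE_OPENAI_BASE_URL` variable is what Option 1 now reads. A minimal sketch of that path, with placeholder values:

```python
# Option 1 in practice: the variable names come from the docstring above;
# the values here are placeholders.
import os

os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
os.environ["AZURE_OPENAI_API_VERSION"] = "preview"

from openaivec import pandas_ext  # clients are then created automatically
```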
@@ -46,12 +50,12 @@ import tiktoken
  from openai import AsyncOpenAI, OpenAI
  from pydantic import BaseModel
  
- from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
- from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
- from .provider import CONTAINER
- from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
- from .responses import AsyncBatchResponses, BatchResponses
- from .task.table import FillNaResponse, fillna
+ from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+ from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
+ from openaivec.provider import CONTAINER, _check_azure_v1_api_url
+ from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+ from openaivec.responses import AsyncBatchResponses, BatchResponses
+ from openaivec.task.table import FillNaResponse, fillna
  
  __all__ = [
      "use",
@@ -74,6 +78,10 @@ def use(client: OpenAI) -> None:
          `openai.AzureOpenAI` instance.
          The same instance is reused by every helper in this module.
      """
+     # Check Azure v1 API URL if using AzureOpenAI client
+     if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
+         _check_azure_v1_api_url(str(client.base_url))
+ 
      CONTAINER.register(OpenAI, lambda: client)
  
  
@@ -85,6 +93,10 @@ def use_async(client: AsyncOpenAI) -> None:
          `openai.AsyncAzureOpenAI` instance.
          The same instance is reused by every helper in this module.
      """
+     # Check Azure v1 API URL if using AsyncAzureOpenAI client
+     if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
+         _check_azure_v1_api_url(str(client.base_url))
+ 
      CONTAINER.register(AsyncOpenAI, lambda: client)
  
  
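Both guards call `_check_azure_v1_api_url`, whose body is not part of this diff. A plausible sketch of what such a check might do — warn when an Azure client is not pointed at the v1 base URL — not the package's actual implementation:

```python
# Hypothetical sketch of the imported helper; the real implementation in
# openaivec/provider.py is not shown in this diff.
import warnings

def _check_azure_v1_api_url(base_url: str) -> None:
    if "/openai/v1" not in base_url:
        warnings.warn(
            "Azure OpenAI clients should use the v1 base URL, e.g. "
            "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
            UserWarning,
        )
```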
@@ -92,7 +104,7 @@ def responses_model(name: str) -> None:
      """Override the model used for text responses.
  
      Args:
-         name (str): Model name as listed in the OpenAI API
+         name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
              (for example, ``gpt-4.1-mini``).
      """
      CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
@@ -102,7 +114,8 @@ def embeddings_model(name: str) -> None:
      """Override the model used for text embeddings.
  
      Args:
-         name (str): Embedding model name, e.g. ``text-embedding-3-small``.
+         name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name,
+             e.g. ``text-embedding-3-small``.
      """
      CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
  
@@ -143,7 +156,7 @@ class OpenAIVecSeriesAccessor:
          instructions: str,
          cache: BatchingMapProxy[str, ResponseFormat],
          response_format: Type[ResponseFormat] = str,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
      ) -> pd.Series:
          client: BatchResponses = BatchResponses(
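The `temperature` annotation is widened to `float | None` here and in the signatures below; presumably `None` lets callers drop the sampling parameter for models that reject it. A hedged usage sketch:

```python
# Hedged sketch: None is now a legal value for temperature; that it is
# omitted from the request rather than sent as null is an assumption.
animals = pd.Series(["cat", "dog", "elephant"])
animals.ai.responses("translate to French", temperature=None)
```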
@@ -205,15 +218,25 @@ class OpenAIVecSeriesAccessor:
          instructions: str,
          response_format: Type[ResponseFormat] = str,
          batch_size: int = 128,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
+         show_progress: bool = False,
      ) -> pd.Series:
          """Call an LLM once for every Series element.
  
          Example:
              ```python
              animals = pd.Series(["cat", "dog", "elephant"])
+             # Basic usage
              animals.ai.responses("translate to French")
+ 
+             # With progress bar in Jupyter notebooks
+             large_series = pd.Series(["data"] * 1000)
+             large_series.ai.responses(
+                 "analyze this data",
+                 batch_size=32,
+                 show_progress=True
+             )
              ```
          This method returns a Series of strings, each containing the
          assistant's response to the corresponding input.
@@ -228,13 +251,14 @@ class OpenAIVecSeriesAccessor:
                  request. Defaults to ``128``.
              temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
              top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are instances of ``response_format``.
          """
          return self.responses_with_cache(
              instructions=instructions,
-             cache=BatchingMapProxy(batch_size=batch_size),
+             cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
              response_format=response_format,
              temperature=temperature,
              top_p=top_p,
@@ -287,7 +311,7 @@ class OpenAIVecSeriesAccessor:
          )
          return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
  
-     def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+     def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
          """Execute a prepared task on every Series element.
  
          This method applies a pre-configured task to each element in the Series,
@@ -302,7 +326,16 @@ class OpenAIVecSeriesAccessor:
              sentiment_task = PreparedTask(...)
  
              reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+             # Basic usage
              results = reviews.ai.task(sentiment_task)
+ 
+             # With progress bar for large datasets
+             large_reviews = pd.Series(["review text"] * 2000)
+             results = large_reviews.ai.task(
+                 sentiment_task,
+                 batch_size=50,
+                 show_progress=True
+             )
              ```
          This method returns a Series containing the task results for each
          corresponding input element, following the task's defined structure.
@@ -312,6 +345,7 @@ class OpenAIVecSeriesAccessor:
                  response format, and other parameters for processing the inputs.
              batch_size (int, optional): Number of prompts grouped into a single
                  request to optimize API usage. Defaults to 128.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are instances of the task's
@@ -319,16 +353,24 @@ class OpenAIVecSeriesAccessor:
          """
          return self.task_with_cache(
              task=task,
-             cache=BatchingMapProxy(batch_size=batch_size),
+             cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
          )
  
-     def embeddings(self, batch_size: int = 128) -> pd.Series:
+     def embeddings(self, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
          """Compute OpenAI embeddings for every Series element.
  
          Example:
              ```python
              animals = pd.Series(["cat", "dog", "elephant"])
+             # Basic usage
              animals.ai.embeddings()
+ 
+             # With progress bar for large datasets
+             large_texts = pd.Series(["text"] * 5000)
+             embeddings = large_texts.ai.embeddings(
+                 batch_size=100,
+                 show_progress=True
+             )
              ```
          This method returns a Series of numpy arrays, each containing the
          embedding vector for the corresponding input.
@@ -338,13 +380,14 @@ class OpenAIVecSeriesAccessor:
          Args:
              batch_size (int, optional): Number of inputs grouped into a
                  single request. Defaults to ``128``.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are ``np.ndarray`` objects
              (dtype ``float32``).
          """
          return self.embeddings_with_cache(
-             cache=BatchingMapProxy(batch_size=batch_size),
+             cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
          )
  
      def count_tokens(self) -> pd.Series:
@@ -438,7 +481,7 @@ class OpenAIVecDataFrameAccessor:
          instructions: str,
          cache: BatchingMapProxy[str, ResponseFormat],
          response_format: Type[ResponseFormat] = str,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
      ) -> pd.Series:
          """Generate a response for each row after serialising it to JSON using a provided cache.
@@ -496,8 +539,9 @@ class OpenAIVecDataFrameAccessor:
          instructions: str,
          response_format: Type[ResponseFormat] = str,
          batch_size: int = 128,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
+         show_progress: bool = False,
      ) -> pd.Series:
          """Generate a response for each row after serialising it to JSON.
  
@@ -508,7 +552,16 @@ class OpenAIVecDataFrameAccessor:
                  {"name": "dog", "legs": 4},
                  {"name": "elephant", "legs": 4},
              ])
+             # Basic usage
              df.ai.responses("what is the animal's name?")
+ 
+             # With progress bar for large datasets
+             large_df = pd.DataFrame({"id": list(range(1000))})
+             large_df.ai.responses(
+                 "generate a name for this ID",
+                 batch_size=20,
+                 show_progress=True
+             )
              ```
          This method returns a Series of strings, each containing the
          assistant's response to the corresponding input.
@@ -524,19 +577,20 @@ class OpenAIVecDataFrameAccessor:
                  Defaults to ``128``.
              temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
              top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Responses aligned with the DataFrame's original index.
          """
          return self.responses_with_cache(
              instructions=instructions,
-             cache=BatchingMapProxy(batch_size=batch_size),
+             cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
              response_format=response_format,
              temperature=temperature,
              top_p=top_p,
          )
  
-     def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+     def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
          """Execute a prepared task on each DataFrame row after serialising it to JSON.
  
          This method applies a pre-configured task to each row in the DataFrame,
@@ -566,6 +620,7 @@ class OpenAIVecDataFrameAccessor:
                  response format, and other parameters for processing the inputs.
              batch_size (int, optional): Number of requests sent in one batch
                  to optimize API usage. Defaults to 128.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are instances of the task's
@@ -575,7 +630,7 @@ class OpenAIVecDataFrameAccessor:
              lambda df: (
                  df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
                  .map(lambda x: json.dumps(x, ensure_ascii=False))
-                 .ai.task(task=task, batch_size=batch_size)
+                 .ai.task(task=task, batch_size=batch_size, show_progress=show_progress)
              )
          )
  
@@ -681,7 +736,7 @@ class AsyncOpenAIVecSeriesAccessor:
          instructions: str,
          cache: AsyncBatchingMapProxy[str, ResponseFormat],
          response_format: Type[ResponseFormat] = str,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
      ) -> pd.Series:
          """Call an LLM once for every Series element using a provided cache (asynchronously).
@@ -848,9 +903,10 @@ class AsyncOpenAIVecSeriesAccessor:
          instructions: str,
          response_format: Type[ResponseFormat] = str,
          batch_size: int = 128,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
          max_concurrency: int = 8,
+         show_progress: bool = False,
      ) -> pd.Series:
          """Call an LLM once for every Series element (asynchronously).
  
@@ -859,6 +915,15 @@ class AsyncOpenAIVecSeriesAccessor:
              animals = pd.Series(["cat", "dog", "elephant"])
              # Must be awaited
              results = await animals.aio.responses("translate to French")
+ 
+             # With progress bar for large datasets
+             large_series = pd.Series(["data"] * 1000)
+             results = await large_series.aio.responses(
+                 "analyze this data",
+                 batch_size=32,
+                 max_concurrency=4,
+                 show_progress=True
+             )
              ```
          This method returns a Series of strings, each containing the
          assistant's response to the corresponding input.
@@ -875,6 +940,7 @@ class AsyncOpenAIVecSeriesAccessor:
              top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
              max_concurrency (int, optional): Maximum number of concurrent
                  requests. Defaults to ``8``.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are instances of ``response_format``.
@@ -884,13 +950,17 @@ class AsyncOpenAIVecSeriesAccessor:
          """
          return await self.responses_with_cache(
              instructions=instructions,
-             cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
+             cache=AsyncBatchingMapProxy(
+                 batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+             ),
              response_format=response_format,
              temperature=temperature,
              top_p=top_p,
          )
  
-     async def embeddings(self, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
+     async def embeddings(
+         self, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+     ) -> pd.Series:
          """Compute OpenAI embeddings for every Series element (asynchronously).
  
          Example:
@@ -898,6 +968,14 @@ class AsyncOpenAIVecSeriesAccessor:
              animals = pd.Series(["cat", "dog", "elephant"])
              # Must be awaited
              embeddings = await animals.aio.embeddings()
+ 
+             # With progress bar for large datasets
+             large_texts = pd.Series(["text"] * 5000)
+             embeddings = await large_texts.aio.embeddings(
+                 batch_size=100,
+                 max_concurrency=4,
+                 show_progress=True
+             )
              ```
          This method returns a Series of numpy arrays, each containing the
          embedding vector for the corresponding input.
@@ -909,6 +987,7 @@ class AsyncOpenAIVecSeriesAccessor:
                  single request. Defaults to ``128``.
              max_concurrency (int, optional): Maximum number of concurrent
                  requests. Defaults to ``8``.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are ``np.ndarray`` objects
@@ -918,10 +997,14 @@ class AsyncOpenAIVecSeriesAccessor:
          This is an asynchronous method and must be awaited.
          """
          return await self.embeddings_with_cache(
-             cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
+             cache=AsyncBatchingMapProxy(
+                 batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+             ),
          )
  
-     async def task(self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
+     async def task(
+         self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+     ) -> pd.Series:
          """Execute a prepared task on every Series element (asynchronously).
  
          This method applies a pre-configured task to each element in the Series,
@@ -938,6 +1021,15 @@ class AsyncOpenAIVecSeriesAccessor:
              reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
              # Must be awaited
              results = await reviews.aio.task(sentiment_task)
+ 
+             # With progress bar for large datasets
+             large_reviews = pd.Series(["review text"] * 2000)
+             results = await large_reviews.aio.task(
+                 sentiment_task,
+                 batch_size=50,
+                 max_concurrency=4,
+                 show_progress=True
+             )
              ```
          This method returns a Series containing the task results for each
          corresponding input element, following the task's defined structure.
@@ -949,6 +1041,7 @@ class AsyncOpenAIVecSeriesAccessor:
                  request to optimize API usage. Defaults to 128.
              max_concurrency (int, optional): Maximum number of concurrent
                  requests. Defaults to 8.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are instances of the task's
@@ -959,7 +1052,9 @@ class AsyncOpenAIVecSeriesAccessor:
          """
          return await self.task_with_cache(
              task=task,
-             cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
+             cache=AsyncBatchingMapProxy(
+                 batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+             ),
          )
  
  
@@ -975,7 +1070,7 @@ class AsyncOpenAIVecDataFrameAccessor:
          instructions: str,
          cache: AsyncBatchingMapProxy[str, ResponseFormat],
          response_format: Type[ResponseFormat] = str,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
      ) -> pd.Series:
          """Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
@@ -1040,9 +1135,10 @@ class AsyncOpenAIVecDataFrameAccessor:
          instructions: str,
          response_format: Type[ResponseFormat] = str,
          batch_size: int = 128,
-         temperature: float = 0.0,
+         temperature: float | None = 0.0,
          top_p: float = 1.0,
          max_concurrency: int = 8,
+         show_progress: bool = False,
      ) -> pd.Series:
          """Generate a response for each row after serialising it to JSON (asynchronously).
  
@@ -1055,6 +1151,15 @@ class AsyncOpenAIVecDataFrameAccessor:
              ])
              # Must be awaited
              results = await df.aio.responses("what is the animal's name?")
+ 
+             # With progress bar for large datasets
+             large_df = pd.DataFrame({"id": list(range(1000))})
+             results = await large_df.aio.responses(
+                 "generate a name for this ID",
+                 batch_size=20,
+                 max_concurrency=4,
+                 show_progress=True
+             )
              ```
          This method returns a Series of strings, each containing the
          assistant's response to the corresponding input.
@@ -1072,6 +1177,7 @@ class AsyncOpenAIVecDataFrameAccessor:
              top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
              max_concurrency (int, optional): Maximum number of concurrent
                  requests. Defaults to ``8``.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Responses aligned with the DataFrame's original index.
@@ -1081,13 +1187,17 @@ class AsyncOpenAIVecDataFrameAccessor:
          """
          return await self.responses_with_cache(
              instructions=instructions,
-             cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
+             cache=AsyncBatchingMapProxy(
+                 batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+             ),
              response_format=response_format,
              temperature=temperature,
              top_p=top_p,
          )
  
-     async def task(self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
+     async def task(
+         self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+     ) -> pd.Series:
          """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
  
          This method applies a pre-configured task to each row in the DataFrame,
@@ -1109,6 +1219,15 @@ class AsyncOpenAIVecDataFrameAccessor:
              ])
              # Must be awaited
              results = await df.aio.task(analysis_task)
+ 
+             # With progress bar for large datasets
+             large_df = pd.DataFrame({"id": list(range(1000))})
+             results = await large_df.aio.task(
+                 analysis_task,
+                 batch_size=50,
+                 max_concurrency=4,
+                 show_progress=True
+             )
              ```
          This method returns a Series containing the task results for each
          corresponding row, following the task's defined structure.
@@ -1120,6 +1239,7 @@ class AsyncOpenAIVecDataFrameAccessor:
                  to optimize API usage. Defaults to 128.
              max_concurrency (int, optional): Maximum number of concurrent
                  requests. Defaults to 8.
+             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
  
          Returns:
              pandas.Series: Series whose values are instances of the task's
@@ -1140,6 +1260,7 @@ class AsyncOpenAIVecDataFrameAccessor:
              task=task,
              batch_size=batch_size,
              max_concurrency=max_concurrency,
+             show_progress=show_progress,
          )
  
      async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T: