openaivec 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/__init__.py +2 -2
- openaivec/di.py +3 -3
- openaivec/embeddings.py +9 -8
- openaivec/model.py +37 -1
- openaivec/pandas_ext.py +158 -37
- openaivec/prompt.py +34 -13
- openaivec/provider.py +70 -18
- openaivec/proxy.py +166 -28
- openaivec/responses.py +83 -34
- openaivec/serialize.py +1 -1
- openaivec/spark.py +23 -22
- openaivec/task/customer_support/__init__.py +6 -12
- openaivec/task/customer_support/customer_sentiment.py +12 -4
- openaivec/task/customer_support/inquiry_classification.py +11 -4
- openaivec/task/customer_support/inquiry_summary.py +8 -3
- openaivec/task/customer_support/intent_analysis.py +10 -4
- openaivec/task/customer_support/response_suggestion.py +10 -4
- openaivec/task/customer_support/urgency_analysis.py +8 -3
- openaivec/task/nlp/__init__.py +3 -3
- openaivec/task/nlp/dependency_parsing.py +4 -2
- openaivec/task/nlp/keyword_extraction.py +3 -2
- openaivec/task/nlp/morphological_analysis.py +4 -2
- openaivec/task/nlp/named_entity_recognition.py +4 -2
- openaivec/task/nlp/sentiment_analysis.py +7 -2
- openaivec/task/nlp/translation.py +1 -1
- openaivec/task/table/__init__.py +1 -1
- openaivec/task/table/fillna.py +4 -3
- openaivec/util.py +0 -1
- {openaivec-0.13.1.dist-info → openaivec-0.13.3.dist-info}/METADATA +42 -8
- openaivec-0.13.3.dist-info/RECORD +34 -0
- openaivec-0.13.1.dist-info/RECORD +0 -34
- {openaivec-0.13.1.dist-info → openaivec-0.13.3.dist-info}/WHEEL +0 -0
- {openaivec-0.13.1.dist-info → openaivec-0.13.3.dist-info}/licenses/LICENSE +0 -0
openaivec/__init__.py
CHANGED
````diff
@@ -1,5 +1,5 @@
-from .embeddings import
-from .responses import
+from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from .responses import AsyncBatchResponses, BatchResponses
 
 __all__ = [
     "BatchResponses",
````
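The rewritten import lines make the package's top-level re-exports explicit. A quick import check, assuming the 0.13.3 wheel is installed (the four names come from the added lines and from `__all__`):

```python
# The four batch classes re-exported at package top level in 0.13.3.
from openaivec import (
    AsyncBatchEmbeddings,
    AsyncBatchResponses,
    BatchEmbeddings,
    BatchResponses,
)

print(BatchResponses.__name__)  # listed in __all__ above
```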
openaivec/di.py
CHANGED
````diff
@@ -11,14 +11,14 @@ are created once and reused across multiple resolve calls.
 Example:
     ```python
     from openaivec.di import Container
-
+
     class DatabaseService:
         def __init__(self):
             self.connection = "database://localhost"
-
+
     container = Container()
     container.register(DatabaseService, lambda: DatabaseService())
-
+
     db1 = container.resolve(DatabaseService)
     db2 = container.resolve(DatabaseService)
     print(db1 is db2)  # True - same instance
````

The removed and added lines are all blank, so this hunk is a whitespace-only cleanup of the docstring example.
openaivec/embeddings.py
CHANGED
````diff
@@ -6,9 +6,9 @@ import numpy as np
 from numpy.typing import NDArray
 from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
 
-from .log import observe
-from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from .util import backoff, backoff_async
+from openaivec.log import observe
+from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec.util import backoff, backoff_async
 
 __all__ = [
     "BatchEmbeddings",
@@ -24,7 +24,8 @@ class BatchEmbeddings:
 
     Attributes:
         client (OpenAI): Configured OpenAI client.
-        model_name (str):
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
+            (e.g., ``"text-embedding-3-small"``).
         cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
     """
 
@@ -38,7 +39,7 @@ class BatchEmbeddings:
 
         Args:
            client (OpenAI): OpenAI client.
-            model_name (str):
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
 
        Returns:
@@ -90,7 +91,7 @@ class AsyncBatchEmbeddings:
         import asyncio
         import numpy as np
         from openai import AsyncOpenAI
-
+        from openaivec import AsyncBatchEmbeddings
 
         # Assuming openai_async_client is an initialized AsyncOpenAI client
         openai_async_client = AsyncOpenAI()  # Replace with your actual client initialization
@@ -119,7 +120,7 @@ class AsyncBatchEmbeddings:
 
     Attributes:
         client (AsyncOpenAI): Configured OpenAI async client.
-        model_name (str):
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
        cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
    """
 
@@ -141,7 +142,7 @@ class AsyncBatchEmbeddings:
 
         Args:
            client (AsyncOpenAI): OpenAI async client.
-            model_name (str):
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
            max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
 
````
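The clarified `model_name` docs distinguish Azure deployment names from plain OpenAI model names. A minimal construction sketch based only on the Args documented above; treating `client`, `model_name`, and `batch_size` as constructor arguments is an assumption, since the hunks do not show the enclosing signature:

```python
# Sketch, assuming BatchEmbeddings accepts the documented Args directly.
from openai import OpenAI
from openaivec import BatchEmbeddings

client = OpenAI()  # reads OPENAI_API_KEY from the environment
embedder = BatchEmbeddings(
    client=client,
    model_name="text-embedding-3-small",  # on Azure OpenAI: your deployment name
    batch_size=128,  # max unique inputs per API call
)
```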
openaivec/model.py
CHANGED
````diff
@@ -59,29 +59,65 @@ class PreparedTask:
 
 @dataclass(frozen=True)
 class ResponsesModelName:
+    """Container for responses model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI responses API.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class EmbeddingsModelName:
+    """Container for embeddings model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI embeddings API.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class OpenAIAPIKey:
+    """Container for OpenAI API key configuration.
+
+    Attributes:
+        value (str): The API key for OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class AzureOpenAIAPIKey:
+    """Container for Azure OpenAI API key configuration.
+
+    Attributes:
+        value (str): The API key for Azure OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
-class
+class AzureOpenAIBaseURL:
+    """Container for Azure OpenAI base URL configuration.
+
+    Attributes:
+        value (str): The base URL for Azure OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class AzureOpenAIAPIVersion:
+    """Container for Azure OpenAI API version configuration.
+
+    Attributes:
+        value (str): The API version for Azure OpenAI services.
+    """
+
     value: str
````
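Every configuration container above is a frozen single-field dataclass, so a value registered in the DI container cannot be mutated afterwards. A small sketch of that behavior; the import path follows the `from openaivec.model import ...` line in the pandas_ext hunks below:

```python
# frozen=True dataclasses raise on attribute assignment after construction.
import dataclasses
from openaivec.model import ResponsesModelName

name = ResponsesModelName("gpt-4.1-mini")
print(name.value)  # gpt-4.1-mini

try:
    name.value = "gpt-4o"
except dataclasses.FrozenInstanceError:
    print("immutable, as expected")
```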
openaivec/pandas_ext.py
CHANGED
````diff
@@ -7,7 +7,7 @@ from openaivec import pandas_ext
 
 # Option 1: Use environment variables (automatic detection)
 # Set OPENAI_API_KEY or Azure OpenAI environment variables
-# (AZURE_OPENAI_API_KEY,
+# (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
 # No explicit setup needed - clients are automatically created
 
 # Option 2: Use an existing OpenAI client instance
@@ -17,14 +17,18 @@ pandas_ext.use(client)
 # Option 3: Use an existing Azure OpenAI client instance
 azure_client = AzureOpenAI(
     api_key="your-azure-key",
-
-    api_version="
+    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+    api_version="preview"
 )
 pandas_ext.use(azure_client)
 
-# Option 4: Use async
-
-
+# Option 4: Use async Azure OpenAI client instance
+async_azure_client = AsyncAzureOpenAI(
+    api_key="your-azure-key",
+    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+    api_version="preview"
+)
+pandas_ext.use_async(async_azure_client)
 
 # Set up model names (optional, defaults shown)
 pandas_ext.responses_model("gpt-4.1-mini")
@@ -46,12 +50,12 @@ import tiktoken
 from openai import AsyncOpenAI, OpenAI
 from pydantic import BaseModel
 
-from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
-from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
-from .provider import CONTAINER
-from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from .responses import AsyncBatchResponses, BatchResponses
-from .task.table import FillNaResponse, fillna
+from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
+from openaivec.provider import CONTAINER, _check_azure_v1_api_url
+from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec.responses import AsyncBatchResponses, BatchResponses
+from openaivec.task.table import FillNaResponse, fillna
 
 __all__ = [
     "use",
````
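Option 1 in the updated docstring relies on the three Azure variables now spelled out in the comment. A sketch of that path with placeholder values; the resource-URL shape is copied from Option 3 above:

```python
# Environment-variable configuration (Option 1); values are placeholders.
import os

os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
os.environ["AZURE_OPENAI_API_VERSION"] = "preview"

from openaivec import pandas_ext  # clients are created automatically
```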
````diff
@@ -74,6 +78,10 @@ def use(client: OpenAI) -> None:
         `openai.AzureOpenAI` instance.
     The same instance is reused by every helper in this module.
     """
+    # Check Azure v1 API URL if using AzureOpenAI client
+    if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
+
     CONTAINER.register(OpenAI, lambda: client)
 
 
@@ -85,6 +93,10 @@ def use_async(client: AsyncOpenAI) -> None:
         `openai.AsyncAzureOpenAI` instance.
     The same instance is reused by every helper in this module.
     """
+    # Check Azure v1 API URL if using AsyncAzureOpenAI client
+    if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
+
     CONTAINER.register(AsyncOpenAI, lambda: client)
 
 
````
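Both setters now route Azure clients through `_check_azure_v1_api_url` before registration. The `provider.py` hunks are not shown here, so whether a non-v1 URL warns or raises is unknown; what the guard inspects is the client's `base_url`. A sketch of the path that passes the check, reusing Option 3 from the module docstring:

```python
# An Azure client built on the /openai/v1/ base URL, as the new guard expects.
from openai import AzureOpenAI
from openaivec import pandas_ext

azure_client = AzureOpenAI(
    api_key="your-azure-key",
    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
    api_version="preview",
)
pandas_ext.use(azure_client)  # base_url is validated before registration
```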
````diff
@@ -92,7 +104,7 @@ def responses_model(name: str) -> None:
     """Override the model used for text responses.
 
     Args:
-        name (str):
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
             (for example, ``gpt-4.1-mini``).
     """
     CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
@@ -102,7 +114,8 @@ def embeddings_model(name: str) -> None:
     """Override the model used for text embeddings.
 
     Args:
-        name (str):
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name,
+            e.g. ``text-embedding-3-small``.
     """
     CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
 
````
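The two setters take the same kind of argument; a combined sketch using the example names documented above (on Azure OpenAI these would be deployment names):

```python
from openaivec import pandas_ext

pandas_ext.responses_model("gpt-4.1-mini")
pandas_ext.embeddings_model("text-embedding-3-small")
```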
````diff
@@ -143,7 +156,7 @@ class OpenAIVecSeriesAccessor:
         instructions: str,
         cache: BatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         client: BatchResponses = BatchResponses(
@@ -205,15 +218,25 @@ class OpenAIVecSeriesAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Call an LLM once for every Series element.
 
         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.responses("translate to French")
+
+            # With progress bar in Jupyter notebooks
+            large_series = pd.Series(["data"] * 1000)
+            large_series.ai.responses(
+                "analyze this data",
+                batch_size=32,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
````
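The `temperature` annotation widens from `float` to `float | None` throughout this file. The diff does not show how `None` is consumed downstream, so this is an inference: passing `None` presumably omits the sampling parameter from the request, which some models require. A sketch under that assumption:

```python
import pandas as pd
from openaivec import pandas_ext  # registers the .ai accessor

animals = pd.Series(["cat", "dog", "elephant"])
# Assumption: temperature=None means "send no temperature parameter".
translations = animals.ai.responses("translate to French", temperature=None)
```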
````diff
@@ -228,13 +251,14 @@ class OpenAIVecSeriesAccessor:
                 request. Defaults to ``128``.
             temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
         """
         return self.responses_with_cache(
             instructions=instructions,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
@@ -287,7 +311,7 @@ class OpenAIVecSeriesAccessor:
         )
         return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
 
-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+    def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Execute a prepared task on every Series element.
 
         This method applies a pre-configured task to each element in the Series,
````
````diff
@@ -302,7 +326,16 @@ class OpenAIVecSeriesAccessor:
             sentiment_task = PreparedTask(...)
 
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+            # Basic usage
             results = reviews.ai.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = large_reviews.ai.task(
+                sentiment_task,
+                batch_size=50,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding input element, following the task's defined structure.
@@ -312,6 +345,7 @@ class OpenAIVecSeriesAccessor:
             response format, and other parameters for processing the inputs.
             batch_size (int, optional): Number of prompts grouped into a single
                 request to optimize API usage. Defaults to 128.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
````
````diff
@@ -319,16 +353,24 @@ class OpenAIVecSeriesAccessor:
         """
         return self.task_with_cache(
             task=task,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
         )
 
-    def embeddings(self, batch_size: int = 128) -> pd.Series:
+    def embeddings(self, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Compute OpenAI embeddings for every Series element.
 
         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = large_texts.ai.embeddings(
+                batch_size=100,
+                show_progress=True
+            )
             ```
         This method returns a Series of numpy arrays, each containing the
         embedding vector for the corresponding input.
````
````diff
@@ -338,13 +380,14 @@ class OpenAIVecSeriesAccessor:
         Args:
             batch_size (int, optional): Number of inputs grouped into a
                 single request. Defaults to ``128``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects
                 (dtype ``float32``).
         """
         return self.embeddings_with_cache(
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
         )
 
     def count_tokens(self) -> pd.Series:
````
````diff
@@ -438,7 +481,7 @@ class OpenAIVecDataFrameAccessor:
         instructions: str,
         cache: BatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON using a provided cache.
@@ -496,8 +539,9 @@ class OpenAIVecDataFrameAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON.
 
@@ -508,7 +552,16 @@ class OpenAIVecDataFrameAccessor:
                 {"name": "dog", "legs": 4},
                 {"name": "elephant", "legs": 4},
             ])
+            # Basic usage
             df.ai.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            large_df.ai.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
````
````diff
@@ -524,19 +577,20 @@ class OpenAIVecDataFrameAccessor:
                 Defaults to ``128``.
             temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Responses aligned with the DataFrame's original index.
         """
         return self.responses_with_cache(
             instructions=instructions,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )
 
-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+    def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Execute a prepared task on each DataFrame row after serialising it to JSON.
 
         This method applies a pre-configured task to each row in the DataFrame,
@@ -566,6 +620,7 @@ class OpenAIVecDataFrameAccessor:
             response format, and other parameters for processing the inputs.
             batch_size (int, optional): Number of requests sent in one batch
                 to optimize API usage. Defaults to 128.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
@@ -575,7 +630,7 @@ class OpenAIVecDataFrameAccessor:
             lambda df: (
                 df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
                 .map(lambda x: json.dumps(x, ensure_ascii=False))
-                .ai.task(task=task, batch_size=batch_size)
+                .ai.task(task=task, batch_size=batch_size, show_progress=show_progress)
             )
         )
 
````
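The pipeline in the last hunk shows exactly what a row-level task sees: each row becomes a dict, is JSON-encoded, and only then is handed to the Series-level `task`. Replaying just the serialisation step makes that concrete:

```python
# The row -> JSON-string transformation performed before .ai.task runs.
import json
import pandas as pd

df = pd.DataFrame([{"name": "cat", "legs": 4}, {"name": "dog", "legs": 4}])
records = pd.Series(df.to_dict(orient="records"), index=df.index, name="record")
payloads = records.map(lambda x: json.dumps(x, ensure_ascii=False))
print(payloads.iloc[0])  # {"name": "cat", "legs": 4}
```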
````diff
@@ -681,7 +736,7 @@ class AsyncOpenAIVecSeriesAccessor:
         instructions: str,
         cache: AsyncBatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Call an LLM once for every Series element using a provided cache (asynchronously).
@@ -848,9 +903,10 @@ class AsyncOpenAIVecSeriesAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Call an LLM once for every Series element (asynchronously).
 
@@ -859,6 +915,15 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             results = await animals.aio.responses("translate to French")
+
+            # With progress bar for large datasets
+            large_series = pd.Series(["data"] * 1000)
+            results = await large_series.aio.responses(
+                "analyze this data",
+                batch_size=32,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
@@ -875,6 +940,7 @@ class AsyncOpenAIVecSeriesAccessor:
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
````
````diff
@@ -884,13 +950,17 @@ class AsyncOpenAIVecSeriesAccessor:
         """
         return await self.responses_with_cache(
             instructions=instructions,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )
 
-    async def embeddings(
+    async def embeddings(
+        self, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Compute OpenAI embeddings for every Series element (asynchronously).
 
         Example:
@@ -898,6 +968,14 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             embeddings = await animals.aio.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = await large_texts.aio.embeddings(
+                batch_size=100,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of numpy arrays, each containing the
         embedding vector for the corresponding input.
@@ -909,6 +987,7 @@ class AsyncOpenAIVecSeriesAccessor:
                 single request. Defaults to ``128``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects
````
````diff
@@ -918,10 +997,14 @@ class AsyncOpenAIVecSeriesAccessor:
         This is an asynchronous method and must be awaited.
         """
         return await self.embeddings_with_cache(
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
         )
 
-    async def task(
+    async def task(
+        self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Execute a prepared task on every Series element (asynchronously).
 
         This method applies a pre-configured task to each element in the Series,
@@ -938,6 +1021,15 @@ class AsyncOpenAIVecSeriesAccessor:
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
             # Must be awaited
             results = await reviews.aio.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = await large_reviews.aio.task(
+                sentiment_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding input element, following the task's defined structure.
@@ -949,6 +1041,7 @@ class AsyncOpenAIVecSeriesAccessor:
                 request to optimize API usage. Defaults to 128.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
@@ -959,7 +1052,9 @@ class AsyncOpenAIVecSeriesAccessor:
         """
         return await self.task_with_cache(
             task=task,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
         )
 
 
````
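All `.aio` accessor methods above return coroutines; the docstring examples assume a running event loop, as in Jupyter. In a plain script they would be driven with `asyncio.run`, sketched here around the docstring's own call:

```python
# Driving the async accessor outside a notebook; the call is from the docstring.
import asyncio
import pandas as pd
from openaivec import pandas_ext  # registers the .aio accessor

async def main() -> None:
    animals = pd.Series(["cat", "dog", "elephant"])
    results = await animals.aio.responses("translate to French")
    print(results)

asyncio.run(main())
```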
````diff
@@ -975,7 +1070,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         instructions: str,
         cache: AsyncBatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
@@ -1040,9 +1135,10 @@ class AsyncOpenAIVecDataFrameAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON (asynchronously).
 
@@ -1055,6 +1151,15 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
@@ -1072,6 +1177,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Responses aligned with the DataFrame's original index.
````
````diff
@@ -1081,13 +1187,17 @@ class AsyncOpenAIVecDataFrameAccessor:
         """
         return await self.responses_with_cache(
             instructions=instructions,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )
 
-    async def task(
+    async def task(
+        self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
 
         This method applies a pre-configured task to each row in the DataFrame,
@@ -1109,6 +1219,15 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.task(analysis_task)
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.task(
+                analysis_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding row, following the task's defined structure.
@@ -1120,6 +1239,7 @@ class AsyncOpenAIVecDataFrameAccessor:
                 to optimize API usage. Defaults to 128.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
````
````diff
@@ -1140,6 +1260,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             task=task,
             batch_size=batch_size,
             max_concurrency=max_concurrency,
+            show_progress=show_progress,
         )
 
     async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
````