openaivec 0.12.5__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/{di.py → _di.py} +36 -12
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1496 -318
  18. openaivec/spark.py +485 -183
  19. openaivec/task/__init__.py +9 -7
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +17 -15
  22. openaivec/task/customer_support/inquiry_classification.py +23 -22
  23. openaivec/task/customer_support/inquiry_summary.py +14 -13
  24. openaivec/task/customer_support/intent_analysis.py +21 -19
  25. openaivec/task/customer_support/response_suggestion.py +16 -16
  26. openaivec/task/customer_support/urgency_analysis.py +24 -25
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +10 -12
  29. openaivec/task/nlp/keyword_extraction.py +11 -14
  30. openaivec/task/nlp/morphological_analysis.py +12 -14
  31. openaivec/task/nlp/named_entity_recognition.py +16 -18
  32. openaivec/task/nlp/sentiment_analysis.py +14 -11
  33. openaivec/task/nlp/translation.py +6 -9
  34. openaivec/task/table/__init__.py +2 -2
  35. openaivec/task/table/fillna.py +11 -11
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/model.py +0 -67
  41. openaivec/provider.py +0 -45
  42. openaivec/responses.py +0 -393
  43. openaivec/serialize.py +0 -225
  44. openaivec-0.12.5.dist-info/METADATA +0 -696
  45. openaivec-0.12.5.dist-info/RECORD +0 -33
  46. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
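
The headline change in this release is an API rename in `pandas_ext` (visible in the hunks below): `use`/`use_async` become `set_client`/`set_async_client`, and `responses_model`/`embeddings_model` become `set_responses_model`/`set_embeddings_model`, each now paired with a getter. A minimal migration sketch based solely on the renames in this diff:

```python
from openai import OpenAI
from openaivec import pandas_ext

client = OpenAI(api_key="your-api-key")

# 0.12.5
# pandas_ext.use(client)
# pandas_ext.responses_model("gpt-4.1-mini")
# pandas_ext.embeddings_model("text-embedding-3-small")

# 1.0.10
pandas_ext.set_client(client)
pandas_ext.set_responses_model("gpt-4.1-mini")
pandas_ext.set_embeddings_model("text-embedding-3-small")
```

Note also that `di.py`, `log.py`, `prompt.py`, and `util.py` move behind an underscore prefix (`_di.py`, `_log.py`, `_prompt.py`, `_util.py`), signalling they are no longer public API.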
openaivec/pandas_ext.py CHANGED
@@ -7,28 +7,35 @@ from openaivec import pandas_ext
 
     # Option 1: Use environment variables (automatic detection)
     # Set OPENAI_API_KEY or Azure OpenAI environment variables
-    # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_API_ENDPOINT, AZURE_OPENAI_API_VERSION)
+    # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
     # No explicit setup needed - clients are automatically created
 
-    # Option 2: Use an existing OpenAI client instance
+    # Option 2: Register an existing OpenAI client instance
     client = OpenAI(api_key="your-api-key")
-    pandas_ext.use(client)
+    pandas_ext.set_client(client)
 
-    # Option 3: Use an existing Azure OpenAI client instance
+    # Option 3: Register an Azure OpenAI client instance
     azure_client = AzureOpenAI(
         api_key="your-azure-key",
-        azure_endpoint="https://your-resource.openai.azure.com",
-        api_version="2024-02-01"
+        base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+        api_version="preview"
     )
-    pandas_ext.use(azure_client)
+    pandas_ext.set_client(azure_client)
 
-    # Option 4: Use async clients
-    async_client = AsyncOpenAI(api_key="your-api-key")
-    pandas_ext.use_async(async_client)
+    # Option 4: Register an async Azure OpenAI client instance
+    async_azure_client = AsyncAzureOpenAI(
+        api_key="your-azure-key",
+        base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+        api_version="preview"
+    )
+    pandas_ext.set_async_client(async_azure_client)
 
     # Set up model names (optional, defaults shown)
-    pandas_ext.responses_model("gpt-4.1-mini")
-    pandas_ext.embeddings_model("text-embedding-3-small")
+    pandas_ext.set_responses_model("gpt-4.1-mini")
+    pandas_ext.set_embeddings_model("text-embedding-3-small")
+
+    # Inspect current configuration
+    configured_model = pandas_ext.get_responses_model()
     ```
 
 This module provides `.ai` and `.aio` accessors for pandas Series and DataFrames
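
For Option 1 above, no registration call is needed at all; clients are built from environment variables on first use. A sketch, assuming the variable names listed in the new docstring (note that `AZURE_OPENAI_BASE_URL` replaces the old `AZURE_OPENAI_API_ENDPOINT`):

```python
import os

# Plain OpenAI...
os.environ["OPENAI_API_KEY"] = "your-api-key"

# ...or Azure OpenAI (v1-style base URL).
os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
os.environ["AZURE_OPENAI_API_VERSION"] = "preview"

import pandas as pd
from openaivec import pandas_ext  # noqa: E402  (clients are created automatically)

pd.Series(["bonjour"]).ai.responses("translate to English")
```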
@@ -38,7 +45,8 @@ to easily interact with OpenAI APIs for tasks like generating responses or embed
 import inspect
 import json
 import logging
-from typing import Any, Awaitable, Callable, List, Type, TypeVar
+from collections.abc import Awaitable, Callable
+from typing import TypeVar
 
 import numpy as np
 import pandas as pd
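
The import churn here is the standard modern-Python typing migration that the new signatures below rely on: `Awaitable`/`Callable` now come from `collections.abc`, and `Type[...]`/`Optional`-style hints give way to `type[...]` and `X | None`. A sketch (the function name is illustrative only):

```python
from collections.abc import Callable  # was: from typing import Callable

# 0.12.5-style hints            ->  1.0.10-style hints
# response_format: Type[str]    ->  response_format: type[str]
# batch_size: int = 128         ->  batch_size: int | None = None
def responses_stub(response_format: type[str] = str, batch_size: int | None = None) -> None:
    ...
```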
@@ -46,87 +54,126 @@ import tiktoken
 from openai import AsyncOpenAI, OpenAI
 from pydantic import BaseModel
 
-from .di import Container
-from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
-from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
-from .provider import provide_async_openai_client, provide_openai_client
-from .responses import AsyncBatchResponses, BatchResponses
-from .task.table import FillNaResponse, fillna
+from openaivec._cache import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
+from openaivec._provider import CONTAINER, _check_azure_v1_api_url
+from openaivec._responses import AsyncBatchResponses, BatchResponses
+from openaivec._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
+from openaivec.task.table import FillNaResponse, fillna
 
 __all__ = [
-    "use",
-    "use_async",
-    "responses_model",
-    "embeddings_model",
+    "get_async_client",
+    "get_client",
+    "get_embeddings_model",
+    "get_responses_model",
+    "set_async_client",
+    "set_client",
+    "set_embeddings_model",
+    "set_responses_model",
 ]
 
 _LOGGER = logging.getLogger(__name__)
 
 
+# ---------------------------------------------------------------------------
+# Internal helpers (not exported)
+# ---------------------------------------------------------------------------
+def _df_rows_to_json_series(df: pd.DataFrame) -> pd.Series:
+    """Return a Series of JSON strings (UTF-8, no ASCII escaping) representing DataFrame rows.
+
+    Each element is the JSON serialisation of the corresponding row as a dict. Index and
+    name are preserved so downstream operations retain alignment. This consolidates the
+    previously duplicated inline pipeline used by responses*/task* DataFrame helpers.
+    """
+    return pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
+        lambda x: json.dumps(x, ensure_ascii=False)
+    )
+
+
 T = TypeVar("T")  # For pipe function return type
 
-_DI = Container()
-_DI.register(OpenAI, provide_openai_client)
-_DI.register(AsyncOpenAI, provide_async_openai_client)
-_DI.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
-_DI.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))
 
+def set_client(client: OpenAI) -> None:
+    """Register a custom OpenAI-compatible client for pandas helpers.
 
-def _provide_tiktoken_encoding() -> tiktoken.Encoding:
-    model_name = _DI.resolve(ResponsesModelName).value
-    try:
-        return tiktoken.encoding_for_model(model_name)
-    except KeyError:
-        _LOGGER.info(
-            "The model name '%s' is not supported by tiktoken. Using 'o200k_base' encoding instead.",
-            model_name,
-        )
-        return tiktoken.get_encoding("o200k_base")
+    Args:
+        client (OpenAI): A pre-configured `openai.OpenAI` or
+            `openai.AzureOpenAI` instance reused by every helper in this module.
+    """
+    if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
 
+    CONTAINER.register(OpenAI, lambda: client)
 
-_DI.register(tiktoken.Encoding, _provide_tiktoken_encoding)
 
+def get_client() -> OpenAI:
+    """Get the currently registered OpenAI-compatible client.
 
-def use(client: OpenAI) -> None:
-    """Register a custom OpenAI‑compatible client.
+    Returns:
+        OpenAI: The registered `openai.OpenAI` or `openai.AzureOpenAI` instance.
+    """
+    return CONTAINER.resolve(OpenAI)
+
+
+def set_async_client(client: AsyncOpenAI) -> None:
+    """Register a custom asynchronous OpenAI-compatible client.
 
     Args:
-        client (OpenAI): A preconfigured `openai.OpenAI` or
-            `openai.AzureOpenAI` instance.
-            The same instance is reused by every helper in this module.
+        client (AsyncOpenAI): A pre-configured `openai.AsyncOpenAI` or
+            `openai.AsyncAzureOpenAI` instance reused by every helper in this module.
     """
-    _DI.register(OpenAI, lambda: client)
+    if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
 
+    CONTAINER.register(AsyncOpenAI, lambda: client)
 
-def use_async(client: AsyncOpenAI) -> None:
-    """Register a custom asynchronous OpenAI‑compatible client.
 
-    Args:
-        client (AsyncOpenAI): A pre‑configured `openai.AsyncOpenAI` or
-            `openai.AsyncAzureOpenAI` instance.
-            The same instance is reused by every helper in this module.
+def get_async_client() -> AsyncOpenAI:
+    """Get the currently registered asynchronous OpenAI-compatible client.
+
+    Returns:
+        AsyncOpenAI: The registered `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` instance.
     """
-    _DI.register(AsyncOpenAI, lambda: client)
+    return CONTAINER.resolve(AsyncOpenAI)
 
 
-def responses_model(name: str) -> None:
+def set_responses_model(name: str) -> None:
     """Override the model used for text responses.
 
     Args:
-        name (str): Model name as listed in the OpenAI API
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
             (for example, ``gpt-4.1-mini``).
     """
-    _DI.register(ResponsesModelName, lambda: ResponsesModelName(name))
-    _DI.register(tiktoken.Encoding, _provide_tiktoken_encoding)
+    CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
+
+
+def get_responses_model() -> str:
+    """Get the currently registered model name for text responses.
+
+    Returns:
+        str: The model name (for example, ``gpt-4.1-mini``).
+    """
+    return CONTAINER.resolve(ResponsesModelName).value
 
 
-def embeddings_model(name: str) -> None:
+def set_embeddings_model(name: str) -> None:
     """Override the model used for text embeddings.
 
     Args:
-        name (str): Embedding model name, e.g. ``text-embedding-3-small``.
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name,
+            e.g. ``text-embedding-3-small``.
     """
-    _DI.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
+    CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
+
+
+def get_embeddings_model() -> str:
+    """Get the currently registered model name for text embeddings.
+
+    Returns:
+        str: The model name (for example, ``text-embedding-3-small``).
+    """
+    return CONTAINER.resolve(EmbeddingsModelName).value
 
 
 def _extract_value(x, series_name):
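
The hunk above replaces the module-private `_DI` container with the shared `CONTAINER` from `openaivec._provider` and pairs every setter with a getter. A round-trip sketch of the new configuration surface:

```python
from openaivec import pandas_ext

pandas_ext.set_responses_model("gpt-4.1-mini")
pandas_ext.set_embeddings_model("text-embedding-3-small")

assert pandas_ext.get_responses_model() == "gpt-4.1-mini"
assert pandas_ext.get_embeddings_model() == "text-embedding-3-small"

sync_client = pandas_ext.get_client()         # openai.OpenAI or AzureOpenAI
async_client = pandas_ext.get_async_client()  # openai.AsyncOpenAI or AsyncAzureOpenAI
```

The removed tiktoken fallback (`_provide_tiktoken_encoding`) has not disappeared outright; `count_tokens` still resolves a `tiktoken.Encoding` from `CONTAINER` in a later hunk, so the provisioning presumably now lives in `openaivec._provider`.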
@@ -160,124 +207,463 @@ class OpenAIVecSeriesAccessor:
     def __init__(self, series_obj: pd.Series):
         self._obj = series_obj
 
+    def responses_with_cache(
+        self,
+        instructions: str,
+        cache: BatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] = str,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Call an LLM once for every Series element using a provided cache.
+
+        This is a lower-level method that allows explicit cache management for advanced
+        use cases. Most users should use the standard ``responses`` method instead.
+
+        Args:
+            instructions (str): System prompt prepended to every user message.
+            cache (BatchingMapProxy[str, ResponseFormat]): Explicit cache instance for
+                batching and deduplication control.
+            response_format (type[ResponseFormat], optional): Pydantic model or built-in
+                type the assistant should return. Defaults to ``str``.
+            **api_kwargs: Arbitrary OpenAI Responses API parameters (e.g. ``temperature``,
+                ``top_p``, ``frequency_penalty``, ``presence_penalty``, ``seed``, etc.) are
+                forwarded verbatim to the underlying client.
+
+        Returns:
+            pandas.Series: Series whose values are instances of ``response_format``.
+        """
+
+        client: BatchResponses = BatchResponses(
+            client=CONTAINER.resolve(OpenAI),
+            model_name=CONTAINER.resolve(ResponsesModelName).value,
+            system_message=instructions,
+            response_format=response_format,
+            cache=cache,
+            api_kwargs=api_kwargs,
+        )
+
+        return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
+
     def responses(
         self,
         instructions: str,
-        response_format: Type[ResponseFormat] = str,
-        batch_size: int = 128,
-        temperature: float = 0.0,
-        top_p: float = 1.0,
+        response_format: type[ResponseFormat] = str,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+        **api_kwargs,
     ) -> pd.Series:
         """Call an LLM once for every Series element.
 
         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.responses("translate to French")
+
+            # With progress bar in Jupyter notebooks
+            large_series = pd.Series(["data"] * 1000)
+            large_series.ai.responses(
+                "analyze this data",
+                batch_size=32,
+                show_progress=True
+            )
+
+            # With custom temperature
+            animals.ai.responses(
+                "translate creatively",
+                temperature=0.8
+            )
             ```
-        This method returns a Series of strings, each containing the
-        assistant's response to the corresponding input.
-        The model used is set by the `responses_model` function.
-        The default model is `gpt-4.1-mini`.
 
         Args:
             instructions (str): System prompt prepended to every user message.
-            response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
+            response_format (type[ResponseFormat], optional): Pydantic model or built‑in
                 type the assistant should return. Defaults to ``str``.
-            batch_size (int, optional): Number of prompts grouped into a single
-                request. Defaults to ``128``.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            batch_size (int | None, optional): Number of prompts grouped into a single
+                request. Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
 
         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
         """
-        client: BatchResponses = BatchResponses(
-            client=_DI.resolve(OpenAI),
-            model_name=_DI.resolve(ResponsesModelName).value,
-            system_message=instructions,
+        return self.responses_with_cache(
+            instructions=instructions,
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
-            temperature=temperature,
-            top_p=top_p,
+            **api_kwargs,
+        )
+
+    def embeddings_with_cache(
+        self,
+        cache: BatchingMapProxy[str, np.ndarray],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Compute OpenAI embeddings for every Series element using a provided cache.
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured BatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management.
+
+        Example:
+            ```python
+            from openaivec._cache import BatchingMapProxy
+            import numpy as np
+
+            # Create a shared cache with custom batch size
+            shared_cache = BatchingMapProxy[str, np.ndarray](batch_size=64)
+
+            animals = pd.Series(["cat", "dog", "elephant"])
+            embeddings = animals.ai.embeddings_with_cache(cache=shared_cache)
+            ```
+
+        Args:
+            cache (BatchingMapProxy[str, np.ndarray]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            **api_kwargs: Additional keyword arguments to pass to the OpenAI API.
+
+        Returns:
+            pandas.Series: Series whose values are ``np.ndarray`` objects
+                (dtype ``float32``).
+        """
+        client: BatchEmbeddings = BatchEmbeddings(
+            client=CONTAINER.resolve(OpenAI),
+            model_name=CONTAINER.resolve(EmbeddingsModelName).value,
+            cache=cache,
+            api_kwargs=api_kwargs,
         )
 
         return pd.Series(
-            client.parse(self._obj.tolist(), batch_size=batch_size),
+            client.create(self._obj.tolist()),
             index=self._obj.index,
             name=self._obj.name,
         )
 
-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
-        """Execute a prepared task on every Series element.
+    def embeddings(self, batch_size: int | None = None, show_progress: bool = True, **api_kwargs) -> pd.Series:
+        """Compute OpenAI embeddings for every Series element.
+
+        Example:
+            ```python
+            animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
+            animals.ai.embeddings()
 
-        This method applies a pre-configured task to each element in the Series,
-        using the task's instructions and response format to generate structured
-        responses from the language model.
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = large_texts.ai.embeddings(
+                batch_size=100,
+                show_progress=True
+            )
+            ```
+
+        Args:
+            batch_size (int | None, optional): Number of inputs grouped into a
+                single request. Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
+
+        Returns:
+            pandas.Series: Series whose values are ``np.ndarray`` objects
+                (dtype ``float32``).
+        """
+        return self.embeddings_with_cache(
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+            **api_kwargs,
+        )
+
+    def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: BatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on every Series element using a provided cache.
+
+        This mirrors ``responses_with_cache`` but uses the task's stored instructions
+        and response format. A supplied ``BatchingMapProxy`` enables cross‑operation
+        deduplicated reuse and external batch size / progress control.
+
+        Example:
+            ```python
+            from openaivec._cache import BatchingMapProxy
+            shared_cache = BatchingMapProxy(batch_size=64)
+            reviews.ai.task_with_cache(sentiment_task, cache=shared_cache)
+            ```
+
+        Args:
+            task (PreparedTask): Prepared task (instructions + response_format).
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            Core routing keys (``model``, system instructions, user input) are managed
+            internally and cannot be overridden.
+
+        Returns:
+            pandas.Series: Task results aligned with the original Series index.
+        """
+        client: BatchResponses = BatchResponses(
+            client=CONTAINER.resolve(OpenAI),
+            model_name=CONTAINER.resolve(ResponsesModelName).value,
+            system_message=task.instructions,
+            response_format=task.response_format,
+            cache=cache,
+            api_kwargs=api_kwargs,
+        )
+        return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
+
+    def task(
+        self,
+        task: PreparedTask,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on every Series element.
 
         Example:
             ```python
-            from openaivec.model import PreparedTask
+            from openaivec._model import PreparedTask
 
             # Assume you have a prepared task for sentiment analysis
             sentiment_task = PreparedTask(...)
 
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+            # Basic usage
             results = reviews.ai.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = large_reviews.ai.task(
+                sentiment_task,
+                batch_size=50,
+                show_progress=True
+            )
             ```
-        This method returns a Series containing the task results for each
-        corresponding input element, following the task's defined structure.
 
         Args:
             task (PreparedTask): A pre-configured task containing instructions,
-                response format, and other parameters for processing the inputs.
-            batch_size (int, optional): Number of prompts grouped into a single
-                request to optimize API usage. Defaults to 128.
+                response format for processing the inputs.
+            batch_size (int | None, optional): Number of prompts grouped into a single
+                request to optimize API usage. Defaults to ``None`` (automatic batch size
+                optimization based on execution time). Set to a positive integer for fixed batch size.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            Core batching / routing keys (``model``, ``instructions`` / system message,
+            user ``input``) are managed by the library and cannot be overridden.
 
         Returns:
-            pandas.Series: Series whose values are instances of the task's
-                response format, aligned with the original Series index.
+            pandas.Series: Series whose values are instances of the task's response format.
         """
-        client = BatchResponses.of_task(
-            client=_DI.resolve(OpenAI), model_name=_DI.resolve(ResponsesModelName).value, task=task
+        return self.task_with_cache(
+            task=task,
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+            **api_kwargs,
         )
 
-        return pd.Series(
-            client.parse(self._obj.tolist(), batch_size=batch_size),
-            index=self._obj.index,
-            name=self._obj.name,
+    def parse_with_cache(
+        self,
+        instructions: str,
+        cache: BatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse Series values using an LLM with a provided cache.
+
+        This method allows external control over caching behavior while parsing
+        Series content into structured data. If no response format is provided,
+        the method automatically infers an appropriate schema by analyzing the
+        data patterns.
+
+        Args:
+            instructions (str): Plain language description of what information
+                to extract (e.g., "Extract customer information including name
+                and contact details"). This guides both the extraction process
+                and schema inference.
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            response_format (type[ResponseFormat] | None, optional): Target structure
+                for the parsed data. Can be a Pydantic model class, built-in type
+                (str, int, float, bool, list, dict), or None. If None, the method
+                infers an appropriate schema based on the instructions and data.
+                Defaults to None.
+            max_examples (int, optional): Maximum number of Series values to
+                analyze when inferring the schema. Only used when response_format
+                is None. Defaults to 100.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
+                frequency_penalty, presence_penalty, seed, etc.) forwarded to
+                the underlying API calls.
+
+        Returns:
+            pandas.Series: Series containing parsed structured data. Each value
+                is an instance of the specified response_format or the inferred
+                schema model, aligned with the original Series index.
+        """
+
+        schema: SchemaInferenceOutput | None = None
+        if response_format is None:
+            schema = self.infer_schema(instructions=instructions, max_examples=max_examples, **api_kwargs)
+
+        return self.responses_with_cache(
+            instructions=schema.inference_prompt if schema else instructions,
+            cache=cache,
+            response_format=response_format or schema.model,
+            **api_kwargs,
         )
 
-    def embeddings(self, batch_size: int = 128) -> pd.Series:
-        """Compute OpenAI embeddings for every Series element.
+    def parse(
+        self,
+        instructions: str,
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse Series values into structured data using an LLM.
+
+        This method extracts structured information from unstructured text in
+        the Series. When no response format is provided, it automatically
+        infers an appropriate schema by analyzing patterns in the data.
+
+        Args:
+            instructions (str): Plain language description of what information
+                to extract (e.g., "Extract product details including price,
+                category, and availability"). This guides both the extraction
+                process and schema inference.
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure for the parsed data. Can be a Pydantic model class,
+                built-in type (str, int, float, bool, list, dict), or None.
+                If None, automatically infers a schema. Defaults to None.
+            max_examples (int, optional): Maximum number of Series values to
+                analyze when inferring schema. Only used when response_format
+                is None. Defaults to 100.
+            batch_size (int | None, optional): Number of requests to process
+                per batch. None enables automatic optimization. Defaults to None.
+            show_progress (bool, optional): Display progress bar in Jupyter
+                notebooks. Defaults to True.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
+                frequency_penalty, presence_penalty, seed, etc.).
+
+        Returns:
+            pandas.Series: Series containing parsed structured data as instances
+                of response_format or the inferred schema model.
 
         Example:
             ```python
-            animals = pd.Series(["cat", "dog", "elephant"])
-            animals.ai.embeddings()
+            # With explicit schema
+            from pydantic import BaseModel
+            class Product(BaseModel):
+                name: str
+                price: float
+                in_stock: bool
+
+            descriptions = pd.Series([
+                "iPhone 15 Pro - $999, available now",
+                "Samsung Galaxy S24 - $899, out of stock"
+            ])
+            products = descriptions.ai.parse(
+                "Extract product information",
+                response_format=Product
+            )
+
+            # With automatic schema inference
+            reviews = pd.Series([
+                "Great product! 5 stars. Fast shipping.",
+                "Poor quality. 2 stars. Slow delivery."
+            ])
+            parsed = reviews.ai.parse(
+                "Extract review rating and shipping feedback"
+            )
             ```
-        This method returns a Series of numpy arrays, each containing the
-        embedding vector for the corresponding input.
-        The embedding model is set by the `embeddings_model` function.
-        The default embedding model is `text-embedding-3-small`.
+        """
+        return self.parse_with_cache(
+            instructions=instructions,
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+            response_format=response_format,
+            max_examples=max_examples,
+            **api_kwargs,
+        )
+
+    def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) -> SchemaInferenceOutput:
+        """Infer a structured data schema from Series content using AI.
+
+        This method analyzes a sample of Series values to automatically generate
+        a Pydantic model that captures the relevant information structure. The
+        inferred schema supports both flat and hierarchical (nested) structures,
+        making it suitable for complex data extraction tasks.
 
         Args:
-            batch_size (int, optional): Number of inputs grouped into a
-                single request. Defaults to ``128``.
+            instructions (str): Plain language description of the extraction goal
+                (e.g., "Extract customer information for CRM system", "Parse
+                event details for calendar integration"). This guides which
+                fields to include and their purpose.
+            max_examples (int, optional): Maximum number of Series values to
+                analyze for pattern detection. The method samples randomly up
+                to this limit. Higher values may improve schema quality but
+                increase inference time. Defaults to 100.
+            **api_kwargs: Additional OpenAI API parameters for fine-tuning
+                the inference process.
 
         Returns:
-            pandas.Series: Series whose values are ``np.ndarray`` objects
-                (dtype ``float32``).
+            InferredSchema: A comprehensive schema object containing:
+                - instructions: Refined extraction objective statement
+                - fields: Hierarchical field specifications with names, types,
+                    descriptions, and nested structures where applicable
+                - inference_prompt: Optimized prompt for consistent extraction
+                - model: Dynamically generated Pydantic model class supporting
+                    both flat and nested structures
+                - task: PreparedTask configured for batch extraction using
+                    the inferred schema
+
+        Example:
+            ```python
+            # Simple flat structure
+            reviews = pd.Series([
+                "5 stars! Great product, fast shipping to NYC.",
+                "2 stars. Product broke, slow delivery to LA."
+            ])
+            schema = reviews.ai.infer_schema(
+                "Extract review ratings and shipping information"
+            )
+
+            # Hierarchical structure
+            orders = pd.Series([
+                "Order #123: John Doe, 123 Main St, NYC. Items: iPhone ($999), Case ($29)",
+                "Order #456: Jane Smith, 456 Oak Ave, LA. Items: iPad ($799)"
+            ])
+            schema = orders.ai.infer_schema(
+                "Extract order details including customer and items"
+            )
+            # Inferred schema may include nested structures like:
+            # - customer: {name: str, address: str, city: str}
+            # - items: [{product: str, price: float}]
+
+            # Apply the schema for extraction
+            extracted = orders.ai.task(schema.task)
+            ```
+
+        Note:
+            The inference process uses multiple AI iterations to ensure schema
+            validity. Nested structures are automatically detected when the
+            data contains hierarchical relationships. The generated Pydantic
+            model ensures type safety and validation for all extracted data.
         """
-        client: BatchEmbeddings = BatchEmbeddings(
-            client=_DI.resolve(OpenAI),
-            model_name=_DI.resolve(EmbeddingsModelName).value,
-        )
+        inferer = CONTAINER.resolve(SchemaInferer)
 
-        return pd.Series(
-            client.create(self._obj.tolist(), batch_size=batch_size),
-            index=self._obj.index,
-            name=self._obj.name,
+        input: SchemaInferenceInput = SchemaInferenceInput(
+            examples=self._obj.sample(n=min(max_examples, len(self._obj))).tolist(),
+            instructions=instructions,
+            **api_kwargs,
         )
+        return inferer.infer_schema(input)
 
     def count_tokens(self) -> pd.Series:
         """Count `tiktoken` tokens per row.
@@ -288,12 +674,12 @@ class OpenAIVecSeriesAccessor:
             animals.ai.count_tokens()
             ```
         This method uses the `tiktoken` library to count tokens based on the
-        model name set by `responses_model`.
+        model name configured via `set_responses_model`.
 
         Returns:
             pandas.Series: Token counts for each element.
         """
-        encoding: tiktoken.Encoding = _DI.resolve(tiktoken.Encoding)
+        encoding: tiktoken.Encoding = CONTAINER.resolve(tiktoken.Encoding)
         return self._obj.map(encoding.encode).map(len).rename("num_tokens")
 
     def extract(self) -> pd.DataFrame:
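
Since `count_tokens` returns a plain integer Series, request-size budgeting stays in pandas. A sketch (the per-token price is hypothetical):

```python
import pandas as pd

docs = pd.Series(["short text", "a considerably longer piece of text to send"])
num_tokens = docs.ai.count_tokens()   # Series named "num_tokens"

total = int(num_tokens.sum())         # total input tokens
est_cost = total / 1_000_000 * 0.40   # assumed $0.40 per 1M input tokens
```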
@@ -333,47 +719,65 @@ class OpenAIVecDataFrameAccessor:
     def __init__(self, df_obj: pd.DataFrame):
         self._obj = df_obj
 
-    def extract(self, column: str) -> pd.DataFrame:
-        """Flatten one column of Pydantic models/dicts into top‑level columns.
+    def responses_with_cache(
+        self,
+        instructions: str,
+        cache: BatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] = str,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Generate a response for each row after serializing it to JSON using a provided cache.
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured BatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management.
 
         Example:
             ```python
+            from openaivec._cache import BatchingMapProxy
+
+            # Create a shared cache with custom batch size
+            shared_cache = BatchingMapProxy(batch_size=64)
+
             df = pd.DataFrame([
-                {"animal": {"name": "cat", "legs": 4}},
-                {"animal": {"name": "dog", "legs": 4}},
-                {"animal": {"name": "elephant", "legs": 4}},
+                {"name": "cat", "legs": 4},
+                {"name": "dog", "legs": 4},
+                {"name": "elephant", "legs": 4},
             ])
-            df.ai.extract("animal")
+            result = df.ai.responses_with_cache(
+                "what is the animal's name?",
+                cache=shared_cache
+            )
             ```
-        This method returns a DataFrame with the same index as the original,
-        where each column corresponds to a key in the dictionaries.
-        The source column is dropped.
 
         Args:
-            column (str): Column to expand.
+            instructions (str): System prompt for the assistant.
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            response_format (type[ResponseFormat], optional): Desired Python type of the
+                responses. Defaults to ``str``.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
 
         Returns:
-            pandas.DataFrame: Original DataFrame with the extracted columns; the source column is dropped.
+            pandas.Series: Responses aligned with the DataFrame's original index.
         """
-        if column not in self._obj.columns:
-            raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
-
-        return (
-            self._obj.pipe(lambda df: df.reset_index(drop=True))
-            .pipe(lambda df: df.join(df[column].ai.extract()))
-            .pipe(lambda df: df.set_index(self._obj.index))
-            .pipe(lambda df: df.drop(columns=[column], axis=1))
+        return _df_rows_to_json_series(self._obj).ai.responses_with_cache(
+            instructions=instructions,
+            cache=cache,
+            response_format=response_format,
+            **api_kwargs,
         )
 
     def responses(
         self,
         instructions: str,
-        response_format: Type[ResponseFormat] = str,
-        batch_size: int = 128,
-        temperature: float = 0.0,
-        top_p: float = 1.0,
+        response_format: type[ResponseFormat] = str,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+        **api_kwargs,
     ) -> pd.Series:
-        """Generate a response for each row after serialising it to JSON.
+        """Generate a response for each row after serializing it to JSON.
 
         Example:
             ```python
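
Every DataFrame helper in this class now funnels through `_df_rows_to_json_series` (added in an earlier hunk), so the model receives one JSON object per row. A sketch of the serialization it performs:

```python
import json
import pandas as pd

df = pd.DataFrame([{"name": "cat", "legs": 4}, {"name": "dog", "legs": 4}])

# Equivalent to the new helper: one JSON string per row, index preserved.
records = pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
    lambda x: json.dumps(x, ensure_ascii=False)
)
print(records.tolist())
# ['{"name": "cat", "legs": 4}', '{"name": "dog", "legs": 4}']
```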
@@ -382,51 +786,75 @@ class OpenAIVecDataFrameAccessor:
                 {"name": "dog", "legs": 4},
                 {"name": "elephant", "legs": 4},
             ])
+            # Basic usage
             df.ai.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            large_df.ai.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                show_progress=True
+            )
             ```
-        This method returns a Series of strings, each containing the
-        assistant's response to the corresponding input.
-        Each row is serialised to JSON before being sent to the assistant.
-        The model used is set by the `responses_model` function.
-        The default model is `gpt-4.1-mini`.
 
         Args:
             instructions (str): System prompt for the assistant.
-            response_format (Type[ResponseFormat], optional): Desired Python type of the
+            response_format (type[ResponseFormat], optional): Desired Python type of the
                 responses. Defaults to ``str``.
-            batch_size (int, optional): Number of requests sent in one batch.
-                Defaults to ``128``.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
-
-        Returns:
-            pandas.Series: Responses aligned with the DataFrame’s original index.
-        """
-        return self._obj.pipe(
-            lambda df: (
-                df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
-                .map(lambda x: json.dumps(x, ensure_ascii=False))
-                .ai.responses(
-                    instructions=instructions,
-                    response_format=response_format,
-                    batch_size=batch_size,
-                    temperature=temperature,
-                    top_p=top_p,
-                )
-            )
+            batch_size (int | None, optional): Number of requests sent in one batch.
+                Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
+
+        Returns:
+            pandas.Series: Responses aligned with the DataFrame's original index.
+        """
+        return self.responses_with_cache(
+            instructions=instructions,
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+            response_format=response_format,
+            **api_kwargs,
         )
 
-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
-        """Execute a prepared task on each DataFrame row after serialising it to JSON.
+    def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: BatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache.
+
+        Args:
+            task (PreparedTask): Prepared task (instructions + response_format).
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            Core routing keys are managed internally.
 
-        This method applies a pre-configured task to each row in the DataFrame,
-        using the task's instructions and response format to generate structured
-        responses from the language model. Each row is serialised to JSON before
-        being processed by the task.
+        Returns:
+            pandas.Series: Task results aligned with the DataFrame's original index.
+        """
+        return _df_rows_to_json_series(self._obj).ai.task_with_cache(
+            task=task,
+            cache=cache,
+            **api_kwargs,
+        )
+
+    def task(
+        self,
+        task: PreparedTask,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on each DataFrame row after serializing it to JSON.
 
         Example:
             ```python
-            from openaivec.model import PreparedTask
+            from openaivec._model import PreparedTask
 
             # Assume you have a prepared task for data analysis
             analysis_task = PreparedTask(...)
@@ -436,30 +864,237 @@ class OpenAIVecDataFrameAccessor:
                 {"name": "dog", "legs": 4},
                 {"name": "elephant", "legs": 4},
             ])
+            # Basic usage
             results = df.ai.task(analysis_task)
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = large_df.ai.task(
+                analysis_task,
+                batch_size=50,
+                show_progress=True
+            )
             ```
-        This method returns a Series containing the task results for each
-        corresponding row, following the task's defined structure.
 
         Args:
             task (PreparedTask): A pre-configured task containing instructions,
-                response format, and other parameters for processing the inputs.
-            batch_size (int, optional): Number of requests sent in one batch
-                to optimize API usage. Defaults to 128.
+                response format for processing the inputs.
+            batch_size (int | None, optional): Number of requests sent in one batch
+                to optimize API usage. Defaults to ``None`` (automatic batch size
+                optimization based on execution time). Set to a positive integer for fixed batch size.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            Core batching / routing keys (``model``, ``instructions`` / system message, user ``input``)
+            are managed by the library and cannot be overridden.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
                 response format, aligned with the DataFrame's original index.
         """
-        return self._obj.pipe(
-            lambda df: (
-                df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
-                .map(lambda x: json.dumps(x, ensure_ascii=False))
-                .ai.task(task=task, batch_size=batch_size)
+        return _df_rows_to_json_series(self._obj).ai.task(
+            task=task,
+            batch_size=batch_size,
+            show_progress=show_progress,
+            **api_kwargs,
+        )
+
+    def parse_with_cache(
+        self,
+        instructions: str,
+        cache: BatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse DataFrame rows into structured data using an LLM with a provided cache.
+
+        This method processes each DataFrame row (converted to JSON) and extracts
+        structured information using an LLM. External cache control enables
+        deduplication across operations and custom batch management.
+
+        Args:
+            instructions (str): Plain language description of what information
+                to extract from each row (e.g., "Extract shipping details and
+                order status"). Guides both extraction and schema inference.
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None for automatic optimization.
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure for parsed data. Can be a Pydantic model, built-in
+                type, or None for automatic schema inference. Defaults to None.
+            max_examples (int, optional): Maximum rows to analyze when inferring
+                schema (only used when response_format is None). Defaults to 100.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
+                frequency_penalty, presence_penalty, seed, etc.).
+
+        Returns:
+            pandas.Series: Series containing parsed structured data as instances
+                of response_format or the inferred schema model, indexed like
+                the original DataFrame.
+        """
+        return _df_rows_to_json_series(self._obj).ai.parse_with_cache(
+            instructions=instructions,
+            cache=cache,
+            response_format=response_format,
+            max_examples=max_examples,
+            **api_kwargs,
+        )
+
+    def parse(
+        self,
+        instructions: str,
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse DataFrame rows into structured data using an LLM.
+
+        Each row is converted to JSON and processed to extract structured
+        information. When no response format is provided, the method
+        automatically infers an appropriate schema from the data.
+
+        Args:
+            instructions (str): Plain language description of extraction goals
+                (e.g., "Extract transaction details including amount, date,
+                and merchant"). Guides extraction and schema inference.
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure for parsed data. Can be a Pydantic model, built-in
+                type, or None for automatic inference. Defaults to None.
+            max_examples (int, optional): Maximum rows to analyze for schema
+                inference (when response_format is None). Defaults to 100.
+            batch_size (int | None, optional): Rows per API batch. None
+                enables automatic optimization. Defaults to None.
+            show_progress (bool, optional): Show progress bar in Jupyter
+                notebooks. Defaults to True.
+            **api_kwargs: Additional OpenAI API parameters.
+
+        Returns:
+            pandas.Series: Parsed structured data indexed like the original
+                DataFrame.
+
+        Example:
+            ```python
+            df = pd.DataFrame({
+                'log': [
+                    '2024-01-01 10:00 ERROR Database connection failed',
+                    '2024-01-01 10:05 INFO Service started successfully'
+                ]
+            })
+
+            # With automatic schema inference
+            parsed = df.ai.parse("Extract timestamp, level, and message")
+            # Returns Series with inferred structure like:
+            # {timestamp: str, level: str, message: str}
+            ```
+        """
+        return self.parse_with_cache(
+            instructions=instructions,
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+            response_format=response_format,
+            max_examples=max_examples,
+            **api_kwargs,
+        )
+
+    def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) -> SchemaInferenceOutput:
+        """Infer a structured data schema from DataFrame rows using AI.
+
+        This method analyzes a sample of DataFrame rows to automatically infer
+        a structured schema that can be used for consistent data extraction.
+        Each row is converted to JSON format and analyzed to identify patterns,
+        field types, and potential categorical values.
+
+        Args:
+            instructions (str): Plain language description of how the extracted
+                structured data will be used (e.g., "Extract operational metrics
+                for dashboard", "Parse customer attributes for segmentation").
+                This guides field relevance and helps exclude irrelevant information.
+            max_examples (int): Maximum number of rows to analyze from the
+                DataFrame. The method will sample randomly up to this limit.
+                Defaults to 100.
+
+        Returns:
+            InferredSchema: An object containing:
+                - instructions: Normalized statement of the extraction objective
+                - fields: List of field specifications with names, types, and descriptions
+                - inference_prompt: Reusable prompt for future extractions
+                - model: Dynamically generated Pydantic model for parsing
+                - task: PreparedTask for batch extraction operations
+
+        Example:
+            ```python
+            df = pd.DataFrame({
+                'text': [
+                    "Order #123: Shipped to NYC, arriving Tuesday",
+                    "Order #456: Delayed due to weather, new ETA Friday",
+                    "Order #789: Delivered to customer in LA"
+                ],
+                'timestamp': ['2024-01-01', '2024-01-02', '2024-01-03']
+            })
+
+            # Infer schema for logistics tracking
+            schema = df.ai.infer_schema(
+                instructions="Extract shipping status and location data for logistics tracking"
             )
+
+            # Apply the schema to extract structured data
+            extracted_df = df.ai.task(schema.task)
+            ```
+
+        Note:
+            Each row is converted to JSON before analysis. The inference
+            process automatically detects hierarchical relationships and
+            creates appropriate nested structures when present. The generated
+            Pydantic model ensures type safety and validation.
+        """
+        return _df_rows_to_json_series(self._obj).ai.infer_schema(
+            instructions=instructions,
+            max_examples=max_examples,
+            **api_kwargs,
+        )
+
+    def extract(self, column: str) -> pd.DataFrame:
+        """Flatten one column of Pydantic models/dicts into top‑level columns.
+
+        Example:
+            ```python
+            df = pd.DataFrame([
+                {"animal": {"name": "cat", "legs": 4}},
+                {"animal": {"name": "dog", "legs": 4}},
+                {"animal": {"name": "elephant", "legs": 4}},
+            ])
+            df.ai.extract("animal")
+            ```
+        This method returns a DataFrame with the same index as the original,
+        where each column corresponds to a key in the dictionaries.
+        The source column is dropped.
+
+        Args:
+            column (str): Column to expand.
+
+        Returns:
+            pandas.DataFrame: Original DataFrame with the extracted columns; the source column is dropped.
+        """
+        if column not in self._obj.columns:
+            raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
+
+        return (
+            self._obj.pipe(lambda df: df.reset_index(drop=True))
+            .pipe(lambda df: df.join(df[column].ai.extract()))
+            .pipe(lambda df: df.set_index(self._obj.index))
+            .pipe(lambda df: df.drop(columns=[column], axis=1))
         )
 
-    def fillna(self, target_column_name: str, max_examples: int = 500, batch_size: int = 128) -> pd.DataFrame:
+    def fillna(
+        self,
+        target_column_name: str,
+        max_examples: int = 500,
+        batch_size: int | None = None,
+        show_progress: bool = True,
+    ) -> pd.DataFrame:
         """Fill missing values in a DataFrame column using AI-powered inference.
 
         This method uses machine learning to intelligently fill missing (NaN) values
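
`parse` returns a Series of Pydantic instances and `extract` flattens such a column, so the two compose into a text-to-columns pipeline. A sketch, assuming an explicit model (the `LogEntry` class and column names are illustrative):

```python
import pandas as pd
from pydantic import BaseModel

class LogEntry(BaseModel):
    timestamp: str
    level: str
    message: str

df = pd.DataFrame({"log": ["2024-01-01 10:00 ERROR Database connection failed"]})

df["entry"] = df.ai.parse("Extract timestamp, level, and message", response_format=LogEntry)
flat = df.ai.extract("entry")  # flattens the model column into top-level columns and drops it
```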
@@ -473,8 +1108,10 @@ class OpenAIVecDataFrameAccessor:
             max_examples (int, optional): The maximum number of example rows to use
                 for context when predicting missing values. Higher values may improve
                 accuracy but increase API costs and processing time. Defaults to 500.
-            batch_size (int, optional): Number of requests sent in one batch
-                to optimize API usage. Defaults to 128.
+            batch_size (int | None, optional): Number of requests sent in one batch
+                to optimize API usage. Defaults to ``None`` (automatic batch size
+                optimization based on execution time). Set to a positive integer for fixed batch size.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
 
         Returns:
             pandas.DataFrame: A new DataFrame with missing values filled in the target
@@ -490,6 +1127,10 @@ class OpenAIVecDataFrameAccessor:
 
             # Fill missing values in the 'name' column
             filled_df = df.ai.fillna('name')
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({'name': [None] * 1000, 'age': list(range(1000))})
+            filled_df = large_df.ai.fillna('name', batch_size=32, show_progress=True)
             ```
 
         Note:
@@ -502,7 +1143,9 @@ class OpenAIVecDataFrameAccessor:
         if missing_rows.empty:
             return self._obj
 
-        filled_values: List[FillNaResponse] = missing_rows.ai.task(task=task, batch_size=batch_size)
+        filled_values: list[FillNaResponse] = missing_rows.ai.task(
+            task=task, batch_size=batch_size, show_progress=show_progress
+        )
 
         # get deep copy of the DataFrame to avoid modifying the original
         df = self._obj.copy()
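
As the refactored body above shows, `fillna` only submits `missing_rows`, so cost scales with the number of NaNs rather than the table size. A usage sketch:

```python
import pandas as pd

df = pd.DataFrame({
    "name": ["cat", None, "elephant", None],
    "legs": [4, 4, 4, 2],
})

# Only the two rows with a missing 'name' are sent to the model.
filled = df.ai.fillna("name", batch_size=32, show_progress=False)
```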
@@ -519,27 +1162,106 @@ class OpenAIVecDataFrameAccessor:
         return df
 
     def similarity(self, col1: str, col2: str) -> pd.Series:
+        """Compute cosine similarity between two columns containing embedding vectors.
+
+        This method calculates the cosine similarity between vectors stored in
+        two columns of the DataFrame. The vectors should be numpy arrays or
+        array-like objects that support dot product operations.
+
+        Example:
+            ```python
+            df = pd.DataFrame({
+                'vec1': [np.array([1, 0, 0]), np.array([0, 1, 0])],
+                'vec2': [np.array([1, 0, 0]), np.array([1, 1, 0])]
+            })
+            similarities = df.ai.similarity('vec1', 'vec2')
+            ```
+
+        Args:
+            col1 (str): Name of the first column containing embedding vectors.
+            col2 (str): Name of the second column containing embedding vectors.
+
+        Returns:
+            pandas.Series: Series containing cosine similarity scores between
+                corresponding vectors in col1 and col2, with values ranging
+                from -1 to 1, where 1 indicates identical direction.
+        """
         return self._obj.apply(
             lambda row: np.dot(row[col1], row[col2]) / (np.linalg.norm(row[col1]) * np.linalg.norm(row[col2])),
             axis=1,
-        ).rename("similarity")
+        ).rename("similarity")  # type: ignore[arg-type]
+
+
+@pd.api.extensions.register_series_accessor("aio")
+class AsyncOpenAIVecSeriesAccessor:
+    """pandas Series accessor (``.aio``) that adds OpenAI helpers."""
+
+    def __init__(self, series_obj: pd.Series):
+        self._obj = series_obj
+
+    async def responses_with_cache(
+        self,
+        instructions: str,
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] = str,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Call an LLM once for every Series element using a provided cache (asynchronously).
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management. The concurrency
+        is controlled by the cache instance itself.
+
+        Example:
+            ```python
+            result = await series.aio.responses_with_cache(
+                "classify",
+                cache=shared,
+                max_output_tokens=256,
+                frequency_penalty=0.2,
+            )
+            ```
 
+        Args:
+            instructions (str): System prompt prepended to every user message.
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            response_format (type[ResponseFormat], optional): Pydantic model or built‑in
+                type the assistant should return. Defaults to ``str``.
+            **api_kwargs: Additional keyword arguments forwarded verbatim to
+                ``AsyncOpenAI.responses.parse`` (e.g. ``temperature``, ``top_p``,
+                ``max_output_tokens``, penalties, future parameters). Core batching keys
+                (model, instructions, input, text_format) are protected and silently
+                ignored if provided.
 
-@pd.api.extensions.register_series_accessor("aio")
-class AsyncOpenAIVecSeriesAccessor:
-    """pandas Series accessor (``.aio``) that adds OpenAI helpers."""
+        Returns:
+            pandas.Series: Series whose values are instances of ``response_format``.
 
-    def __init__(self, series_obj: pd.Series):
-        self._obj = series_obj
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        client: AsyncBatchResponses = AsyncBatchResponses(
+            client=CONTAINER.resolve(AsyncOpenAI),
+            model_name=CONTAINER.resolve(ResponsesModelName).value,
+            system_message=instructions,
+            response_format=response_format,
+            cache=cache,
+            api_kwargs=api_kwargs,
+        )
+
+        results = await client.parse(self._obj.tolist())
+        return pd.Series(results, index=self._obj.index, name=self._obj.name)
 
     async def responses(
         self,
         instructions: str,
-        response_format: Type[ResponseFormat] = str,
-        batch_size: int = 128,
-        temperature: float = 0.0,
-        top_p: float = 1.0,
+        response_format: type[ResponseFormat] = str,
+        batch_size: int | None = None,
         max_concurrency: int = 8,
+        show_progress: bool = True,
+        **api_kwargs,
     ) -> pd.Series:
         """Call an LLM once for every Series element (asynchronously).
 
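The cache-sharing behavior described in `responses_with_cache` can be exercised across several Series. A minimal sketch (assumes the environment-variable client setup from the module docstring; the series contents are illustrative):

```python
import asyncio

import pandas as pd
from openaivec import pandas_ext  # noqa: F401  # registers the .aio accessor
from openaivec._cache import AsyncBatchingMapProxy


async def main() -> None:
    # One shared cache: duplicate inputs across both calls are requested only once.
    shared = AsyncBatchingMapProxy(batch_size=None, max_concurrency=8)
    first = pd.Series(["cat", "dog", "cat"])
    second = pd.Series(["dog", "elephant"])
    fr_first = await first.aio.responses_with_cache("translate to French", cache=shared)
    fr_second = await second.aio.responses_with_cache("translate to French", cache=shared)
    print(fr_first.tolist(), fr_second.tolist())


asyncio.run(main())
```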
@@ -548,22 +1270,32 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             results = await animals.aio.responses("translate to French")
+
+            # With progress bar for large datasets
+            large_series = pd.Series(["data"] * 1000)
+            results = await large_series.aio.responses(
+                "analyze this data",
+                batch_size=32,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
-        This method returns a Series of strings, each containing the
-        assistant's response to the corresponding input.
-        The model used is set by the `responses_model` function.
-        The default model is `gpt-4.1-mini`.
 
         Args:
             instructions (str): System prompt prepended to every user message.
-            response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
+            response_format (type[ResponseFormat], optional): Pydantic model or built‑in
                 type the assistant should return. Defaults to ``str``.
-            batch_size (int, optional): Number of prompts grouped into a single
-                request. Defaults to ``128``.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            batch_size (int | None, optional): Number of prompts grouped into a single
+                request. Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional keyword arguments forwarded verbatim to
+                ``AsyncOpenAI.responses.parse`` (e.g. ``temperature``, ``top_p``,
+                ``max_output_tokens``, penalties, future parameters). Core batching keys
+                (model, instructions, input, text_format) are protected and silently
+                ignored if provided.
 
         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
@@ -571,18 +1303,64 @@ class AsyncOpenAIVecSeriesAccessor:
         Note:
             This is an asynchronous method and must be awaited.
         """
-        client: AsyncBatchResponses = AsyncBatchResponses(
-            client=_DI.resolve(AsyncOpenAI),
-            model_name=_DI.resolve(ResponsesModelName).value,
-            system_message=instructions,
+        return await self.responses_with_cache(
+            instructions=instructions,
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
-            temperature=temperature,
-            top_p=top_p,
-            max_concurrency=max_concurrency,
+            **api_kwargs,
+        )
+
+    async def embeddings_with_cache(
+        self,
+        cache: AsyncBatchingMapProxy[str, np.ndarray],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Compute OpenAI embeddings for every Series element using a provided cache (asynchronously).
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management. The concurrency
+        is controlled by the cache instance itself.
+
+        Example:
+            ```python
+            from openaivec._cache import AsyncBatchingMapProxy
+            import numpy as np
+
+            # Create a shared cache with custom batch size and concurrency
+            shared_cache = AsyncBatchingMapProxy[str, np.ndarray](
+                batch_size=64, max_concurrency=4
+            )
+
+            animals = pd.Series(["cat", "dog", "elephant"])
+            # Must be awaited
+            embeddings = await animals.aio.embeddings_with_cache(cache=shared_cache)
+            ```
+
+        Args:
+            cache (AsyncBatchingMapProxy[str, np.ndarray]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
+
+        Returns:
+            pandas.Series: Series whose values are ``np.ndarray`` objects
+                (dtype ``float32``).
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        client: AsyncBatchEmbeddings = AsyncBatchEmbeddings(
+            client=CONTAINER.resolve(AsyncOpenAI),
+            model_name=CONTAINER.resolve(EmbeddingsModelName).value,
+            cache=cache,
+            api_kwargs=api_kwargs,
         )
 
         # Await the async operation
-        results = await client.parse(self._obj.tolist(), batch_size=batch_size)
+        results = await client.create(self._obj.tolist())
 
         return pd.Series(
             results,
@@ -590,7 +1368,9 @@ class AsyncOpenAIVecSeriesAccessor:
             name=self._obj.name,
         )
 
-    async def embeddings(self, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
+    async def embeddings(
+        self, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = True, **api_kwargs
+    ) -> pd.Series:
         """Compute OpenAI embeddings for every Series element (asynchronously).
 
         Example:
@@ -598,17 +1378,24 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             embeddings = await animals.aio.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = await large_texts.aio.embeddings(
+                batch_size=100,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
-        This method returns a Series of numpy arrays, each containing the
-        embedding vector for the corresponding input.
-        The embedding model is set by the `embeddings_model` function.
-        The default embedding model is `text-embedding-3-small`.
 
         Args:
-            batch_size (int, optional): Number of inputs grouped into a
-                single request. Defaults to ``128``.
+            batch_size (int | None, optional): Number of inputs grouped into a
+                single request. Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
 
         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects
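The async embeddings helper pairs naturally with the synchronous `df.ai.similarity` shown earlier in this file. A small end-to-end sketch (assumes the environment-variable client setup; the texts are toy data):

```python
import asyncio

import pandas as pd
from openaivec import pandas_ext  # noqa: F401  # registers the .ai / .aio accessors


async def main() -> None:
    texts = pd.Series(["cat", "dog"], name="text")
    vectors = await texts.aio.embeddings(show_progress=False)
    # Compare each text against the other by reversing the column.
    df = pd.DataFrame({"vec1": vectors, "vec2": vectors.iloc[::-1].to_numpy()})
    print(df.ai.similarity("vec1", "vec2"))


asyncio.run(main())
```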
@@ -617,31 +1404,88 @@ class AsyncOpenAIVecSeriesAccessor:
         Note:
             This is an asynchronous method and must be awaited.
         """
-        client: AsyncBatchEmbeddings = AsyncBatchEmbeddings(
-            client=_DI.resolve(AsyncOpenAI),
-            model_name=_DI.resolve(EmbeddingsModelName).value,
-            max_concurrency=max_concurrency,
+        return await self.embeddings_with_cache(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
+            **api_kwargs,
         )
 
-        # Await the async operation
-        results = await client.create(self._obj.tolist(), batch_size=batch_size)
+    async def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on every Series element using a provided cache (asynchronously).
 
-        return pd.Series(
-            results,
-            index=self._obj.index,
-            name=self._obj.name,
+        This method allows external control over caching behavior by accepting
+        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management. The concurrency
+        is controlled by the cache instance itself.
+
+        Args:
+            task (PreparedTask): A pre-configured task containing instructions,
+                and response format for processing the inputs.
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Example:
+            ```python
+            from openaivec._model import PreparedTask
+            from openaivec._cache import AsyncBatchingMapProxy
+
+            # Create a shared cache with custom batch size and concurrency
+            shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
+
+            # Assume you have a prepared task for sentiment analysis
+            sentiment_task = PreparedTask(...)
+
+            reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+            # Must be awaited
+            results = await reviews.aio.task_with_cache(sentiment_task, cache=shared_cache)
+            ```
+
+        Additional Keyword Args:
+            Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
+            ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
+            keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
+            library and cannot be overridden.
+
+        Returns:
+            pandas.Series: Series whose values are instances of the task's
+                response format, aligned with the original Series index.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        client = AsyncBatchResponses(
+            client=CONTAINER.resolve(AsyncOpenAI),
+            model_name=CONTAINER.resolve(ResponsesModelName).value,
+            system_message=task.instructions,
+            response_format=task.response_format,
+            cache=cache,
+            api_kwargs=api_kwargs,
         )
+        results = await client.parse(self._obj.tolist())
 
-    async def task(self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
-        """Execute a prepared task on every Series element (asynchronously).
+        return pd.Series(results, index=self._obj.index, name=self._obj.name)
 
-        This method applies a pre-configured task to each element in the Series,
-        using the task's instructions and response format to generate structured
-        responses from the language model.
+    async def task(
+        self,
+        task: PreparedTask,
+        batch_size: int | None = None,
+        max_concurrency: int = 8,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on every Series element (asynchronously).
 
         Example:
             ```python
-            from openaivec.model import PreparedTask
+            from openaivec._model import PreparedTask
 
             # Assume you have a prepared task for sentiment analysis
             sentiment_task = PreparedTask(...)
@@ -649,17 +1493,32 @@ class AsyncOpenAIVecSeriesAccessor:
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
             # Must be awaited
             results = await reviews.aio.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = await large_reviews.aio.task(
+                sentiment_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
-        This method returns a Series containing the task results for each
-        corresponding input element, following the task's defined structure.
 
         Args:
             task (PreparedTask): A pre-configured task containing instructions,
-                response format, and other parameters for processing the inputs.
-            batch_size (int, optional): Number of prompts grouped into a single
-                request to optimize API usage. Defaults to 128.
+                and response format for processing the inputs.
+            batch_size (int | None, optional): Number of prompts grouped into a single
+                request to optimize API usage. Defaults to ``None`` (automatic batch size
+                optimization based on execution time). Set to a positive integer for fixed batch size.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            The task's stored API parameters are used. Core batching / routing
+            keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
+            library and cannot be overridden.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
@@ -668,20 +1527,117 @@ class AsyncOpenAIVecSeriesAccessor:
         Note:
             This is an asynchronous method and must be awaited.
         """
-        client = AsyncBatchResponses.of_task(
-            client=_DI.resolve(AsyncOpenAI),
-            model_name=_DI.resolve(ResponsesModelName).value,
+        return await self.task_with_cache(
             task=task,
-            max_concurrency=max_concurrency,
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
+            **api_kwargs,
         )
 
-        # Await the async operation
-        results = await client.parse(self._obj.tolist(), batch_size=batch_size)
+    async def parse_with_cache(
+        self,
+        instructions: str,
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse Series values into structured data using an LLM with a provided cache (asynchronously).
 
-        return pd.Series(
-            results,
-            index=self._obj.index,
-            name=self._obj.name,
+        This async method provides external cache control while parsing Series
+        content into structured data. Automatic schema inference is performed
+        when no response format is specified.
+
+        Args:
+            instructions (str): Plain language description of what to extract
+                (e.g., "Extract dates, amounts, and descriptions from receipts").
+                Guides both extraction and schema inference.
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured
+                async cache for managing concurrent API calls and deduplication.
+                Set cache.batch_size=None for automatic optimization.
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure for parsed data. Can be a Pydantic model, built-in
+                type, or None for automatic inference. Defaults to None.
+            max_examples (int, optional): Maximum values to analyze for schema
+                inference (when response_format is None). Defaults to 100.
+            **api_kwargs: Additional OpenAI API parameters.
+
+        Returns:
+            pandas.Series: Series containing parsed structured data aligned
+                with the original index.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        schema: SchemaInferenceOutput | None = None
+        if response_format is None:
+            # Use synchronous schema inference
+            schema = self._obj.ai.infer_schema(instructions=instructions, max_examples=max_examples)
+
+        return await self.responses_with_cache(
+            instructions=schema.inference_prompt if schema else instructions,
+            cache=cache,
+            response_format=response_format or schema.model,
+            **api_kwargs,
+        )
+
+    async def parse(
+        self,
+        instructions: str,
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        batch_size: int | None = None,
+        max_concurrency: int = 8,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse Series values into structured data using an LLM (asynchronously).
+
+        Async version of the parse method, extracting structured information
+        from unstructured text with automatic schema inference when needed.
+
+        Args:
+            instructions (str): Plain language extraction goals (e.g., "Extract
+                product names, prices, and categories from descriptions").
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure. None triggers automatic schema inference. Defaults to None.
+            max_examples (int, optional): Maximum values for schema inference.
+                Defaults to 100.
+            batch_size (int | None, optional): Requests per batch. None for
+                automatic optimization. Defaults to None.
+            max_concurrency (int, optional): Maximum concurrent API requests.
+                Defaults to 8.
+            show_progress (bool, optional): Show progress bar. Defaults to True.
+            **api_kwargs: Additional OpenAI API parameters.
+
+        Returns:
+            pandas.Series: Parsed structured data indexed like the original Series.
+
+        Example:
+            ```python
+            emails = pd.Series([
+                "Meeting tomorrow at 3pm with John about Q4 planning",
+                "Lunch with Sarah on Friday to discuss new project"
+            ])
+
+            # Async extraction with schema inference
+            parsed = await emails.aio.parse(
+                "Extract meeting details including time, person, and topic"
+            )
+            ```
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        return await self.parse_with_cache(
+            instructions=instructions,
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
+            response_format=response_format,
+            max_examples=max_examples,
+            **api_kwargs,
        )
 
 
@@ -692,78 +1648,163 @@ class AsyncOpenAIVecDataFrameAccessor:
     def __init__(self, df_obj: pd.DataFrame):
         self._obj = df_obj
 
+    async def responses_with_cache(
+        self,
+        instructions: str,
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] = str,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Generate a response for each row after serializing it to JSON using a provided cache (asynchronously).
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management. The concurrency
+        is controlled by the cache instance itself.
+
+        Example:
+            ```python
+            from openaivec._cache import AsyncBatchingMapProxy
+
+            # Create a shared cache with custom batch size and concurrency
+            shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
+
+            df = pd.DataFrame([
+                {"name": "cat", "legs": 4},
+                {"name": "dog", "legs": 4},
+                {"name": "elephant", "legs": 4},
+            ])
+            # Must be awaited
+            result = await df.aio.responses_with_cache(
+                "what is the animal's name?",
+                cache=shared_cache
+            )
+            ```
+
+        Args:
+            instructions (str): System prompt for the assistant.
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            response_format (type[ResponseFormat], optional): Desired Python type of the
+                responses. Defaults to ``str``.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
+
+        Returns:
+            pandas.Series: Responses aligned with the DataFrame's original index.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        # Await the call to the async Series method using .aio
+        return await _df_rows_to_json_series(self._obj).aio.responses_with_cache(
+            instructions=instructions,
+            cache=cache,
+            response_format=response_format,
+            **api_kwargs,
+        )
+
     async def responses(
         self,
         instructions: str,
-        response_format: Type[ResponseFormat] = str,
-        batch_size: int = 128,
-        temperature: float = 0.0,
-        top_p: float = 1.0,
+        response_format: type[ResponseFormat] = str,
+        batch_size: int | None = None,
         max_concurrency: int = 8,
+        show_progress: bool = True,
+        **api_kwargs,
     ) -> pd.Series:
-        """Generate a response for each row after serialising it to JSON (asynchronously).
+        """Generate a response for each row after serializing it to JSON (asynchronously).
 
         Example:
             ```python
             df = pd.DataFrame([
-                {\"name\": \"cat\", \"legs\": 4},
-                {\"name\": \"dog\", \"legs\": 4},
-                {\"name\": \"elephant\", \"legs\": 4},
+                {"name": "cat", "legs": 4},
+                {"name": "dog", "legs": 4},
+                {"name": "elephant", "legs": 4},
             ])
             # Must be awaited
-            results = await df.aio.responses(\"what is the animal\'s name?\")
+            results = await df.aio.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
-        This method returns a Series of strings, each containing the
-        assistant's response to the corresponding input.
-        Each row is serialised to JSON before being sent to the assistant.
-        The model used is set by the `responses_model` function.
-        The default model is `gpt-4.1-mini`.
 
         Args:
             instructions (str): System prompt for the assistant.
-            response_format (Type[ResponseFormat], optional): Desired Python type of the
+            response_format (type[ResponseFormat], optional): Desired Python type of the
                 responses. Defaults to ``str``.
-            batch_size (int, optional): Number of requests sent in one batch.
-                Defaults to ``128``.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            batch_size (int | None, optional): Number of requests sent in one batch.
+                Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
 
         Returns:
-            pandas.Series: Responses aligned with the DataFrames original index.
+            pandas.Series: Responses aligned with the DataFrame's original index.
 
         Note:
             This is an asynchronous method and must be awaited.
         """
-        series_of_json = self._obj.pipe(
-            lambda df: (
-                pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
-                    lambda x: json.dumps(x, ensure_ascii=False)
-                )
-            )
-        )
-        # Await the call to the async Series method using .aio
-        return await series_of_json.aio.responses(
+        return await self.responses_with_cache(
            instructions=instructions,
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
-            batch_size=batch_size,
-            temperature=temperature,
-            top_p=top_p,
-            max_concurrency=max_concurrency,
+            **api_kwargs,
        )
 
-    async def task(self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
-        """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
+    async def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on each DataFrame row using a provided cache (asynchronously).
+
+        After serializing each row to JSON, this method executes the prepared task.
+
+        Args:
+            task (PreparedTask): Prepared task (instructions + response_format).
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre‑configured async cache instance.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            Core routing keys are managed internally.
+
+        Returns:
+            pandas.Series: Task results aligned with the DataFrame's original index.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        return await _df_rows_to_json_series(self._obj).aio.task_with_cache(
+            task=task,
+            cache=cache,
+            **api_kwargs,
+        )
 
-        This method applies a pre-configured task to each row in the DataFrame,
-        using the task's instructions and response format to generate structured
-        responses from the language model. Each row is serialised to JSON before
-        being processed by the task.
+    async def task(
+        self,
+        task: PreparedTask,
+        batch_size: int | None = None,
+        max_concurrency: int = 8,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on each DataFrame row after serializing it to JSON (asynchronously).
 
         Example:
             ```python
-            from openaivec.model import PreparedTask
+            from openaivec._model import PreparedTask
 
             # Assume you have a prepared task for data analysis
             analysis_task = PreparedTask(...)
@@ -775,17 +1816,31 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.task(analysis_task)
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.task(
+                analysis_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
-        This method returns a Series containing the task results for each
-        corresponding row, following the task's defined structure.
 
         Args:
             task (PreparedTask): A pre-configured task containing instructions,
-                response format, and other parameters for processing the inputs.
-            batch_size (int, optional): Number of requests sent in one batch
-                to optimize API usage. Defaults to 128.
+                and response format for processing the inputs.
+            batch_size (int | None, optional): Number of requests sent in one batch
+                to optimize API usage. Defaults to ``None`` (automatic batch size
+                optimization based on execution time). Set to a positive integer for fixed batch size.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
+            **api_kwargs: Additional OpenAI API parameters forwarded to the Responses API.
+
+        Note:
+            Core batching / routing keys (``model``, ``instructions`` / system message, user ``input``)
+            are managed by the library and cannot be overridden.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
@@ -794,27 +1849,131 @@ class AsyncOpenAIVecDataFrameAccessor:
         Note:
             This is an asynchronous method and must be awaited.
         """
-        series_of_json = self._obj.pipe(
-            lambda df: (
-                pd.Series(df.to_dict(orient="records"), index=df.index, name="record").map(
-                    lambda x: json.dumps(x, ensure_ascii=False)
-                )
-            )
-        )
         # Await the call to the async Series method using .aio
-        return await series_of_json.aio.task(
+        return await _df_rows_to_json_series(self._obj).aio.task(
             task=task,
             batch_size=batch_size,
             max_concurrency=max_concurrency,
+            show_progress=show_progress,
+            **api_kwargs,
         )
 
-    async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
+    async def parse_with_cache(
+        self,
+        instructions: str,
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse DataFrame rows into structured data using an LLM with cache (asynchronously).
+
+        Async method for parsing DataFrame rows (as JSON) with external cache
+        control, enabling deduplication across operations and concurrent processing.
+
+        Args:
+            instructions (str): Plain language extraction goals (e.g., "Extract
+                invoice details including items, quantities, and totals").
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured
+                async cache for concurrent API call management.
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure. None triggers automatic schema inference. Defaults to None.
+            max_examples (int, optional): Maximum rows for schema inference.
+                Defaults to 100.
+            **api_kwargs: Additional OpenAI API parameters.
+
+        Returns:
+            pandas.Series: Parsed structured data indexed like the original DataFrame.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        return await _df_rows_to_json_series(self._obj).aio.parse_with_cache(
+            instructions=instructions,
+            cache=cache,
+            response_format=response_format,
+            max_examples=max_examples,
+            **api_kwargs,
+        )
+
+    async def parse(
+        self,
+        instructions: str,
+        response_format: type[ResponseFormat] | None = None,
+        max_examples: int = 100,
+        batch_size: int | None = None,
+        max_concurrency: int = 8,
+        show_progress: bool = True,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Parse DataFrame rows into structured data using an LLM (asynchronously).
+
+        Async version for extracting structured information from DataFrame rows,
+        with automatic schema inference when no format is specified.
+
+        Args:
+            instructions (str): Plain language extraction goals (e.g., "Extract
+                customer details, order items, and payment information").
+            response_format (type[ResponseFormat] | None, optional): Target
+                structure. None triggers automatic inference. Defaults to None.
+            max_examples (int, optional): Maximum rows for schema inference.
+                Defaults to 100.
+            batch_size (int | None, optional): Rows per batch. None for
+                automatic optimization. Defaults to None.
+            max_concurrency (int, optional): Maximum concurrent requests.
+                Defaults to 8.
+            show_progress (bool, optional): Show progress bar. Defaults to True.
+            **api_kwargs: Additional OpenAI API parameters.
+
+        Returns:
+            pandas.Series: Parsed structured data indexed like the original DataFrame.
+
+        Example:
+            ```python
+            df = pd.DataFrame({
+                'raw_data': [
+                    'Customer: John Doe, Order: 2 laptops @ $1200 each',
+                    'Customer: Jane Smith, Order: 5 phones @ $800 each'
+                ]
+            })
+
+            # Async parsing with automatic schema inference
+            parsed = await df.aio.parse(
+                "Extract customer name, product, quantity, and unit price"
+            )
+            ```
+
+        Note:
+            This is an asynchronous method and must be awaited.
         """
-        Apply a function to the DataFrame, supporting both synchronous and asynchronous functions.
+        return await self.parse_with_cache(
+            instructions=instructions,
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
+            response_format=response_format,
+            max_examples=max_examples,
+            **api_kwargs,
+        )
+
+    async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
+        """Apply a function to the DataFrame, supporting both synchronous and asynchronous functions.
 
         This method allows chaining operations on the DataFrame, similar to pandas' `pipe` method,
         but with support for asynchronous functions.
 
+        Example:
+            ```python
+            async def process_data(df):
+                # Simulate an asynchronous computation
+                await asyncio.sleep(1)
+                return df.dropna()
+
+            df = pd.DataFrame({"col": [1, 2, None, 4]})
+            # Must be awaited
+            result = await df.aio.pipe(process_data)
+            ```
+
         Args:
             func (Callable[[pd.DataFrame], Awaitable[T] | T]): A function that takes a DataFrame
                 as input and returns either a result or an awaitable result.
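A row-level sketch of the DataFrame `parse` variant, mirroring the Series example above (assumes the environment-variable client setup; the order text is illustrative):

```python
import asyncio

import pandas as pd
from openaivec import pandas_ext  # noqa: F401  # registers the .aio accessor


async def main() -> None:
    df = pd.DataFrame(
        {"raw_data": ["Customer: John Doe, Order: 2 laptops @ $1200 each"]}
    )
    # Each row is serialized to JSON, then parsed with an inferred schema.
    parsed = await df.aio.parse(
        "Extract customer name, product, quantity, and unit price",
        show_progress=False,
    )
    print(parsed.iloc[0])


asyncio.run(main())
```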
@@ -831,7 +1990,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         else:
             return result
 
-    async def assign(self, **kwargs: Any) -> pd.DataFrame:
+    async def assign(self, **kwargs) -> pd.DataFrame:
         """Asynchronously assign new columns to the DataFrame, evaluating sequentially.
 
         This method extends pandas' `assign` method by supporting asynchronous
@@ -866,7 +2025,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             ```
 
         Args:
-            **kwargs: Any. Column names as keys and either static values or callables
+            **kwargs: Column names as keys and either static values or callables
                 (synchronous or asynchronous) as values.
 
         Returns:
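The async `assign` accepts a mix of plain pandas callables and coroutine-returning callables. A sketch, assuming `assign` follows pandas' convention of passing the current DataFrame to each callable as the surrounding code suggests (column names and prompt are illustrative):

```python
import asyncio

import pandas as pd
from openaivec import pandas_ext  # noqa: F401  # registers the .ai / .aio accessors


async def main() -> None:
    df = pd.DataFrame({"text": ["good product", "bad service"]})
    out = await df.aio.assign(
        upper=lambda d: d["text"].str.upper(),  # synchronous callable
        sentiment=lambda d: d["text"].aio.responses(  # returns an awaitable
            "Answer with exactly one word: positive or negative.",
            show_progress=False,
        ),
    )
    print(out)


asyncio.run(main())
```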
@@ -891,7 +2050,12 @@ class AsyncOpenAIVecDataFrameAccessor:
         return df_current
 
     async def fillna(
-        self, target_column_name: str, max_examples: int = 500, batch_size: int = 128, max_concurrency: int = 8
+        self,
+        target_column_name: str,
+        max_examples: int = 500,
+        batch_size: int | None = None,
+        max_concurrency: int = 8,
+        show_progress: bool = True,
     ) -> pd.DataFrame:
         """Fill missing values in a DataFrame column using AI-powered inference (asynchronously).
 
@@ -906,10 +2070,12 @@ class AsyncOpenAIVecDataFrameAccessor:
             max_examples (int, optional): The maximum number of example rows to use
                 for context when predicting missing values. Higher values may improve
                 accuracy but increase API costs and processing time. Defaults to 500.
-            batch_size (int, optional): Number of requests sent in one batch
-                to optimize API usage. Defaults to 128.
+            batch_size (int | None, optional): Number of requests sent in one batch
+                to optimize API usage. Defaults to ``None`` (automatic batch size
+                optimization based on execution time). Set to a positive integer for fixed batch size.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
 
         Returns:
             pandas.DataFrame: A new DataFrame with missing values filled in the target
@@ -925,6 +2091,15 @@ class AsyncOpenAIVecDataFrameAccessor:
 
             # Fill missing values in the 'name' column (must be awaited)
             filled_df = await df.aio.fillna('name')
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({'name': [None] * 1000, 'age': list(range(1000))})
+            filled_df = await large_df.aio.fillna(
+                'name',
+                batch_size=32,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
 
         Note:
@@ -938,8 +2113,11 @@ class AsyncOpenAIVecDataFrameAccessor:
         if missing_rows.empty:
             return self._obj
 
-        filled_values: List[FillNaResponse] = await missing_rows.aio.task(
-            task=task, batch_size=batch_size, max_concurrency=max_concurrency
+        filled_values: list[FillNaResponse] = await missing_rows.aio.task(
+            task=task,
+            batch_size=batch_size,
+            max_concurrency=max_concurrency,
+            show_progress=show_progress,
         )
 
         # get deep copy of the DataFrame to avoid modifying the original