openaivec 0.14.10__py3-none-any.whl → 0.14.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/pandas_ext.py CHANGED
@@ -181,8 +181,6 @@ class OpenAIVecSeriesAccessor:
  instructions: str,
  cache: BatchingMapProxy[str, ResponseFormat],
  response_format: type[ResponseFormat] = str,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
  """Call an LLM once for every Series element using a provided cache.
@@ -196,40 +194,30 @@ class OpenAIVecSeriesAccessor:
  batching and deduplication control.
  response_format (type[ResponseFormat], optional): Pydantic model or built-in
  type the assistant should return. Defaults to ``str``.
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
-
- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``, etc.) are forwarded verbatim to the underlying client.
+ **api_kwargs: Arbitrary OpenAI Responses API parameters (e.g. ``temperature``,
+ ``top_p``, ``frequency_penalty``, ``presence_penalty``, ``seed``, etc.) are
+ forwarded verbatim to the underlying client.

  Returns:
  pandas.Series: Series whose values are instances of ``response_format``.
  """
+
  client: BatchResponses = BatchResponses(
  client=CONTAINER.resolve(OpenAI),
  model_name=CONTAINER.resolve(ResponsesModelName).value,
  system_message=instructions,
  response_format=response_format,
  cache=cache,
- temperature=temperature,
- top_p=top_p,
+ api_kwargs=api_kwargs,
  )

- # Forward any extra kwargs to the underlying Responses API, excluding proxy-specific ones.
- proxy_params = {"show_progress", "batch_size"}
- filtered_kwargs = {k: v for k, v in api_kwargs.items() if k not in proxy_params}
- return pd.Series(
- client.parse(self._obj.tolist(), **filtered_kwargs), index=self._obj.index, name=self._obj.name
- )
+ return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)

  def responses(
  self,
  instructions: str,
  response_format: type[ResponseFormat] = str,
  batch_size: int | None = None,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  show_progress: bool = False,
  **api_kwargs,
  ) -> pd.Series:
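What these first hunks mean in practice: 0.14.13 removes the dedicated `temperature` and `top_p` parameters from `responses_with_cache` / `responses` and forwards everything in `**api_kwargs` verbatim (the proxy-kwarg filtering code is gone as well). A minimal migration sketch, assuming `openaivec.pandas_ext` is imported so the `.ai` accessor is registered:

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401  (registers the .ai accessor, i.e. this module)

animals = pd.Series(["panda", "koala"], name="animal")

# 0.14.10 style (removed): temperature/top_p were named parameters.
# 0.14.13 style: the same keys ride along in **api_kwargs and are
# forwarded verbatim to the OpenAI Responses API.
translated = animals.ai.responses(
    "translate to French",
    temperature=0.0,
    top_p=1.0,
)
```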
@@ -248,6 +236,12 @@ class OpenAIVecSeriesAccessor:
  batch_size=32,
  show_progress=True
  )
+
+ # With custom temperature
+ animals.ai.responses(
+ "translate creatively",
+ temperature=0.8
+ )
  ```

  Args:
@@ -257,9 +251,8 @@ class OpenAIVecSeriesAccessor:
  batch_size (int | None, optional): Number of prompts grouped into a single
  request. Defaults to ``None`` (automatic batch size optimization
  based on execution time). Set to a positive integer for fixed batch size.
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).

  Returns:
  pandas.Series: Series whose values are instances of ``response_format``.
@@ -268,14 +261,13 @@ class OpenAIVecSeriesAccessor:
  instructions=instructions,
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
  response_format=response_format,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

  def embeddings_with_cache(
  self,
  cache: BatchingMapProxy[str, np.ndarray],
+ **api_kwargs,
  ) -> pd.Series:
  """Compute OpenAI embeddings for every Series element using a provided cache.

@@ -299,6 +291,7 @@ class OpenAIVecSeriesAccessor:
  cache (BatchingMapProxy[str, np.ndarray]): Pre-configured cache
  instance for managing API call batching and deduplication.
  Set cache.batch_size=None to enable automatic batch size optimization.
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API.

  Returns:
  pandas.Series: Series whose values are ``np.ndarray`` objects
@@ -308,6 +301,7 @@ class OpenAIVecSeriesAccessor:
  client=CONTAINER.resolve(OpenAI),
  model_name=CONTAINER.resolve(EmbeddingsModelName).value,
  cache=cache,
+ api_kwargs=api_kwargs,
  )

  return pd.Series(
@@ -316,7 +310,7 @@ class OpenAIVecSeriesAccessor:
  name=self._obj.name,
  )

- def embeddings(self, batch_size: int | None = None, show_progress: bool = False) -> pd.Series:
+ def embeddings(self, batch_size: int | None = None, show_progress: bool = False, **api_kwargs) -> pd.Series:
  """Compute OpenAI embeddings for every Series element.

  Example:
@@ -338,6 +332,7 @@ class OpenAIVecSeriesAccessor:
  single request. Defaults to ``None`` (automatic batch size optimization
  based on execution time). Set to a positive integer for fixed batch size.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
+ **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).

  Returns:
  pandas.Series: Series whose values are ``np.ndarray`` objects
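`embeddings` and `embeddings_with_cache` gain the same pass-through. A hedged sketch of the new `dimensions` forwarding (valid only for text-embedding-3 models, per the docstring; assumes the `.ai` accessor is registered by importing this module):

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401

texts = pd.Series(["hello world", "bonjour le monde"])

# dimensions is forwarded through **api_kwargs to the embeddings endpoint.
vectors = texts.ai.embeddings(batch_size=64, dimensions=256)
```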
@@ -345,18 +340,18 @@ class OpenAIVecSeriesAccessor:
  """
  return self.embeddings_with_cache(
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+ **api_kwargs,
  )

  def task_with_cache(
  self,
  task: PreparedTask[ResponseFormat],
  cache: BatchingMapProxy[str, ResponseFormat],
- **api_kwargs,
  ) -> pd.Series:
  """Execute a prepared task on every Series element using a provided cache.

  This mirrors ``responses_with_cache`` but uses the task's stored instructions,
- response format, temperature and top_p. A supplied ``BatchingMapProxy`` enables
+ response format, and API parameters. A supplied ``BatchingMapProxy`` enables
  cross‑operation deduplicated reuse and external batch size / progress control.

  Example:
@@ -370,10 +365,9 @@ class OpenAIVecSeriesAccessor:
  task (PreparedTask): Prepared task (instructions + response_format + sampling params).
  cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.

- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``, etc.) forwarded verbatim to the underlying client. Core routing keys
- (``model``, system instructions, user input) are managed internally and cannot be overridden.
+ Note:
+ The task's stored API parameters are used. Core routing keys (``model``, system
+ instructions, user input) are managed internally and cannot be overridden.

  Returns:
  pandas.Series: Task results aligned with the original Series index.
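Note the direction of this change: `task_with_cache` no longer accepts ad-hoc `**api_kwargs`; a `PreparedTask` now carries its own `api_kwargs`. A sketch of the cache-sharing pattern, with the `BatchingMapProxy` import path assumed (only the class name appears in this diff):

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401
from openaivec.proxy import BatchingMapProxy  # import path assumed

comments = pd.Series(["Love it!", "Broke in a week.", "Love it!"])

# One shared cache: the duplicate "Love it!" is resolved by a single API call.
shared_cache = BatchingMapProxy(batch_size=16, show_progress=False)

schema = comments.ai.infer_schema(instructions="Classify sentiment")
results = comments.ai.task_with_cache(task=schema.task, cache=shared_cache)
```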
@@ -384,17 +378,15 @@ class OpenAIVecSeriesAccessor:
  system_message=task.instructions,
  response_format=task.response_format,
  cache=cache,
- temperature=task.temperature,
- top_p=task.top_p,
+ api_kwargs=task.api_kwargs,
  )
- return pd.Series(client.parse(self._obj.tolist(), **api_kwargs), index=self._obj.index, name=self._obj.name)
+ return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)

  def task(
  self,
  task: PreparedTask,
  batch_size: int | None = None,
  show_progress: bool = False,
- **api_kwargs,
  ) -> pd.Series:
  """Execute a prepared task on every Series element.

@@ -426,10 +418,9 @@ class OpenAIVecSeriesAccessor:
  optimization based on execution time). Set to a positive integer for fixed batch size.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
- keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
+ Note:
+ The task's stored API parameters are used. Core batching / routing keys
+ (``model``, ``instructions`` / system message, user ``input``) are managed by the
  library and cannot be overridden.

  Returns:
@@ -438,148 +429,204 @@ class OpenAIVecSeriesAccessor:
  return self.task_with_cache(
  task=task,
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
- **api_kwargs,
  )

  def parse_with_cache(
  self,
  instructions: str,
  cache: BatchingMapProxy[str, ResponseFormat],
- response_format: ResponseFormat = None,
+ response_format: type[ResponseFormat] | None = None,
  max_examples: int = 100,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
  """Parse Series values using an LLM with a provided cache.
- This method allows you to parse the Series content into structured data
- using an LLM, optionally inferring a schema based on the provided purpose.
+
+ This method allows external control over caching behavior while parsing
+ Series content into structured data. If no response format is provided,
+ the method automatically infers an appropriate schema by analyzing the
+ data patterns.
+
  Args:
- instructions (str): System prompt for the LLM.
- cache (BatchingMapProxy[str, BaseModel]): Explicit cache instance for
- batching and deduplication control.
- response_format (type[BaseModel] | None): Pydantic model or built-in type
- for structured output. If None, schema is inferred.
- max_examples (int): Maximum number of examples to use for schema inference.
- Defaults to 100.
- temperature (float | None): Sampling temperature. Defaults to 0.0.
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. `frequency_penalty`, `presence_penalty`,
- `seed`, etc.) are forwarded verbatim to the underlying client.
+ instructions (str): Plain language description of what information
+ to extract (e.g., "Extract customer information including name
+ and contact details"). This guides both the extraction process
+ and schema inference.
+ cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+ instance for managing API call batching and deduplication.
+ Set cache.batch_size=None to enable automatic batch size optimization.
+ response_format (type[ResponseFormat] | None, optional): Target structure
+ for the parsed data. Can be a Pydantic model class, built-in type
+ (str, int, float, bool, list, dict), or None. If None, the method
+ infers an appropriate schema based on the instructions and data.
+ Defaults to None.
+ max_examples (int, optional): Maximum number of Series values to
+ analyze when inferring the schema. Only used when response_format
+ is None. Defaults to 100.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
+ frequency_penalty, presence_penalty, seed, etc.) forwarded to
+ the underlying API calls.
+
  Returns:
- pandas.Series: Series with parsed structured data as instances of
- `response_format` or inferred schema model.
+ pandas.Series: Series containing parsed structured data. Each value
+ is an instance of the specified response_format or the inferred
+ schema model, aligned with the original Series index.
  """

  schema: InferredSchema | None = None
  if response_format is None:
- schema = self.infer_schema(purpose=instructions, max_examples=max_examples, **api_kwargs)
+ schema = self.infer_schema(instructions=instructions, max_examples=max_examples, **api_kwargs)

  return self.responses_with_cache(
  instructions=schema.inference_prompt if schema else instructions,
  cache=cache,
  response_format=response_format or schema.model,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

  def parse(
  self,
  instructions: str,
- response_format: ResponseFormat = None,
+ response_format: type[ResponseFormat] | None = None,
  max_examples: int = 100,
  batch_size: int | None = None,
  show_progress: bool = False,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
- """Parse Series values using an LLM with optional schema inference.
+ """Parse Series values into structured data using an LLM.

- This method allows you to parse the Series content into structured data
- using an LLM, optionally inferring a schema based on the provided purpose.
+ This method extracts structured information from unstructured text in
+ the Series. When no response format is provided, it automatically
+ infers an appropriate schema by analyzing patterns in the data.

  Args:
- instructions (str): System prompt for the LLM.
- response_format (type[BaseModel] | None): Pydantic model or built-in type
- for structured output. If None, schema is inferred.
- max_examples (int): Maximum number of examples to use for schema inference.
- Defaults to 100.
- batch_size (int | None): Number of requests to process in parallel.
- Defaults to None (automatic optimization).
- show_progress (bool): Whether to display a progress bar during processing.
- Defaults to False.
- temperature (float | None): Sampling temperature. Defaults to 0.0.
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
+ instructions (str): Plain language description of what information
+ to extract (e.g., "Extract product details including price,
+ category, and availability"). This guides both the extraction
+ process and schema inference.
+ response_format (type[ResponseFormat] | None, optional): Target
+ structure for the parsed data. Can be a Pydantic model class,
+ built-in type (str, int, float, bool, list, dict), or None.
+ If None, automatically infers a schema. Defaults to None.
+ max_examples (int, optional): Maximum number of Series values to
+ analyze when inferring schema. Only used when response_format
+ is None. Defaults to 100.
+ batch_size (int | None, optional): Number of requests to process
+ per batch. None enables automatic optimization. Defaults to None.
+ show_progress (bool, optional): Display progress bar in Jupyter
+ notebooks. Defaults to False.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
+ frequency_penalty, presence_penalty, seed, etc.).

  Returns:
- pandas.Series: Series with parsed structured data as instances of
- `response_format` or inferred schema model.
+ pandas.Series: Series containing parsed structured data as instances
+ of response_format or the inferred schema model.
+
+ Example:
+ ```python
+ # With explicit schema
+ from pydantic import BaseModel
+ class Product(BaseModel):
+ name: str
+ price: float
+ in_stock: bool
+
+ descriptions = pd.Series([
+ "iPhone 15 Pro - $999, available now",
+ "Samsung Galaxy S24 - $899, out of stock"
+ ])
+ products = descriptions.ai.parse(
+ "Extract product information",
+ response_format=Product
+ )
+
+ # With automatic schema inference
+ reviews = pd.Series([
+ "Great product! 5 stars. Fast shipping.",
+ "Poor quality. 2 stars. Slow delivery."
+ ])
+ parsed = reviews.ai.parse(
+ "Extract review rating and shipping feedback"
+ )
+ ```
  """
  return self.parse_with_cache(
  instructions=instructions,
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
  response_format=response_format,
  max_examples=max_examples,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

- def infer_schema(self, purpose: str, max_examples: int = 100, **api_kwargs) -> InferredSchema:
+ def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) -> InferredSchema:
  """Infer a structured data schema from Series content using AI.

- This method analyzes a sample of the Series values to automatically infer
- a structured schema that can be used for consistent data extraction.
- The inferred schema includes field names, types, descriptions, and
- potential enum values based on patterns found in the data.
+ This method analyzes a sample of Series values to automatically generate
+ a Pydantic model that captures the relevant information structure. The
+ inferred schema supports both flat and hierarchical (nested) structures,
+ making it suitable for complex data extraction tasks.

  Args:
- purpose (str): Plain language description of how the extracted
- structured data will be used (e.g., "Extract customer sentiment
- signals for analytics", "Parse product features for search").
- This guides field relevance and helps exclude irrelevant information.
- max_examples (int): Maximum number of examples to analyze from the
- Series. The method will sample randomly from the Series up to this
- limit. Defaults to 100.
+ instructions (str): Plain language description of the extraction goal
+ (e.g., "Extract customer information for CRM system", "Parse
+ event details for calendar integration"). This guides which
+ fields to include and their purpose.
+ max_examples (int, optional): Maximum number of Series values to
+ analyze for pattern detection. The method samples randomly up
+ to this limit. Higher values may improve schema quality but
+ increase inference time. Defaults to 100.
+ **api_kwargs: Additional OpenAI API parameters for fine-tuning
+ the inference process.

  Returns:
- InferredSchema: An object containing:
- - purpose: Normalized statement of the extraction objective
- - fields: List of field specifications with names, types, and descriptions
- - inference_prompt: Reusable prompt for future extractions
- - model: Dynamically generated Pydantic model for parsing
- - task: PreparedTask for batch extraction operations
+ InferredSchema: A comprehensive schema object containing:
+ - instructions: Refined extraction objective statement
+ - fields: Hierarchical field specifications with names, types,
+ descriptions, and nested structures where applicable
+ - inference_prompt: Optimized prompt for consistent extraction
+ - model: Dynamically generated Pydantic model class supporting
+ both flat and nested structures
+ - task: PreparedTask configured for batch extraction using
+ the inferred schema

  Example:
  ```python
+ # Simple flat structure
  reviews = pd.Series([
- "Great product! Fast shipping and excellent quality.",
- "Terrible experience. Item broke after 2 days.",
- "Average product. Price is fair but nothing special."
+ "5 stars! Great product, fast shipping to NYC.",
+ "2 stars. Product broke, slow delivery to LA."
  ])
-
- # Infer schema for sentiment analysis
  schema = reviews.ai.infer_schema(
- purpose="Extract sentiment and product quality indicators"
+ "Extract review ratings and shipping information"
+ )
+
+ # Hierarchical structure
+ orders = pd.Series([
+ "Order #123: John Doe, 123 Main St, NYC. Items: iPhone ($999), Case ($29)",
+ "Order #456: Jane Smith, 456 Oak Ave, LA. Items: iPad ($799)"
+ ])
+ schema = orders.ai.infer_schema(
+ "Extract order details including customer and items"
  )
+ # Inferred schema may include nested structures like:
+ # - customer: {name: str, address: str, city: str}
+ # - items: [{product: str, price: float}]

- # Use the inferred schema for batch extraction
- extracted = reviews.ai.task(schema.task)
+ # Apply the schema for extraction
+ extracted = orders.ai.task(schema.task)
  ```

  Note:
- The schema inference uses AI to analyze patterns in the data and may
- require multiple attempts to produce a valid schema. Fields are limited
- to primitive types (string, integer, float, boolean) with optional
- enum values for categorical fields.
+ The inference process uses multiple AI iterations to ensure schema
+ validity. Nested structures are automatically detected when the
+ data contains hierarchical relationships. The generated Pydantic
+ model ensures type safety and validation for all extracted data.
  """
  inferer = CONTAINER.resolve(SchemaInferer)

  input: SchemaInferenceInput = SchemaInferenceInput(
- examples=self._obj.sample(n=min(max_examples, len(self._obj))).tolist(), purpose=purpose, **api_kwargs
+ examples=self._obj.sample(n=min(max_examples, len(self._obj))).tolist(),
+ instructions=instructions,
+ **api_kwargs,
  )
  return inferer.infer_schema(input)

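Taken together, the Series-level `parse_with_cache` keeps accepting `**api_kwargs` while sampling keys move into it. A sketch of the cache-sharing pattern (again assuming the `BatchingMapProxy` import path):

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401
from openaivec.proxy import BatchingMapProxy  # import path assumed

tickets = pd.Series([
    "Refund request for order #88",
    "Refund request for order #88",  # duplicate: resolved once via the cache
    "Shipping delay on order #91",
])

cache = BatchingMapProxy(batch_size=None, show_progress=True)  # None = auto batch sizing
parsed = tickets.ai.parse_with_cache(
    "Extract the request type and order number",
    cache=cache,
    seed=42,  # forwarded via **api_kwargs
)
```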
@@ -642,8 +689,6 @@ class OpenAIVecDataFrameAccessor:
  instructions: str,
  cache: BatchingMapProxy[str, ResponseFormat],
  response_format: type[ResponseFormat] = str,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
  """Generate a response for each row after serializing it to JSON using a provided cache.
@@ -677,8 +722,7 @@ class OpenAIVecDataFrameAccessor:
  Set cache.batch_size=None to enable automatic batch size optimization.
  response_format (type[ResponseFormat], optional): Desired Python type of the
  responses. Defaults to ``str``.
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).

  Returns:
  pandas.Series: Responses aligned with the DataFrame's original index.
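The DataFrame accessor mirrors the Series change. A hedged sketch (each row is serialized to JSON and sent as one prompt):

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401

df = pd.DataFrame({"name": ["panda", "koala"], "habitat": ["China", "Australia"]})

# Sampling keys now travel via **api_kwargs here too.
summaries = df.ai.responses("Describe this animal in one sentence", temperature=0.2)
```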
@@ -687,8 +731,6 @@ class OpenAIVecDataFrameAccessor:
  instructions=instructions,
  cache=cache,
  response_format=response_format,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

@@ -697,8 +739,6 @@ class OpenAIVecDataFrameAccessor:
  instructions: str,
  response_format: type[ResponseFormat] = str,
  batch_size: int | None = None,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  show_progress: bool = False,
  **api_kwargs,
  ) -> pd.Series:
@@ -730,9 +770,8 @@ class OpenAIVecDataFrameAccessor:
  batch_size (int | None, optional): Number of requests sent in one batch.
  Defaults to ``None`` (automatic batch size optimization
  based on execution time). Set to a positive integer for fixed batch size.
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).

  Returns:
  pandas.Series: Responses aligned with the DataFrame's original index.
@@ -741,8 +780,6 @@ class OpenAIVecDataFrameAccessor:
  instructions=instructions,
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
  response_format=response_format,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

@@ -750,7 +787,6 @@ class OpenAIVecDataFrameAccessor:
  self,
  task: PreparedTask[ResponseFormat],
  cache: BatchingMapProxy[str, ResponseFormat],
- **api_kwargs,
  ) -> pd.Series:
  """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache.

@@ -758,9 +794,8 @@ class OpenAIVecDataFrameAccessor:
  task (PreparedTask): Prepared task (instructions + response_format + sampling params).
  cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.

- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``) forwarded verbatim. Core routing keys are managed internally.
+ Note:
+ The task's stored API parameters are used. Core routing keys are managed internally.

  Returns:
  pandas.Series: Task results aligned with the DataFrame's original index.
@@ -768,7 +803,6 @@ class OpenAIVecDataFrameAccessor:
  return _df_rows_to_json_series(self._obj).ai.task_with_cache(
  task=task,
  cache=cache,
- **api_kwargs,
  )

  def task(
@@ -776,7 +810,6 @@ class OpenAIVecDataFrameAccessor:
  task: PreparedTask,
  batch_size: int | None = None,
  show_progress: bool = False,
- **api_kwargs,
  ) -> pd.Series:
  """Execute a prepared task on each DataFrame row after serializing it to JSON.

@@ -812,9 +845,8 @@ class OpenAIVecDataFrameAccessor:
  optimization based on execution time). Set to a positive integer for fixed batch size.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
+ Note:
+ The task's stored API parameters are used. Core batching / routing
  keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
  library and cannot be overridden.

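A brief sketch of the DataFrame task flow after this change (the PreparedTask carries its own API parameters, so nothing extra is passed at call time):

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401

df = pd.DataFrame({"review": ["Great!", "Terrible."], "stars": [5, 1]})

schema = df.ai.infer_schema(instructions="Summarize review sentiment")
labels = df.ai.task(schema.task, batch_size=16, show_progress=True)
```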
@@ -826,99 +858,108 @@ class OpenAIVecDataFrameAccessor:
  task=task,
  batch_size=batch_size,
  show_progress=show_progress,
- **api_kwargs,
  )

  def parse_with_cache(
  self,
  instructions: str,
  cache: BatchingMapProxy[str, ResponseFormat],
- response_format: ResponseFormat = None,
+ response_format: type[ResponseFormat] | None = None,
  max_examples: int = 100,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
- """Parse DataFrame rows using an LLM with a provided cache.
+ """Parse DataFrame rows into structured data using an LLM with a provided cache.

- This method allows you to parse each DataFrame row (serialized as JSON)
- into structured data using an LLM, optionally inferring a schema based
- on the provided purpose.
+ This method processes each DataFrame row (converted to JSON) and extracts
+ structured information using an LLM. External cache control enables
+ deduplication across operations and custom batch management.

  Args:
- instructions (str): System prompt for the LLM.
- cache (BatchingMapProxy[str, ResponseFormat]): Explicit cache instance for
- batching and deduplication control.
- response_format (type[BaseModel] | None): Pydantic model or built-in type
- for structured output. If None, schema is inferred.
- max_examples (int): Maximum number of examples to use for schema inference.
- Defaults to 100.
- temperature (float | None): Sampling temperature. Defaults to 0.0.
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
-
- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. `frequency_penalty`, `presence_penalty`,
- `seed`, etc.) are forwarded verbatim to the underlying client.
+ instructions (str): Plain language description of what information
+ to extract from each row (e.g., "Extract shipping details and
+ order status"). Guides both extraction and schema inference.
+ cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+ instance for managing API call batching and deduplication.
+ Set cache.batch_size=None for automatic optimization.
+ response_format (type[ResponseFormat] | None, optional): Target
+ structure for parsed data. Can be a Pydantic model, built-in
+ type, or None for automatic schema inference. Defaults to None.
+ max_examples (int, optional): Maximum rows to analyze when inferring
+ schema (only used when response_format is None). Defaults to 100.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
+ frequency_penalty, presence_penalty, seed, etc.).

  Returns:
- pandas.Series: Series with parsed structured data as instances of
- `response_format` or inferred schema model.
+ pandas.Series: Series containing parsed structured data as instances
+ of response_format or the inferred schema model, indexed like
+ the original DataFrame.
  """
  return _df_rows_to_json_series(self._obj).ai.parse_with_cache(
  instructions=instructions,
  cache=cache,
  response_format=response_format,
  max_examples=max_examples,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

  def parse(
  self,
  instructions: str,
- response_format: ResponseFormat = None,
+ response_format: type[ResponseFormat] | None = None,
  max_examples: int = 100,
  batch_size: int | None = None,
  show_progress: bool = False,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
- """Parse DataFrame rows using an LLM with optional schema inference.
+ """Parse DataFrame rows into structured data using an LLM.

- This method allows you to parse each DataFrame row (serialized as JSON)
- into structured data using an LLM, optionally inferring a schema based
- on the provided purpose.
+ Each row is converted to JSON and processed to extract structured
+ information. When no response format is provided, the method
+ automatically infers an appropriate schema from the data.

  Args:
- instructions (str): System prompt for the LLM.
- response_format (type[BaseModel] | None): Pydantic model or built-in type
- for structured output. If None, schema is inferred.
- max_examples (int): Maximum number of examples to use for schema inference.
- Defaults to 100.
- batch_size (int | None): Number of requests to process in parallel.
- Defaults to None (automatic optimization).
- show_progress (bool): Whether to display a progress bar during processing.
- Defaults to False.
- temperature (float | None): Sampling temperature. Defaults to 0.0.
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
+ instructions (str): Plain language description of extraction goals
+ (e.g., "Extract transaction details including amount, date,
+ and merchant"). Guides extraction and schema inference.
+ response_format (type[ResponseFormat] | None, optional): Target
+ structure for parsed data. Can be a Pydantic model, built-in
+ type, or None for automatic inference. Defaults to None.
+ max_examples (int, optional): Maximum rows to analyze for schema
+ inference (when response_format is None). Defaults to 100.
+ batch_size (int | None, optional): Rows per API batch. None
+ enables automatic optimization. Defaults to None.
+ show_progress (bool, optional): Show progress bar in Jupyter
+ notebooks. Defaults to False.
+ **api_kwargs: Additional OpenAI API parameters.

  Returns:
- pandas.Series: Series with parsed structured data as instances of
- `response_format` or inferred schema model.
+ pandas.Series: Parsed structured data indexed like the original
+ DataFrame.
+
+ Example:
+ ```python
+ df = pd.DataFrame({
+ 'log': [
+ '2024-01-01 10:00 ERROR Database connection failed',
+ '2024-01-01 10:05 INFO Service started successfully'
+ ]
+ })
+
+ # With automatic schema inference
+ parsed = df.ai.parse("Extract timestamp, level, and message")
+ # Returns Series with inferred structure like:
+ # {timestamp: str, level: str, message: str}
+ ```
  """
  return self.parse_with_cache(
  instructions=instructions,
  cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
  response_format=response_format,
  max_examples=max_examples,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

- def infer_schema(self, purpose: str, max_examples: int = 100) -> InferredSchema:
+ def infer_schema(self, instructions: str, max_examples: int = 100, **api_kwargs) -> InferredSchema:
  """Infer a structured data schema from DataFrame rows using AI.

  This method analyzes a sample of DataFrame rows to automatically infer
@@ -927,7 +968,7 @@ class OpenAIVecDataFrameAccessor:
  field types, and potential categorical values.

  Args:
- purpose (str): Plain language description of how the extracted
+ instructions (str): Plain language description of how the extracted
  structured data will be used (e.g., "Extract operational metrics
  for dashboard", "Parse customer attributes for segmentation").
  This guides field relevance and helps exclude irrelevant information.
@@ -937,7 +978,7 @@ class OpenAIVecDataFrameAccessor:

  Returns:
  InferredSchema: An object containing:
- - purpose: Normalized statement of the extraction objective
+ - instructions: Normalized statement of the extraction objective
  - fields: List of field specifications with names, types, and descriptions
  - inference_prompt: Reusable prompt for future extractions
  - model: Dynamically generated Pydantic model for parsing
@@ -956,7 +997,7 @@ class OpenAIVecDataFrameAccessor:

  # Infer schema for logistics tracking
  schema = df.ai.infer_schema(
- purpose="Extract shipping status and location data for logistics tracking"
+ instructions="Extract shipping status and location data for logistics tracking"
  )

  # Apply the schema to extract structured data
@@ -964,14 +1005,15 @@ class OpenAIVecDataFrameAccessor:
  ```

  Note:
- The DataFrame rows are internally converted to JSON format before
- analysis. The inferred schema is flat (no nested structures) and
- uses only primitive types to ensure compatibility with pandas and
- Spark operations.
+ Each row is converted to JSON before analysis. The inference
+ process automatically detects hierarchical relationships and
+ creates appropriate nested structures when present. The generated
+ Pydantic model ensures type safety and validation.
  """
  return _df_rows_to_json_series(self._obj).ai.infer_schema(
- purpose=purpose,
+ instructions=instructions,
  max_examples=max_examples,
+ **api_kwargs,
  )

  def extract(self, column: str) -> pd.DataFrame:
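Across both accessors, the keyword rename is breaking: `infer_schema(purpose=...)` becomes `infer_schema(instructions=...)`. A migration sketch:

```python
import pandas as pd
import openaivec.pandas_ext  # noqa: F401

logs = pd.Series(["ERROR disk full", "INFO backup done"])

# 0.14.10 (keyword removed in 0.14.13):
# schema = logs.ai.infer_schema(purpose="Extract log level and message")

# 0.14.13:
schema = logs.ai.infer_schema(instructions="Extract log level and message")
extracted = logs.ai.task(schema.task)
```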
@@ -1012,7 +1054,6 @@ class OpenAIVecDataFrameAccessor:
  max_examples: int = 500,
  batch_size: int | None = None,
  show_progress: bool = False,
- **api_kwargs,
  ) -> pd.DataFrame:
  """Fill missing values in a DataFrame column using AI-powered inference.

@@ -1032,10 +1073,6 @@ class OpenAIVecDataFrameAccessor:
  optimization based on execution time). Set to a positive integer for fixed batch size.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``, etc.) are forwarded verbatim to the underlying task execution.
-
  Returns:
  pandas.DataFrame: A new DataFrame with missing values filled in the target
  column. The original DataFrame is not modified.
@@ -1067,7 +1104,7 @@ class OpenAIVecDataFrameAccessor:
  return self._obj

  filled_values: list[FillNaResponse] = missing_rows.ai.task(
- task=task, batch_size=batch_size, show_progress=show_progress, **api_kwargs
+ task=task, batch_size=batch_size, show_progress=show_progress
  )

  # get deep copy of the DataFrame to avoid modifying the original
@@ -1127,8 +1164,6 @@ class AsyncOpenAIVecSeriesAccessor:
  instructions: str,
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
  response_format: type[ResponseFormat] = str,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
  """Call an LLM once for every Series element using a provided cache (asynchronously).
@@ -1155,13 +1190,11 @@ class AsyncOpenAIVecSeriesAccessor:
  Set cache.batch_size=None to enable automatic batch size optimization.
  response_format (type[ResponseFormat], optional): Pydantic model or built‑in
  type the assistant should return. Defaults to ``str``.
- temperature (float | None, optional): Sampling temperature. ``None`` omits the
- parameter (recommended for reasoning models). Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
  **api_kwargs: Additional keyword arguments forwarded verbatim to
- ``AsyncOpenAI.responses.parse`` (e.g. ``max_output_tokens``, penalties,
- future parameters). Core batching keys (model, instructions, input,
- text_format) are protected and silently ignored if provided.
+ ``AsyncOpenAI.responses.parse`` (e.g. ``temperature``, ``top_p``,
+ ``max_output_tokens``, penalties, future parameters). Core batching keys
+ (model, instructions, input, text_format) are protected and silently
+ ignored if provided.

  Returns:
  pandas.Series: Series whose values are instances of ``response_format``.
@@ -1175,14 +1208,10 @@ class AsyncOpenAIVecSeriesAccessor:
  system_message=instructions,
  response_format=response_format,
  cache=cache,
- temperature=temperature,
- top_p=top_p,
+ api_kwargs=api_kwargs,
  )

- # Forward any extra kwargs to the underlying Responses API, excluding proxy-specific ones.
- proxy_params = {"show_progress", "batch_size", "max_concurrency"}
- filtered_kwargs = {k: v for k, v in api_kwargs.items() if k not in proxy_params}
- results = await client.parse(self._obj.tolist(), **filtered_kwargs)
+ results = await client.parse(self._obj.tolist())
  return pd.Series(results, index=self._obj.index, name=self._obj.name)

  async def responses(
@@ -1190,8 +1219,6 @@ class AsyncOpenAIVecSeriesAccessor:
  instructions: str,
  response_format: type[ResponseFormat] = str,
  batch_size: int | None = None,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  max_concurrency: int = 8,
  show_progress: bool = False,
  **api_kwargs,
@@ -1221,11 +1248,14 @@ class AsyncOpenAIVecSeriesAccessor:
  batch_size (int | None, optional): Number of prompts grouped into a single
  request. Defaults to ``None`` (automatic batch size optimization
  based on execution time). Set to a positive integer for fixed batch size.
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
  max_concurrency (int, optional): Maximum number of concurrent
  requests. Defaults to ``8``.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
+ **api_kwargs: Additional keyword arguments forwarded verbatim to
+ ``AsyncOpenAI.responses.parse`` (e.g. ``temperature``, ``top_p``,
+ ``max_output_tokens``, penalties, future parameters). Core batching keys
+ (model, instructions, input, text_format) are protected and silently
+ ignored if provided.

  Returns:
  pandas.Series: Series whose values are instances of ``response_format``.
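The async accessor follows suit. A hedged async sketch (the `aio` accessor name is taken from the `emails.aio.parse` example later in this diff):

```python
import asyncio

import pandas as pd
import openaivec.pandas_ext  # noqa: F401

async def main() -> None:
    animals = pd.Series(["panda", "koala"])
    translated = await animals.aio.responses(
        "translate to French",
        max_concurrency=4,
        temperature=0.0,  # forwarded verbatim to AsyncOpenAI.responses.parse
    )
    print(translated)

asyncio.run(main())
```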
@@ -1239,14 +1269,13 @@ class AsyncOpenAIVecSeriesAccessor:
  batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
  ),
  response_format=response_format,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

  async def embeddings_with_cache(
  self,
  cache: AsyncBatchingMapProxy[str, np.ndarray],
+ **api_kwargs,
  ) -> pd.Series:
  """Compute OpenAI embeddings for every Series element using a provided cache (asynchronously).

@@ -1274,6 +1303,7 @@ class AsyncOpenAIVecSeriesAccessor:
  cache (AsyncBatchingMapProxy[str, np.ndarray]): Pre-configured cache
  instance for managing API call batching and deduplication.
  Set cache.batch_size=None to enable automatic batch size optimization.
+ **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).

  Returns:
  pandas.Series: Series whose values are ``np.ndarray`` objects
@@ -1286,6 +1316,7 @@ class AsyncOpenAIVecSeriesAccessor:
  client=CONTAINER.resolve(AsyncOpenAI),
  model_name=CONTAINER.resolve(EmbeddingsModelName).value,
  cache=cache,
+ api_kwargs=api_kwargs,
  )

  # Await the async operation
@@ -1298,7 +1329,7 @@ class AsyncOpenAIVecSeriesAccessor:
  )

  async def embeddings(
- self, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = False
+ self, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = False, **api_kwargs
  ) -> pd.Series:
  """Compute OpenAI embeddings for every Series element (asynchronously).

@@ -1324,6 +1355,7 @@ class AsyncOpenAIVecSeriesAccessor:
  max_concurrency (int, optional): Maximum number of concurrent
  requests. Defaults to ``8``.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
+ **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).

  Returns:
  pandas.Series: Series whose values are ``np.ndarray`` objects
@@ -1336,13 +1368,13 @@ class AsyncOpenAIVecSeriesAccessor:
  cache=AsyncBatchingMapProxy(
  batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
  ),
+ **api_kwargs,
  )

  async def task_with_cache(
  self,
  task: PreparedTask[ResponseFormat],
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
- **api_kwargs,
  ) -> pd.Series:
  """Execute a prepared task on every Series element using a provided cache (asynchronously).

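The async embeddings hunks above mirror the sync ones. A hedged sketch:

```python
import asyncio

import pandas as pd
import openaivec.pandas_ext  # noqa: F401

async def main() -> None:
    texts = pd.Series(["hello", "world"])
    # dimensions rides in **api_kwargs (text-embedding-3 models only).
    vectors = await texts.aio.embeddings(max_concurrency=4, dimensions=256)
    print(vectors.iloc[0].shape)

asyncio.run(main())
```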
 
@@ -1393,11 +1425,9 @@ class AsyncOpenAIVecSeriesAccessor:
1393
1425
  system_message=task.instructions,
1394
1426
  response_format=task.response_format,
1395
1427
  cache=cache,
1396
- temperature=task.temperature,
1397
- top_p=task.top_p,
1428
+ api_kwargs=task.api_kwargs,
1398
1429
  )
1399
- # Await the async operation
1400
- results = await client.parse(self._obj.tolist(), **api_kwargs)
1430
+ results = await client.parse(self._obj.tolist())
1401
1431
 
1402
1432
  return pd.Series(results, index=self._obj.index, name=self._obj.name)
1403
1433
 
@@ -1407,7 +1437,6 @@ class AsyncOpenAIVecSeriesAccessor:
  batch_size: int | None = None,
  max_concurrency: int = 8,
  show_progress: bool = False,
- **api_kwargs,
  ) -> pd.Series:
  """Execute a prepared task on every Series element (asynchronously).

@@ -1442,9 +1471,8 @@ class AsyncOpenAIVecSeriesAccessor:
  requests. Defaults to 8.
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
- ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
+ Note:
+ The task's stored API parameters are used. Core batching / routing
  keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
  library and cannot be overridden.

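A sketch of the async task flow after this change (schema inference stays synchronous, as the parse_with_cache hunk below notes):

```python
import asyncio

import pandas as pd
import openaivec.pandas_ext  # noqa: F401

async def main() -> None:
    comments = pd.Series(["Love it!", "Broke in a week."])
    schema = comments.ai.infer_schema(instructions="Classify sentiment")  # sync
    results = await comments.aio.task(schema.task, max_concurrency=4)
    print(results)

asyncio.run(main())
```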
@@ -1460,42 +1488,39 @@ class AsyncOpenAIVecSeriesAccessor:
  cache=AsyncBatchingMapProxy(
  batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
  ),
- **api_kwargs,
  )

  async def parse_with_cache(
  self,
  instructions: str,
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
- response_format: ResponseFormat = None,
+ response_format: type[ResponseFormat] | None = None,
  max_examples: int = 100,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
- """Parse Series values using an LLM with a provided cache (asynchronously).
+ """Parse Series values into structured data using an LLM with a provided cache (asynchronously).

- This method allows you to parse the Series content into structured data
- using an LLM, optionally inferring a schema based on the provided purpose.
+ This async method provides external cache control while parsing Series
+ content into structured data. Automatic schema inference is performed
+ when no response format is specified.

  Args:
- instructions (str): System prompt for the LLM.
- cache (AsyncBatchingMapProxy[str, ResponseFormat]): Explicit cache instance for
- batching and deduplication control.
- response_format (type[BaseModel] | None): Pydantic model or built-in type
- for structured output. If None, schema is inferred.
- max_examples (int): Maximum number of examples to use for schema inference.
- Defaults to 100.
- temperature (float | None): Sampling temperature. Defaults to 0.0.
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
-
- Additional Keyword Args:
- Arbitrary OpenAI Responses API parameters (e.g. `frequency_penalty`, `presence_penalty`,
- `seed`, etc.) are forwarded verbatim to the underlying client.
+ instructions (str): Plain language description of what to extract
+ (e.g., "Extract dates, amounts, and descriptions from receipts").
+ Guides both extraction and schema inference.
+ cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured
+ async cache for managing concurrent API calls and deduplication.
+ Set cache.batch_size=None for automatic optimization.
+ response_format (type[ResponseFormat] | None, optional): Target
+ structure for parsed data. Can be a Pydantic model, built-in
+ type, or None for automatic inference. Defaults to None.
+ max_examples (int, optional): Maximum values to analyze for schema
+ inference (when response_format is None). Defaults to 100.
+ **api_kwargs: Additional OpenAI API parameters.

  Returns:
- pandas.Series: Series with parsed structured data as instances of
- `response_format` or inferred schema model.
+ pandas.Series: Series containing parsed structured data aligned
+ with the original index.

  Note:
  This is an asynchronous method and must be awaited.
@@ -1503,51 +1528,59 @@ class AsyncOpenAIVecSeriesAccessor:
  schema: InferredSchema | None = None
  if response_format is None:
  # Use synchronous schema inference
- schema = self._obj.ai.infer_schema(purpose=instructions, max_examples=max_examples)
+ schema = self._obj.ai.infer_schema(instructions=instructions, max_examples=max_examples)

  return await self.responses_with_cache(
  instructions=schema.inference_prompt if schema else instructions,
  cache=cache,
  response_format=response_format or schema.model,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

  async def parse(
  self,
  instructions: str,
- response_format: ResponseFormat = None,
+ response_format: type[ResponseFormat] | None = None,
  max_examples: int = 100,
  batch_size: int | None = None,
  max_concurrency: int = 8,
  show_progress: bool = False,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
- """Parse Series values using an LLM with optional schema inference (asynchronously).
+ """Parse Series values into structured data using an LLM (asynchronously).

- This method allows you to parse the Series content into structured data
- using an LLM, optionally inferring a schema based on the provided purpose.
+ Async version of the parse method, extracting structured information
+ from unstructured text with automatic schema inference when needed.

  Args:
- instructions (str): System prompt for the LLM.
- response_format (type[BaseModel] | None): Pydantic model or built-in type
- for structured output. If None, schema is inferred.
- max_examples (int): Maximum number of examples to use for schema inference.
+ instructions (str): Plain language extraction goals (e.g., "Extract
+ product names, prices, and categories from descriptions").
+ response_format (type[ResponseFormat] | None, optional): Target
+ structure. None triggers automatic schema inference. Defaults to None.
+ max_examples (int, optional): Maximum values for schema inference.
  Defaults to 100.
- batch_size (int | None): Number of requests to process in parallel.
- Defaults to None (automatic optimization).
- max_concurrency (int): Maximum number of concurrent requests. Defaults to 8.
- show_progress (bool): Whether to display a progress bar during processing.
- Defaults to False.
- temperature (float | None): Sampling temperature. Defaults to 0.0.
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
+ batch_size (int | None, optional): Requests per batch. None for
+ automatic optimization. Defaults to None.
+ max_concurrency (int, optional): Maximum concurrent API requests.
+ Defaults to 8.
+ show_progress (bool, optional): Show progress bar. Defaults to False.
+ **api_kwargs: Additional OpenAI API parameters.

  Returns:
- pandas.Series: Series with parsed structured data as instances of
- `response_format` or inferred schema model.
+ pandas.Series: Parsed structured data indexed like the original Series.
+
+ Example:
+ ```python
+ emails = pd.Series([
+ "Meeting tomorrow at 3pm with John about Q4 planning",
+ "Lunch with Sarah on Friday to discuss new project"
+ ])
+
+ # Async extraction with schema inference
+ parsed = await emails.aio.parse(
+ "Extract meeting details including time, person, and topic"
+ )
+ ```

  Note:
  This is an asynchronous method and must be awaited.
@@ -1559,8 +1592,6 @@ class AsyncOpenAIVecSeriesAccessor:
  ),
  response_format=response_format,
  max_examples=max_examples,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

@@ -1577,8 +1608,6 @@ class AsyncOpenAIVecDataFrameAccessor:
  instructions: str,
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
  response_format: type[ResponseFormat] = str,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  **api_kwargs,
  ) -> pd.Series:
  """Generate a response for each row after serializing it to JSON using a provided cache (asynchronously).
@@ -1614,8 +1643,7 @@ class AsyncOpenAIVecDataFrameAccessor:
  Set cache.batch_size=None to enable automatic batch size optimization.
  response_format (type[ResponseFormat], optional): Desired Python type of the
  responses. Defaults to ``str``.
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).

  Returns:
  pandas.Series: Responses aligned with the DataFrame's original index.
@@ -1628,8 +1656,6 @@ class AsyncOpenAIVecDataFrameAccessor:
  instructions=instructions,
  cache=cache,
  response_format=response_format,
- temperature=temperature,
- top_p=top_p,
  **api_kwargs,
  )

@@ -1638,8 +1664,6 @@ class AsyncOpenAIVecDataFrameAccessor:
  instructions: str,
  response_format: type[ResponseFormat] = str,
  batch_size: int | None = None,
- temperature: float | None = 0.0,
- top_p: float = 1.0,
  max_concurrency: int = 8,
  show_progress: bool = False,
  **api_kwargs,
@@ -1673,8 +1697,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1673
1697
  batch_size (int | None, optional): Number of requests sent in one batch.
1674
1698
  Defaults to ``None`` (automatic batch size optimization
1675
1699
  based on execution time). Set to a positive integer for fixed batch size.
1676
- temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
1677
- top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
1700
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
1678
1701
  max_concurrency (int, optional): Maximum number of concurrent
1679
1702
  requests. Defaults to ``8``.
1680
1703
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
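For the convenience path, a minimal sketch of the per-row call with a sampling parameter riding along in ``**api_kwargs`` (client and model configuration omitted):

```python
import asyncio

import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (assumed accessor registration)


async def main() -> None:
    df = pd.DataFrame({"name": ["apple", "banana"], "color": ["red", "yellow"]})
    # Each row is serialized to JSON and answered individually; temperature
    # is forwarded through **api_kwargs rather than a dedicated parameter.
    answers = await df.aio.responses(
        "Describe this fruit in one short sentence",
        batch_size=32,
        max_concurrency=4,
        temperature=0.7,
    )
    print(answers)


asyncio.run(main())
```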
@@ -1691,8 +1714,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1691
1714
  batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
1692
1715
  ),
1693
1716
  response_format=response_format,
1694
- temperature=temperature,
1695
- top_p=top_p,
1696
1717
  **api_kwargs,
1697
1718
  )
1698
1719
 
@@ -1700,7 +1721,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1700
1721
  self,
1701
1722
  task: PreparedTask[ResponseFormat],
1702
1723
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
1703
- **api_kwargs,
1704
1724
  ) -> pd.Series:
1705
1725
  """Execute a prepared task on each DataFrame row using a provided cache (asynchronously).
1706
1726
 
@@ -1710,8 +1730,8 @@ class AsyncOpenAIVecDataFrameAccessor:
1710
1730
  task (PreparedTask): Prepared task (instructions + response_format + stored API params).
1711
1731
  cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured async cache instance.
1712
1732
 
1713
- Additional Keyword Args:
1714
- Arbitrary OpenAI Responses API parameters forwarded verbatim. Core routing keys are protected.
1733
+ Note:
1734
+ The task's stored API parameters are used. Core routing keys are managed internally.
1715
1735
 
1716
1736
  Returns:
1717
1737
  pandas.Series: Task results aligned with the DataFrame's original index.
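A sketch of sharing one cache across prepared-task runs; building the ``PreparedTask`` is outside this hunk, so ``my_task`` below is a stand-in, and the proxy import path is assumed:

```python
import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (assumed accessor registration)
from openaivec.proxy import AsyncBatchingMapProxy  # import path assumed


async def run_both(my_task) -> None:
    # my_task: a PreparedTask built elsewhere; its stored API parameters
    # (e.g. temperature) apply, since there is no per-call **api_kwargs anymore.
    cache = AsyncBatchingMapProxy(batch_size=None, max_concurrency=8, show_progress=False)

    df_a = pd.DataFrame({"text": ["great product", "terrible support"]})
    df_b = pd.DataFrame({"text": ["terrible support", "fast shipping"]})

    # The overlapping "terrible support" row is answered only once.
    res_a = await df_a.aio.task_with_cache(task=my_task, cache=cache)
    res_b = await df_b.aio.task_with_cache(task=my_task, cache=cache)
    print(res_a, res_b, sep="\n")
```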
@@ -1722,7 +1742,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1722
1742
  return await _df_rows_to_json_series(self._obj).aio.task_with_cache(
1723
1743
  task=task,
1724
1744
  cache=cache,
1725
- **api_kwargs,
1726
1745
  )
1727
1746
 
1728
1747
  async def task(
@@ -1731,7 +1750,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1731
1750
  batch_size: int | None = None,
1732
1751
  max_concurrency: int = 8,
1733
1752
  show_progress: bool = False,
1734
- **api_kwargs,
1735
1753
  ) -> pd.Series:
1736
1754
  """Execute a prepared task on each DataFrame row after serializing it to JSON (asynchronously).
1737
1755
 
@@ -1770,9 +1788,8 @@ class AsyncOpenAIVecDataFrameAccessor:
1770
1788
  requests. Defaults to 8.
1771
1789
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1772
1790
 
1773
- Additional Keyword Args:
1774
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
1775
- ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
1791
+ Note:
1792
+ The task's stored API parameters are used. Core batching / routing
1776
1793
  keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
1777
1794
  library and cannot be overridden.
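The cache-free variant follows the same rule. A short sketch, again with ``my_task`` standing in for a ``PreparedTask`` built elsewhere:

```python
import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (assumed accessor registration)


async def run(my_task) -> None:
    df = pd.DataFrame({"review": ["Loved it", "Never again"]})
    # Sampling parameters come from the task itself; only batching and
    # progress reporting are controlled per call.
    results = await df.aio.task(
        my_task,
        batch_size=None,  # automatic batch-size optimization
        max_concurrency=8,
        show_progress=True,
    )
    print(results)
```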
1778
1795
 
@@ -1789,43 +1806,34 @@ class AsyncOpenAIVecDataFrameAccessor:
1789
1806
  batch_size=batch_size,
1790
1807
  max_concurrency=max_concurrency,
1791
1808
  show_progress=show_progress,
1792
- **api_kwargs,
1793
1809
  )
1794
1810
 
1795
1811
  async def parse_with_cache(
1796
1812
  self,
1797
1813
  instructions: str,
1798
1814
  cache: AsyncBatchingMapProxy[str, ResponseFormat],
1799
- response_format: ResponseFormat = None,
1815
+ response_format: type[ResponseFormat] | None = None,
1800
1816
  max_examples: int = 100,
1801
- temperature: float | None = 0.0,
1802
- top_p: float = 1.0,
1803
1817
  **api_kwargs,
1804
1818
  ) -> pd.Series:
1805
- """Parse DataFrame rows using an LLM with a provided cache (asynchronously).
1819
+ """Parse DataFrame rows into structured data using an LLM with cache (asynchronously).
1806
1820
 
1807
- This method allows you to parse each DataFrame row (serialized as JSON)
1808
- into structured data using an LLM, optionally inferring a schema based
1809
- on the provided purpose.
1821
+ Async method for parsing DataFrame rows (serialized as JSON) with external cache
1822
+ control, enabling deduplication across operations and concurrent processing.
1810
1823
 
1811
1824
  Args:
1812
- instructions (str): System prompt for the LLM.
1813
- cache (AsyncBatchingMapProxy[str, ResponseFormat]): Explicit cache instance for
1814
- batching and deduplication control.
1815
- response_format (type[BaseModel] | None): Pydantic model or built-in type
1816
- for structured output. If None, schema is inferred.
1817
- max_examples (int): Maximum number of examples to use for schema inference.
1825
+ instructions (str): Plain language extraction goals (e.g., "Extract
1826
+ invoice details including items, quantities, and totals").
1827
+ cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured
1828
+ async cache for concurrent API call management.
1829
+ response_format (type[ResponseFormat] | None, optional): Target
1830
+ structure. None triggers automatic schema inference. Defaults to None.
1831
+ max_examples (int, optional): Maximum rows for schema inference.
1818
1832
  Defaults to 100.
1819
- temperature (float | None): Sampling temperature. Defaults to 0.0.
1820
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
1821
-
1822
- Additional Keyword Args:
1823
- Arbitrary OpenAI Responses API parameters (e.g. `frequency_penalty`, `presence_penalty`,
1824
- `seed`, etc.) are forwarded verbatim to the underlying client.
1833
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
1825
1834
 
1826
1835
  Returns:
1827
- pandas.Series: Series with parsed structured data as instances of
1828
- `response_format` or inferred schema model.
1836
+ pandas.Series: Parsed structured data indexed like the original DataFrame.
1829
1837
 
1830
1838
  Note:
1831
1839
  This is an asynchronous method and must be awaited.
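A sketch combining automatic schema inference with an externally managed cache (proxy import path assumed; the inferred model's fields depend on the sampled rows):

```python
import asyncio

import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (assumed accessor registration)
from openaivec.proxy import AsyncBatchingMapProxy  # import path assumed


async def main() -> None:
    df = pd.DataFrame(
        {"log": ["2024-05-01 ERROR disk full", "2024-05-02 WARN slow query"]}
    )
    cache = AsyncBatchingMapProxy(batch_size=None, max_concurrency=8, show_progress=False)

    # response_format=None: up to max_examples rows are sampled to infer a
    # schema, then every row is parsed through the shared cache.
    parsed = await df.aio.parse_with_cache(
        "Extract timestamp, severity, and message",
        cache=cache,
        response_format=None,
        max_examples=50,
    )
    print(parsed.iloc[0])


asyncio.run(main())
```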
@@ -1835,46 +1843,55 @@ class AsyncOpenAIVecDataFrameAccessor:
1835
1843
  cache=cache,
1836
1844
  response_format=response_format,
1837
1845
  max_examples=max_examples,
1838
- temperature=temperature,
1839
- top_p=top_p,
1840
1846
  **api_kwargs,
1841
1847
  )
1842
1848
 
1843
1849
  async def parse(
1844
1850
  self,
1845
1851
  instructions: str,
1846
- response_format: ResponseFormat = None,
1852
+ response_format: type[ResponseFormat] | None = None,
1847
1853
  max_examples: int = 100,
1848
1854
  batch_size: int | None = None,
1849
1855
  max_concurrency: int = 8,
1850
1856
  show_progress: bool = False,
1851
- temperature: float | None = 0.0,
1852
- top_p: float = 1.0,
1853
1857
  **api_kwargs,
1854
1858
  ) -> pd.Series:
1855
- """Parse DataFrame rows using an LLM with optional schema inference (asynchronously).
1859
+ """Parse DataFrame rows into structured data using an LLM (asynchronously).
1856
1860
 
1857
- This method allows you to parse each DataFrame row (serialized as JSON)
1858
- into structured data using an LLM, optionally inferring a schema based
1859
- on the provided purpose.
1861
+ Async version for extracting structured information from DataFrame rows,
1862
+ with automatic schema inference when no format is specified.
1860
1863
 
1861
1864
  Args:
1862
- instructions (str): System prompt for the LLM.
1863
- response_format (type[BaseModel] | None): Pydantic model or built-in type
1864
- for structured output. If None, schema is inferred.
1865
- max_examples (int): Maximum number of examples to use for schema inference.
1865
+ instructions (str): Plain language extraction goals (e.g., "Extract
1866
+ customer details, order items, and payment information").
1867
+ response_format (type[ResponseFormat] | None, optional): Target
1868
+ structure. None triggers automatic inference. Defaults to None.
1869
+ max_examples (int, optional): Maximum rows for schema inference.
1866
1870
  Defaults to 100.
1867
- batch_size (int | None): Number of requests to process in parallel.
1868
- Defaults to None (automatic optimization).
1869
- max_concurrency (int): Maximum number of concurrent requests. Defaults to 8.
1870
- show_progress (bool): Whether to display a progress bar during processing.
1871
- Defaults to False.
1872
- temperature (float | None): Sampling temperature. Defaults to 0.0.
1873
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
1871
+ batch_size (int | None, optional): Rows per batch. None for
1872
+ automatic optimization. Defaults to None.
1873
+ max_concurrency (int, optional): Maximum concurrent requests.
1874
+ Defaults to 8.
1875
+ show_progress (bool, optional): Show progress bar. Defaults to False.
1876
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
1874
1877
 
1875
1878
  Returns:
1876
- pandas.Series: Series with parsed structured data as instances of
1877
- `response_format` or inferred schema model.
1879
+ pandas.Series: Parsed structured data indexed like the original DataFrame.
1880
+
1881
+ Example:
1882
+ ```python
1883
+ df = pd.DataFrame({
1884
+ 'raw_data': [
1885
+ 'Customer: John Doe, Order: 2 laptops @ $1200 each',
1886
+ 'Customer: Jane Smith, Order: 5 phones @ $800 each'
1887
+ ]
1888
+ })
1889
+
1890
+ # Async parsing with automatic schema inference
1891
+ parsed = await df.aio.parse(
1892
+ "Extract customer name, product, quantity, and unit price"
1893
+ )
1894
+ ```
1878
1895
 
1879
1896
  Note:
1880
1897
  This is an asynchronous method and must be awaited.
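The docstring example leans on inference; when the target schema is known up front, passing a Pydantic model skips the inference pass. A sketch with an illustrative ``Order`` model:

```python
import asyncio

import pandas as pd
from pydantic import BaseModel

from openaivec import pandas_ext  # noqa: F401  (assumed accessor registration)


class Order(BaseModel):
    # Illustrative target structure; not part of the library.
    customer: str
    product: str
    quantity: int
    unit_price: float


async def main() -> None:
    df = pd.DataFrame(
        {"raw_data": ["Customer: John Doe, Order: 2 laptops @ $1200 each"]}
    )
    # Explicit response_format: no schema-inference pass is needed.
    parsed = await df.aio.parse(
        "Extract customer name, product, quantity, and unit price",
        response_format=Order,
        max_concurrency=8,
    )
    print(parsed.iloc[0])


asyncio.run(main())
```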
@@ -1886,8 +1903,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1886
1903
  ),
1887
1904
  response_format=response_format,
1888
1905
  max_examples=max_examples,
1889
- temperature=temperature,
1890
- top_p=top_p,
1891
1906
  **api_kwargs,
1892
1907
  )
1893
1908
 
@@ -1991,7 +2006,6 @@ class AsyncOpenAIVecDataFrameAccessor:
1991
2006
  batch_size: int | None = None,
1992
2007
  max_concurrency: int = 8,
1993
2008
  show_progress: bool = False,
1994
- **api_kwargs,
1995
2009
  ) -> pd.DataFrame:
1996
2010
  """Fill missing values in a DataFrame column using AI-powered inference (asynchronously).
1997
2011
 
@@ -2013,10 +2027,6 @@ class AsyncOpenAIVecDataFrameAccessor:
2013
2027
  requests. Defaults to 8.
2014
2028
  show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
2015
2029
 
2016
- Additional Keyword Args:
2017
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
2018
- ``seed``, etc.) are forwarded verbatim to the underlying task execution.
2019
-
2020
2030
  Returns:
2021
2031
  pandas.DataFrame: A new DataFrame with missing values filled in the target
2022
2032
  column. The original DataFrame is not modified.
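A sketch of the async fill. The hunk does not show how the target column is identified, so the positional ``"color"`` argument below is an assumption:

```python
import asyncio

import numpy as np
import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (assumed accessor registration)


async def main() -> None:
    df = pd.DataFrame(
        {
            "name": ["apple", "banana", "cherry"],
            "color": ["red", np.nan, "red"],
        }
    )
    # Returns a new DataFrame with the gaps in the target column filled;
    # df itself is left unmodified. ("color" as first argument is assumed.)
    filled = await df.aio.fillna("color", max_concurrency=8, show_progress=False)
    print(filled)


asyncio.run(main())
```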
@@ -2054,7 +2064,10 @@ class AsyncOpenAIVecDataFrameAccessor:
2054
2064
  return self._obj
2055
2065
 
2056
2066
  filled_values: list[FillNaResponse] = await missing_rows.aio.task(
2057
- task=task, batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress, **api_kwargs
2067
+ task=task,
2068
+ batch_size=batch_size,
2069
+ max_concurrency=max_concurrency,
2070
+ show_progress=show_progress,
2058
2071
  )
2059
2072
 
2060
2073
  # get deep copy of the DataFrame to avoid modifying the original