PyPI - openaivec - Versions diffs - 0.14.12__py3-none-any.whl → 0.14.14__py3-none-any.whl - Mend

openaivec 0.14.12__py3-none-any.whl → 0.14.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

openaivec/_embeddings.py +17 -4
openaivec/_model.py +7 -12
openaivec/_prompt.py +3 -6
openaivec/_responses.py +39 -117
openaivec/_schema.py +27 -23
openaivec/pandas_ext.py +355 -343
openaivec/spark.py +98 -56
openaivec/task/__init__.py +1 -1
openaivec/task/customer_support/customer_sentiment.py +4 -9
openaivec/task/customer_support/inquiry_classification.py +5 -8
openaivec/task/customer_support/inquiry_summary.py +5 -6
openaivec/task/customer_support/intent_analysis.py +5 -7
openaivec/task/customer_support/response_suggestion.py +5 -8
openaivec/task/customer_support/urgency_analysis.py +5 -8
openaivec/task/nlp/dependency_parsing.py +1 -2
openaivec/task/nlp/keyword_extraction.py +1 -2
openaivec/task/nlp/morphological_analysis.py +1 -2
openaivec/task/nlp/named_entity_recognition.py +1 -2
openaivec/task/nlp/sentiment_analysis.py +1 -2
openaivec/task/nlp/translation.py +1 -1
openaivec/task/table/fillna.py +8 -3
{openaivec-0.14.12.dist-info → openaivec-0.14.14.dist-info}/METADATA +1 -1
openaivec-0.14.14.dist-info/RECORD +37 -0
openaivec-0.14.12.dist-info/RECORD +0 -37
{openaivec-0.14.12.dist-info → openaivec-0.14.14.dist-info}/WHEEL +0 -0
{openaivec-0.14.12.dist-info → openaivec-0.14.14.dist-info}/licenses/LICENSE +0 -0

openaivec/spark.py CHANGED Viewed

@@ -134,6 +134,7 @@ import numpy as np
 import pandas as pd
 import tiktoken
 from pydantic import BaseModel
+from pyspark import SparkContext
 from pyspark.sql import SparkSession
 from pyspark.sql.pandas.functions import pandas_udf
 from pyspark.sql.types import ArrayType, BooleanType, FloatType, IntegerType, StringType, StructField, StructType
@@ -180,7 +181,10 @@ def setup(
             If provided, registers `EmbeddingsModelName` in the DI container.
     """
-    sc = spark.sparkContext
+    CONTAINER.register(SparkSession, lambda: spark)
+    CONTAINER.register(SparkContext, lambda: CONTAINER.resolve(SparkSession).sparkContext)
+    sc = CONTAINER.resolve(SparkContext)
     sc.environment["OPENAI_API_KEY"] = api_key
     os.environ["OPENAI_API_KEY"] = api_key
@@ -189,8 +193,6 @@ def setup(
         CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(responses_model_name))
     if embeddings_model_name:
-        from openaivec._model import EmbeddingsModelName
         CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(embeddings_model_name))
     CONTAINER.clear_singletons()
@@ -219,7 +221,10 @@ def setup_azure(
             If provided, registers `EmbeddingsModelName` in the DI container.
     """
-    sc = spark.sparkContext
+    CONTAINER.register(SparkSession, lambda: spark)
+    CONTAINER.register(SparkContext, lambda: CONTAINER.resolve(SparkSession).sparkContext)
+    sc = CONTAINER.resolve(SparkContext)
     sc.environment["AZURE_OPENAI_API_KEY"] = api_key
     sc.environment["AZURE_OPENAI_BASE_URL"] = base_url
     sc.environment["AZURE_OPENAI_API_VERSION"] = api_version
@@ -237,6 +242,50 @@ def setup_azure(
     CONTAINER.clear_singletons()
+def set_responses_model(model_name: str):
+    """Set the default model name for response generation in the DI container.
+    Args:
+        model_name (str): The model name to set as default for responses.
+    """
+    CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(model_name))
+    CONTAINER.clear_singletons()
+def get_responses_model() -> str | None:
+    """Get the default model name for response generation from the DI container.
+    Returns:
+        str | None: The default model name for responses, or None if not set.
+    """
+    try:
+        return CONTAINER.resolve(ResponsesModelName).value
+    except Exception:
+        return None
+def set_embeddings_model(model_name: str):
+    """Set the default model name for embeddings in the DI container.
+    Args:
+        model_name (str): The model name to set as default for embeddings.
+    """
+    CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(model_name))
+    CONTAINER.clear_singletons()
+def get_embeddings_model() -> str | None:
+    """Get the default model name for embeddings from the DI container.
+    Returns:
+        str | None: The default model name for embeddings, or None if not set.
+    """
+    try:
+        return CONTAINER.resolve(EmbeddingsModelName).value
+    except Exception:
+        return None
 def _python_type_to_spark(python_type):
     origin = get_origin(python_type)
@@ -315,10 +364,8 @@ def _safe_dump(x: BaseModel | None) -> dict:
 def responses_udf(
     instructions: str,
     response_format: type[ResponseFormat] = str,
-    model_name: str = CONTAINER.resolve(ResponsesModelName).value,
+    model_name: str | None = None,
     batch_size: int | None = None,
-    temperature: float | None = 0.0,
-    top_p: float = 1.0,
     max_concurrency: int = 8,
     **api_kwargs,
 ) -> UserDefinedFunction:
@@ -346,24 +393,22 @@ def responses_udf(
         instructions (str): The system prompt or instructions for the model.
         response_format (type[ResponseFormat]): The desired output format. Either `str` for plain text
             or a Pydantic `BaseModel` for structured JSON output. Defaults to `str`.
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
-            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
+            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+            via ResponsesModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically
             adjusts based on execution time, targeting 30-60 seconds per batch).
             Set to a positive integer (e.g., 32-128) for fixed batch size.
-        temperature (float): Sampling temperature (0.0 to 2.0). Defaults to 0.0.
-        top_p (float): Nucleus sampling parameter. Defaults to 1.0.
         max_concurrency (int): Maximum number of concurrent API requests **PER EXECUTOR**.
             Total cluster concurrency = max_concurrency × number_of_executors.
             Higher values increase throughput but may hit OpenAI rate limits.
             Recommended: 4-12 per executor. Defaults to 8.
-    Additional Keyword Args:
-        Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
-        ``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
-        These parameters are applied to all API requests made by the UDF.
+        **api_kwargs: Additional OpenAI API parameters (e.g. ``temperature``, ``top_p``,
+            ``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
+            forwarded verbatim to the underlying API calls. These parameters are applied to
+            all API requests made by the UDF.
     Returns:
         UserDefinedFunction: A Spark pandas UDF configured to generate responses asynchronously.
@@ -380,13 +425,15 @@ def responses_udf(
         - Consider your OpenAI tier limits: total_requests = max_concurrency × executors
         - Use Spark UI to optimize partition sizes relative to batch_size
     """
+    _model_name = model_name or CONTAINER.resolve(ResponsesModelName).value
     if issubclass(response_format, BaseModel):
         spark_schema = _pydantic_to_spark_schema(response_format)
         json_schema_string = serialize_base_model(response_format)
         @pandas_udf(returnType=spark_schema)  # type: ignore[call-overload]
         def structure_udf(col: Iterator[pd.Series]) -> Iterator[pd.DataFrame]:
-            pandas_ext.responses_model(model_name)
+            pandas_ext.responses_model(_model_name)
             response_format = deserialize_base_model(json_schema_string)
             cache = AsyncBatchingMapProxy[str, response_format](
                 batch_size=batch_size,
@@ -399,8 +446,6 @@ def responses_udf(
                         part.aio.responses_with_cache(
                             instructions=instructions,
                             response_format=response_format,
-                            temperature=temperature,
-                            top_p=top_p,
                             cache=cache,
                             **api_kwargs,
                         )
@@ -415,7 +460,7 @@ def responses_udf(
         @pandas_udf(returnType=StringType())  # type: ignore[call-overload]
         def string_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
-            pandas_ext.responses_model(model_name)
+            pandas_ext.responses_model(_model_name)
             cache = AsyncBatchingMapProxy[str, str](
                 batch_size=batch_size,
                 max_concurrency=max_concurrency,
@@ -427,8 +472,6 @@ def responses_udf(
                         part.aio.responses_with_cache(
                             instructions=instructions,
                             response_format=str,
-                            temperature=temperature,
-                            top_p=top_p,
                             cache=cache,
                             **api_kwargs,
                         )
@@ -445,7 +488,7 @@ def responses_udf(
 def task_udf(
     task: PreparedTask[ResponseFormat],
-    model_name: str = CONTAINER.resolve(ResponsesModelName).value,
+    model_name: str | None = None,
     batch_size: int | None = None,
     max_concurrency: int = 8,
     **api_kwargs,
@@ -460,9 +503,10 @@ def task_udf(
     Args:
         task (PreparedTask): A predefined task configuration containing instructions,
-            response format, temperature, and top_p settings.
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
-            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
+            response format, and API parameters.
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
+            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+            via ResponsesModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically
@@ -474,10 +518,10 @@ def task_udf(
             Recommended: 4-12 per executor. Defaults to 8.
     Additional Keyword Args:
-        Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
-        ``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
-        These parameters are applied to all API requests made by the UDF and override any
-        parameters set in the task configuration.
+        Arbitrary OpenAI Responses API parameters (e.g. ``temperature``, ``top_p``,
+        ``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
+        are forwarded verbatim to the underlying API calls. These parameters are applied to
+        all API requests made by the UDF and override any parameters set in the task configuration.
     Returns:
         UserDefinedFunction: A Spark pandas UDF configured to execute the specified task
@@ -498,15 +542,16 @@ def task_udf(
         **Automatic Caching**: Duplicate inputs within each partition are cached,
         reducing API calls and costs significantly on datasets with repeated content.
     """
+    # Merge task's api_kwargs with caller's api_kwargs (caller takes precedence)
+    merged_kwargs = {**task.api_kwargs, **api_kwargs}
     return responses_udf(
         instructions=task.instructions,
         response_format=task.response_format,
         model_name=model_name,
         batch_size=batch_size,
-        temperature=task.temperature,
-        top_p=task.top_p,
         max_concurrency=max_concurrency,
-        **api_kwargs,
+        **merged_kwargs,
     )
@@ -532,15 +577,13 @@ def infer_schema(
         InferredSchema: An object containing the inferred schema and response format.
     """
-    from pyspark.sql import SparkSession
-    spark = SparkSession.builder.getOrCreate()
+    spark = CONTAINER.resolve(SparkSession)
     examples: list[str] = (
         spark.table(example_table_name).rdd.map(lambda row: row[example_field_name]).takeSample(False, max_examples)
     )
     input = SchemaInferenceInput(
-        purpose=instructions,
+        instructions=instructions,
         examples=examples,
     )
     inferer = CONTAINER.resolve(SchemaInferer)
@@ -553,10 +596,8 @@ def parse_udf(
     example_table_name: str | None = None,
     example_field_name: str | None = None,
     max_examples: int = 100,
-    model_name: str = CONTAINER.resolve(ResponsesModelName).value,
+    model_name: str | None = None,
     batch_size: int | None = None,
-    temperature: float | None = 0.0,
-    top_p: float = 1.0,
     max_concurrency: int = 8,
     **api_kwargs,
 ) -> UserDefinedFunction:
@@ -579,24 +620,23 @@ def parse_udf(
             If provided, `example_table_name` must also be specified.
         max_examples (int): Maximum number of examples to retrieve for schema inference.
             Defaults to 100.
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
-            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
+            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+            via ResponsesModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically
             adjusts based on execution time, targeting 30-60 seconds per batch).
             Set to a positive integer (e.g., 32-128) for fixed batch size
-        temperature (float | None): Sampling temperature (0.0 to 2.0). Defaults to 0.0.
-        top_p (float): Nucleus sampling parameter. Defaults to 1.0.
         max_concurrency (int): Maximum number of concurrent API requests **PER EXECUTOR**.
             Total cluster concurrency = max_concurrency × number_of_executors.
             Higher values increase throughput but may hit OpenAI rate limits.
             Recommended: 4-12 per executor. Defaults to 8.
-    Additional Keyword Args:
-        Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
-        ``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
-        These parameters are applied to all API requests made by the UDF and override any
-        parameters set in the response_format or example data.
+        **api_kwargs: Additional OpenAI API parameters (e.g. ``temperature``, ``top_p``,
+            ``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
+            forwarded verbatim to the underlying API calls. These parameters are applied to
+            all API requests made by the UDF and override any parameters set in the
+            response_format or example data.
     Returns:
         UserDefinedFunction: A Spark pandas UDF configured to parse responses asynchronously.
             Output schema is `StringType` for str response format or a struct derived from
@@ -623,17 +663,16 @@ def parse_udf(
         response_format=schema.model if schema else response_format,
         model_name=model_name,
         batch_size=batch_size,
-        temperature=temperature,
-        top_p=top_p,
         max_concurrency=max_concurrency,
         **api_kwargs,
     )
 def embeddings_udf(
-    model_name: str = CONTAINER.resolve(EmbeddingsModelName).value,
+    model_name: str | None = None,
     batch_size: int | None = None,
     max_concurrency: int = 8,
+    **api_kwargs,
 ) -> UserDefinedFunction:
     """Create an asynchronous Spark pandas UDF for generating embeddings.
@@ -656,9 +695,9 @@ def embeddings_udf(
             sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
     Args:
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-embedding-deployment").
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-embedding-deployment").
             For OpenAI, use the model name (e.g., "text-embedding-3-small").
-            Defaults to configured model in DI container.
+            Defaults to configured model in DI container via EmbeddingsModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically
@@ -669,6 +708,7 @@ def embeddings_udf(
             Total cluster concurrency = max_concurrency × number_of_executors.
             Higher values increase throughput but may hit OpenAI rate limits.
             Recommended: 4-12 per executor. Defaults to 8.
+        **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
     Returns:
         UserDefinedFunction: A Spark pandas UDF configured to generate embeddings asynchronously
@@ -685,9 +725,11 @@ def embeddings_udf(
         - Use larger batch_size for embeddings compared to response generation
     """
+    _model_name = model_name or CONTAINER.resolve(EmbeddingsModelName).value
     @pandas_udf(returnType=ArrayType(FloatType()))  # type: ignore[call-overload,misc]
     def _embeddings_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
-        pandas_ext.embeddings_model(model_name)
+        pandas_ext.embeddings_model(_model_name)
         cache = AsyncBatchingMapProxy[str, np.ndarray](
             batch_size=batch_size,
             max_concurrency=max_concurrency,
@@ -695,7 +737,7 @@ def embeddings_udf(
         try:
             for part in col:
-                embeddings: pd.Series = asyncio.run(part.aio.embeddings_with_cache(cache=cache))
+                embeddings: pd.Series = asyncio.run(part.aio.embeddings_with_cache(cache=cache, **api_kwargs))
                 yield embeddings.map(lambda x: x.tolist())
         finally:
             asyncio.run(cache.clear())

openaivec/task/__init__.py CHANGED Viewed

@@ -117,7 +117,7 @@ All tasks are built using the `PreparedTask` dataclass:
 @dataclass(frozen=True)
 class PreparedTask:
     instructions: str           # Detailed prompt for the LLM
-    response_format: Type[ResponseFormat]    # Pydantic model or str for structured/plain output
+    response_format: type[ResponseFormat]    # Pydantic model or str for structured/plain output
     temperature: float = 0.0    # Sampling temperature
     top_p: float = 1.0         # Nucleus sampling parameter
 ```

openaivec/task/customer_support/customer_sentiment.py CHANGED Viewed

@@ -95,15 +95,12 @@ class CustomerSentiment(BaseModel):
     )
-def customer_sentiment(
-    business_context: str = "general customer support", temperature: float = 0.0, top_p: float = 1.0
-) -> PreparedTask:
+def customer_sentiment(business_context: str = "general customer support", **api_kwargs) -> PreparedTask:
     """Create a configurable customer sentiment analysis task.
     Args:
         business_context (str): Business context for sentiment analysis.
-        temperature (float): Sampling temperature (0.0-1.0).
-        top_p (float): Nucleus sampling parameter (0.0-1.0).
+        **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
     Returns:
         PreparedTask configured for customer sentiment analysis.
@@ -169,10 +166,8 @@ values like "positive" for sentiment.
 Provide comprehensive sentiment analysis with business context and recommended response strategy."""
-    return PreparedTask(
-        instructions=instructions, response_format=CustomerSentiment, temperature=temperature, top_p=top_p
-    )
+    return PreparedTask(instructions=instructions, response_format=CustomerSentiment, api_kwargs=api_kwargs)
 # Backward compatibility - default configuration
-CUSTOMER_SENTIMENT = customer_sentiment()
+CUSTOMER_SENTIMENT = customer_sentiment(temperature=0.0, top_p=1.0)

openaivec/task/customer_support/inquiry_classification.py CHANGED Viewed

@@ -119,8 +119,7 @@ def inquiry_classification(
     priority_rules: Dict[str, str] | None = None,
     business_context: str = "general customer support",
     custom_keywords: Dict[str, list[str]] | None = None,
-    temperature: float = 0.0,
-    top_p: float = 1.0,
+    **api_kwargs,
 ) -> PreparedTask:
     """Create a configurable inquiry classification task.
@@ -133,8 +132,8 @@ def inquiry_classification(
             Default uses standard priority indicators.
         business_context (str): Description of the business context to help with classification.
         custom_keywords (dict[str, list[str]] | None): Dictionary mapping categories to relevant keywords.
-        temperature (float): Sampling temperature (0.0-1.0).
-        top_p (float): Nucleus sampling parameter (0.0-1.0).
+        **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
+            such as temperature, top_p, etc.
     Returns:
         PreparedTask configured for inquiry classification.
@@ -254,10 +253,8 @@ language where appropriate, but priority must use English values like "high".
 Provide accurate classification with detailed reasoning."""
-    return PreparedTask(
-        instructions=instructions, response_format=InquiryClassification, temperature=temperature, top_p=top_p
-    )
+    return PreparedTask(instructions=instructions, response_format=InquiryClassification, api_kwargs=api_kwargs)
 # Backward compatibility - default configuration
-INQUIRY_CLASSIFICATION = inquiry_classification()
+INQUIRY_CLASSIFICATION = inquiry_classification(temperature=0.0, top_p=1.0)

openaivec/task/customer_support/inquiry_summary.py CHANGED Viewed

@@ -87,16 +87,15 @@ class InquirySummary(BaseModel):
 def inquiry_summary(
     summary_length: str = "concise",
     business_context: str = "general customer support",
-    temperature: float = 0.0,
-    top_p: float = 1.0,
+    **api_kwargs,
 ) -> PreparedTask:
     """Create a configurable inquiry summary task.
     Args:
         summary_length (str): Length of summary (concise, detailed, bullet_points).
         business_context (str): Business context for summary.
-        temperature (float): Sampling temperature (0.0-1.0).
-        top_p (float): Nucleus sampling parameter (0.0-1.0).
+        **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
+            such as temperature, top_p, etc.
     Returns:
         PreparedTask configured for inquiry summarization.
@@ -163,8 +162,8 @@ input is in German, provide all summary content in German, but use English value
 Provide accurate, actionable summary that enables efficient support resolution."""
-    return PreparedTask(instructions=instructions, response_format=InquirySummary, temperature=temperature, top_p=top_p)
+    return PreparedTask(instructions=instructions, response_format=InquirySummary, api_kwargs=api_kwargs)
 # Backward compatibility - default configuration
-INQUIRY_SUMMARY = inquiry_summary()
+INQUIRY_SUMMARY = inquiry_summary(temperature=0.0, top_p=1.0)

openaivec/task/customer_support/intent_analysis.py CHANGED Viewed

@@ -100,15 +100,13 @@ class IntentAnalysis(BaseModel):
     )
-def intent_analysis(
-    business_context: str = "general customer support", temperature: float = 0.0, top_p: float = 1.0
-) -> PreparedTask:
+def intent_analysis(business_context: str = "general customer support", **api_kwargs) -> PreparedTask:
     """Create a configurable intent analysis task.
     Args:
         business_context (str): Business context for intent analysis.
-        temperature (float): Sampling temperature (0.0-1.0).
-        top_p (float): Nucleus sampling parameter (0.0-1.0).
+        **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
+            such as temperature, top_p, etc.
     Returns:
         PreparedTask configured for intent analysis.
@@ -171,8 +169,8 @@ next_steps, and reasoning in Japanese, but use English values like "get_help" fo
 Provide comprehensive intent analysis with actionable recommendations."""
-    return PreparedTask(instructions=instructions, response_format=IntentAnalysis, temperature=temperature, top_p=top_p)
+    return PreparedTask(instructions=instructions, response_format=IntentAnalysis, api_kwargs=api_kwargs)
 # Backward compatibility - default configuration
-INTENT_ANALYSIS = intent_analysis()
+INTENT_ANALYSIS = intent_analysis(temperature=0.0, top_p=1.0)

openaivec/task/customer_support/response_suggestion.py CHANGED Viewed

@@ -92,8 +92,7 @@ def response_suggestion(
     response_style: str = "professional",
     company_name: str = "our company",
     business_context: str = "general customer support",
-    temperature: float = 0.0,
-    top_p: float = 1.0,
+    **api_kwargs,
 ) -> PreparedTask:
     """Create a configurable response suggestion task.
@@ -101,8 +100,8 @@ def response_suggestion(
         response_style (str): Style of response (professional, friendly, empathetic, formal).
         company_name (str): Name of the company for personalization.
         business_context (str): Business context for responses.
-        temperature (float): Sampling temperature (0.0-1.0).
-        top_p (float): Nucleus sampling parameter (0.0-1.0).
+        **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
+            such as temperature, top_p, etc.
     Returns:
         PreparedTask configured for response suggestions.
@@ -190,10 +189,8 @@ but use English values like "empathetic" for tone.
 Generate helpful, professional response that moves toward resolution while maintaining
 positive customer relationship."""
-    return PreparedTask(
-        instructions=instructions, response_format=ResponseSuggestion, temperature=temperature, top_p=top_p
-    )
+    return PreparedTask(instructions=instructions, response_format=ResponseSuggestion, api_kwargs=api_kwargs)
 # Backward compatibility - default configuration
-RESPONSE_SUGGESTION = response_suggestion()
+RESPONSE_SUGGESTION = response_suggestion(temperature=0.0, top_p=1.0)

openaivec/task/customer_support/urgency_analysis.py CHANGED Viewed

@@ -135,8 +135,7 @@ def urgency_analysis(
     business_context: str = "general customer support",
     business_hours: str = "24/7 support",
     sla_rules: Dict[str, str] | None = None,
-    temperature: float = 0.0,
-    top_p: float = 1.0,
+    **api_kwargs,
 ) -> PreparedTask:
     """Create a configurable urgency analysis task.
@@ -149,8 +148,8 @@ def urgency_analysis(
         business_context (str): Description of the business context.
         business_hours (str): Description of business hours for response time calculation.
         sla_rules (dict[str, str] | None): Dictionary mapping customer tiers to SLA requirements.
-        temperature (float): Sampling temperature (0.0-1.0).
-        top_p (float): Nucleus sampling parameter (0.0-1.0).
+        **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
+            such as temperature, top_p, etc.
     Returns:
         PreparedTask configured for urgency analysis.
@@ -287,10 +286,8 @@ urgency_level.
 Provide detailed analysis with clear reasoning for urgency level and response time recommendations."""
-    return PreparedTask(
-        instructions=instructions, response_format=UrgencyAnalysis, temperature=temperature, top_p=top_p
-    )
+    return PreparedTask(instructions=instructions, response_format=UrgencyAnalysis, api_kwargs=api_kwargs)
 # Backward compatibility - default configuration
-URGENCY_ANALYSIS = urgency_analysis()
+URGENCY_ANALYSIS = urgency_analysis(temperature=0.0, top_p=1.0)

openaivec/task/nlp/dependency_parsing.py CHANGED Viewed

@@ -75,6 +75,5 @@ DEPENDENCY_PARSING = PreparedTask(
     "relations between words, determine the root word, and provide a tree representation of the "
     "syntactic structure.",
     response_format=DependencyParsing,
-    temperature=0.0,
-    top_p=1.0,
+    api_kwargs={"temperature": 0.0, "top_p": 1.0},
 )

openaivec/task/nlp/keyword_extraction.py CHANGED Viewed

@@ -75,6 +75,5 @@ KEYWORD_EXTRACTION = PreparedTask(
     instructions="Extract important keywords and phrases from the following text. Rank them "
     "by importance, provide frequency counts, identify main topics, and generate a brief summary.",
     response_format=KeywordExtraction,
-    temperature=0.0,
-    top_p=1.0,
+    api_kwargs={"temperature": 0.0, "top_p": 1.0},
 )

openaivec/task/nlp/morphological_analysis.py CHANGED Viewed

@@ -70,6 +70,5 @@ MORPHOLOGICAL_ANALYSIS = PreparedTask(
     "identify part-of-speech tags, provide lemmatized forms, and extract morphological features "
     "for each token.",
     response_format=MorphologicalAnalysis,
-    temperature=0.0,
-    top_p=1.0,
+    api_kwargs={"temperature": 0.0, "top_p": 1.0},
 )

openaivec/task/nlp/named_entity_recognition.py CHANGED Viewed

@@ -78,6 +78,5 @@ NAMED_ENTITY_RECOGNITION = PreparedTask(
     "organizations, locations, dates, money, percentages, and other miscellaneous entities "
     "with their positions and confidence scores.",
     response_format=NamedEntityRecognition,
-    temperature=0.0,
-    top_p=1.0,
+    api_kwargs={"temperature": 0.0, "top_p": 1.0},
 )

openaivec/task/nlp/sentiment_analysis.py CHANGED Viewed

@@ -78,6 +78,5 @@ SENTIMENT_ANALYSIS = PreparedTask(
     "English values specified (positive/negative/neutral for sentiment, and "
     "joy/sadness/anger/fear/surprise/disgust for emotions).",
     response_format=SentimentAnalysis,
-    temperature=0.0,
-    top_p=1.0,
+    api_kwargs={"temperature": 0.0, "top_p": 1.0},
 )

openaivec/task/nlp/translation.py CHANGED Viewed

@@ -157,5 +157,5 @@ class TranslatedString(BaseModel):
 instructions = "Translate the following text into multiple languages. "
 MULTILINGUAL_TRANSLATION = PreparedTask(
-    instructions=instructions, response_format=TranslatedString, temperature=0.0, top_p=1.0
+    instructions=instructions, response_format=TranslatedString, api_kwargs={"temperature": 0.0, "top_p": 1.0}
 )

openaivec/task/table/fillna.py CHANGED Viewed

@@ -125,7 +125,7 @@ class FillNaResponse(BaseModel):
     )
-def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -> PreparedTask:
+def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500, **api_kwargs) -> PreparedTask:
     """Create a prepared task for filling missing values in a DataFrame column.
     Analyzes the provided DataFrame to understand data patterns and creates
@@ -141,12 +141,14 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
         max_examples (int): Maximum number of example rows to use for few-shot
             learning. Defaults to 500. Higher values provide more context
             but increase token usage and processing time.
+        **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
+            such as temperature, top_p, etc.
     Returns:
         PreparedTask configured for missing value imputation with:
         - Instructions based on DataFrame patterns
         - FillNaResponse format for structured output
-        - Temperature=0.0 and top_p=1.0 for deterministic results
+        - Default deterministic settings (temperature=0.0, top_p=1.0)
     Raises:
         ValueError: If target_column_name doesn't exist in DataFrame,
@@ -180,4 +182,7 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
     if df[target_column_name].notna().sum() == 0:
         raise ValueError(f"Column '{target_column_name}' contains no non-null values for training examples.")
     instructions = get_instructions(df, target_column_name, max_examples)
-    return PreparedTask(instructions=instructions, response_format=FillNaResponse, temperature=0.0, top_p=1.0)
+    # Set default values for deterministic results if not provided
+    if not api_kwargs:
+        api_kwargs = {"temperature": 0.0, "top_p": 1.0}
+    return PreparedTask(instructions=instructions, response_format=FillNaResponse, api_kwargs=api_kwargs)

openaivec 0.14.12__py3-none-any.whl → 0.14.14__py3-none-any.whl

openaivec 0.14.12__py3-none-any.whl → 0.14.14__py3-none-any.whl