PyPI - openaivec - Versions diffs - 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl - Mend

openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

openaivec/__init__.py +13 -4
openaivec/_cache/__init__.py +12 -0
openaivec/_cache/optimize.py +109 -0
openaivec/_cache/proxy.py +806 -0
openaivec/_di.py +326 -0
openaivec/_embeddings.py +203 -0
openaivec/{log.py → _log.py} +2 -2
openaivec/_model.py +113 -0
openaivec/{prompt.py → _prompt.py} +95 -28
openaivec/_provider.py +207 -0
openaivec/_responses.py +511 -0
openaivec/_schema/__init__.py +9 -0
openaivec/_schema/infer.py +340 -0
openaivec/_schema/spec.py +350 -0
openaivec/_serialize.py +234 -0
openaivec/{util.py → _util.py} +25 -85
openaivec/pandas_ext.py +1635 -425
openaivec/spark.py +604 -335
openaivec/task/__init__.py +27 -29
openaivec/task/customer_support/__init__.py +9 -15
openaivec/task/customer_support/customer_sentiment.py +51 -41
openaivec/task/customer_support/inquiry_classification.py +86 -61
openaivec/task/customer_support/inquiry_summary.py +44 -45
openaivec/task/customer_support/intent_analysis.py +56 -41
openaivec/task/customer_support/response_suggestion.py +49 -43
openaivec/task/customer_support/urgency_analysis.py +76 -71
openaivec/task/nlp/__init__.py +4 -4
openaivec/task/nlp/dependency_parsing.py +19 -20
openaivec/task/nlp/keyword_extraction.py +22 -24
openaivec/task/nlp/morphological_analysis.py +25 -25
openaivec/task/nlp/named_entity_recognition.py +26 -28
openaivec/task/nlp/sentiment_analysis.py +29 -21
openaivec/task/nlp/translation.py +24 -30
openaivec/task/table/__init__.py +3 -0
openaivec/task/table/fillna.py +183 -0
openaivec-1.0.10.dist-info/METADATA +399 -0
openaivec-1.0.10.dist-info/RECORD +39 -0
{openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
openaivec/embeddings.py +0 -172
openaivec/responses.py +0 -392
openaivec/serialize.py +0 -225
openaivec/task/model.py +0 -84
openaivec-0.10.0.dist-info/METADATA +0 -546
openaivec-0.10.0.dist-info/RECORD +0 -29
{openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0

openaivec/{prompt.py → _prompt.py} RENAMED

@@ -6,7 +6,7 @@ construction of a prompt in a structured way, including setting the
 purpose, adding cautions, and providing examples.
 ```python
-from openaivec.prompt import FewShotPromptBuilder
+from openaivec import FewShotPromptBuilder
 prompt_str: str = (
     FewShotPromptBuilder()
@@ -44,13 +44,15 @@ this will produce an XML string that looks like this:
 import difflib
 import logging
-from typing import Any, List
 from xml.etree import ElementTree
 from openai import OpenAI
 from openai.types.responses import ParsedResponse
 from pydantic import BaseModel
+from openaivec._model import ResponsesModelName
+from openaivec._provider import CONTAINER
 __all__ = [
     "FewShotPrompt",
     "FewShotPromptBuilder",
@@ -87,8 +89,8 @@ class FewShotPrompt(BaseModel):
     """
     purpose: str
-    cautions: List[str]
-    examples: List[Example]
+    cautions: list[str]
+    examples: list[Example]
 class Step(BaseModel):
@@ -113,7 +115,7 @@ class Request(BaseModel):
 class Response(BaseModel):
-    iterations: List[Step]
+    iterations: list[Step]
 _PROMPT: str = """
@@ -123,6 +125,7 @@ _PROMPT: str = """
             Receive the prompt in JSON format with fields "purpose",
             "cautions", and "examples". Ensure the entire prompt is free
             from logical contradictions, redundancies, and ambiguities.
+            IMPORTANT: The "examples" array must always contain at least one example throughout all iterations.
         </Instruction>
         <Instruction id="2">
             - Modify only one element per iteration among “purpose”, “examples”, or
@@ -152,8 +155,10 @@ _PROMPT: str = """
         </Instruction>
         <Instruction id="6">
             In the "examples" field, enhance the examples to cover a wide range of scenarios.
+            CRITICAL: The examples array must NEVER be empty - always maintain at least one example.
             Add as many non-redundant examples as possible,
             since having more examples leads to better coverage and understanding.
+            You may modify existing examples or add new ones, but never remove all examples.
         </Instruction>
         <Instruction id="7">
             Verify that the improved prompt adheres to the Request and
@@ -163,6 +168,7 @@ _PROMPT: str = """
             Generate the final refined FewShotPrompt as an iteration in
             the Response, ensuring the final output is consistent,
             unambiguous, and free from any redundancies or contradictions.
+            MANDATORY: Verify that the examples array contains at least one example before completing.
         </Instruction>
     </Instructions>
     <Example>
@@ -203,7 +209,9 @@ _PROMPT: str = """
   "iterations": [
     {
       "id": 1,
-      "analysis": "The original purpose was vague and did not explicitly state the main objective. This ambiguity could lead to confusion about the task. In this iteration, we refined the purpose to clearly specify that the goal is to determine the correct category for a given word based on its context.",
+      "analysis": "The original purpose was vague and did not explicitly state the main objective.
+        This ambiguity could lead to confusion about the task. In this iteration, we refined the purpose to
+        clearly specify that the goal is to determine the correct category for a given word based on its context.",
       "prompt": {
         "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
         "cautions": [
@@ -225,7 +233,10 @@ _PROMPT: str = """
     },
     {
       "id": 2,
-      "analysis": "Next, we focused solely on the cautions section. The original cautions were generic and did not mention potential pitfalls like homonyms or polysemy. Failing to address these could result in misclassification. Therefore, we added a specific caution regarding homonyms while keeping the purpose and examples unchanged.",
+      "analysis": "Next, we focused solely on the cautions section. The original cautions were generic and
+        did not mention potential pitfalls like homonyms or polysemy. Failing to address these could result in
+        misclassification. Therefore, we added a specific caution regarding homonyms while keeping the purpose
+        and examples unchanged.",
       "prompt": {
         "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
         "cautions": [
@@ -248,7 +259,10 @@ _PROMPT: str = """
     },
     {
       "id": 3,
-      "analysis": "In this step, we improved the examples section to cover a broader range of scenarios and address potential ambiguities. By adding examples that include words with multiple interpretations (such as 'Mercury' for both a planet and an element), we enhance clarity and ensure better coverage. This iteration only modifies the examples section, leaving purpose and cautions intact.",
+      "analysis": "In this step, we improved the examples section to cover a broader range of scenarios and
+        address potential ambiguities. By adding examples that include words with multiple interpretations
+        (such as 'Mercury' for both a planet and an element), we enhance clarity and ensure better coverage.
+        This iteration only modifies the examples section, leaving purpose and cautions intact.",
       "prompt": {
         "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
         "cautions": [
@@ -328,11 +342,29 @@ def _render_prompt(prompt: FewShotPrompt) -> str:
 class FewShotPromptBuilder:
+    """Builder for creating few-shot prompts with validation.
+    Usage:
+        builder = (FewShotPromptBuilder()
+                  .purpose("Your task description")
+                  .example("input1", "output1")  # At least one required
+                  .example("input2", "output2")
+                  .build())
+    Note:
+        Both .purpose() and at least one .example() call are required before
+        calling .build(), .improve(), or .get_object().
+    """
     _prompt: FewShotPrompt
-    _steps: List[Step]
+    _steps: list[Step]
     def __init__(self):
-        """Initialize an empty FewShotPromptBuilder."""
+        """Initialize an empty FewShotPromptBuilder.
+        Note:
+            You must call .purpose() and at least one .example() before building.
+        """
         self._prompt = FewShotPrompt(purpose="", cautions=[], examples=[])
     @classmethod
@@ -391,6 +423,8 @@ class FewShotPromptBuilder:
     ) -> "FewShotPromptBuilder":
         """Add a single input/output example.
+        At least one example is required before calling .build(), .improve(), or .get_object().
         Args:
             input_value (str | BaseModel): Example input; if a Pydantic model is
                 provided it is serialised to JSON.
@@ -409,45 +443,67 @@ class FewShotPromptBuilder:
     def improve(
         self,
-        client: OpenAI,
-        model_name: str,
-        temperature: float = 0.0,
-        top_p: float = 1.0,
+        client: OpenAI | None = None,
+        model_name: str | None = None,
+        **api_kwargs,
     ) -> "FewShotPromptBuilder":
         """Iteratively refine the prompt using an LLM.
         The method calls a single LLM request that returns multiple
         editing steps and stores each step for inspection.
+        When client is None, automatically creates a client using environment variables:
+        - For OpenAI: ``OPENAI_API_KEY``
+        - For Azure OpenAI: ``AZURE_OPENAI_API_KEY``, ``AZURE_OPENAI_BASE_URL``, ``AZURE_OPENAI_API_VERSION``
         Args:
-            client (openai.OpenAI): Configured OpenAI client.
-            model_name (str): Model identifier (e.g. ``gpt-4o-mini``).
-            temperature (float, optional): Sampling temperature. Defaults to 0.0.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to 1.0.
+            client (OpenAI | None): Configured OpenAI client. If None, uses DI container with environment variables.
+            model_name (str | None): Model identifier. If None, uses default ``gpt-4.1-mini``.
+            **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
         Returns:
             FewShotPromptBuilder: The current builder instance containing the refined prompt and iteration history.
+        Raises:
+            ValueError: If the prompt is not valid (missing purpose or examples).
         """
+        # Validate before making API call to provide early feedback
+        self._validate()
-        response: ParsedResponse[Response] = client.responses.parse(
-            model=model_name,
+        _client = client or CONTAINER.resolve(OpenAI)
+        _model_name = model_name or CONTAINER.resolve(ResponsesModelName).value
+        response: ParsedResponse[Response] = _client.responses.parse(
+            model=_model_name,
             instructions=_PROMPT,
             input=Request(prompt=self._prompt).model_dump_json(),
-            temperature=temperature,
-            top_p=top_p,
             text_format=Response,
+            **api_kwargs,
         )
         # keep the original prompt
         self._steps = [Step(id=0, analysis="Original Prompt", prompt=self._prompt)]
         # add the histories
-        for step in response.output_parsed.iterations:
-            self._steps.append(step)
+        if response.output_parsed:
+            for step in response.output_parsed.iterations:
+                self._steps.append(step)
         # set the final prompt
         self._prompt = self._steps[-1].prompt
+        # Validate the improved prompt to ensure examples weren't removed by LLM
+        try:
+            self._validate()
+        except ValueError as e:
+            _logger.warning(f"LLM produced invalid prompt during improve(): {e}")
+            # Restore original prompt if LLM produced invalid result
+            self._prompt = self._steps[0].prompt
+            raise ValueError(
+                f"LLM improvement failed to maintain required fields: {e}. "
+                "This may indicate an issue with the improvement instructions or model behavior."
+            )
         return self
     def explain(self) -> "FewShotPromptBuilder":
@@ -456,6 +512,10 @@ class FewShotPromptBuilder:
         Returns:
             FewShotPromptBuilder: The current builder instance.
         """
+        if not hasattr(self, "_steps") or not self._steps:
+            print("No improvement steps available. Call improve() first.")
+            return self
         for previous, current in zip(self._steps, self._steps[1:]):
             print(f"=== Iteration {current.id} ===\n")
             print(f"Instruction: {current.analysis}")
@@ -479,9 +539,14 @@ class FewShotPromptBuilder:
         """
         # Validate that 'purpose' and 'examples' are not empty.
         if not self._prompt.purpose:
-            raise ValueError("Purpose is required.")
+            raise ValueError(
+                "Purpose is required. Please call .purpose('your purpose description') before building the prompt."
+            )
         if not self._prompt.examples or len(self._prompt.examples) == 0:
-            raise ValueError("At least one example is required.")
+            raise ValueError(
+                "At least one example is required. Please add examples using "
+                ".example('input', 'output') before building the prompt."
+            )
     def get_object(self) -> FewShotPrompt:
         """Return the underlying FewShotPrompt object.
@@ -501,11 +566,13 @@ class FewShotPromptBuilder:
         self._validate()
         return self.build_xml()
-    def build_json(self, **kwargs: Any) -> str:
+    def build_json(self, **kwargs) -> str:
         """Build and return the prompt as a JSON string.
         Args:
-            **kwargs: Keyword arguments forwarded to ``model_dump_json``.
+            **kwargs: Keyword arguments forwarded to Pydantic's ``model_dump_json``.
+                Common options include ``indent``, ``include``, ``exclude``,
+                ``by_alias``, ``exclude_unset``, ``exclude_defaults``, ``exclude_none``.
         Returns:
             str: JSON representation of the prompt.

openaivec/_provider.py ADDED

@@ -0,0 +1,207 @@
+import os
+import warnings
+import tiktoken
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+from openaivec import _di as di
+from openaivec._model import (
+    AzureOpenAIAPIKey,
+    AzureOpenAIAPIVersion,
+    AzureOpenAIBaseURL,
+    EmbeddingsModelName,
+    OpenAIAPIKey,
+    ResponsesModelName,
+)
+from openaivec._schema import SchemaInferer
+from openaivec._util import TextChunker
+__all__ = []
+CONTAINER = di.Container()
+def _build_missing_credentials_error(
+    openai_api_key: str | None,
+    azure_api_key: str | None,
+    azure_base_url: str | None,
+    azure_api_version: str | None,
+) -> str:
+    """Build a detailed error message for missing credentials.
+    Args:
+        openai_api_key (str | None): The OpenAI API key value.
+        azure_api_key (str | None): The Azure OpenAI API key value.
+        azure_base_url (str | None): The Azure OpenAI base URL value.
+        azure_api_version (str | None): The Azure OpenAI API version value.
+    Returns:
+        str: A detailed error message with missing variables and setup instructions.
+    """
+    lines = ["No valid OpenAI or Azure OpenAI credentials found.", ""]
+    # Check OpenAI
+    lines.append("Option 1: Set OPENAI_API_KEY for OpenAI")
+    if openai_api_key:
+        lines.append("  ✓ OPENAI_API_KEY is set")
+    else:
+        lines.append("  ✗ OPENAI_API_KEY is not set")
+        lines.append('    Example: export OPENAI_API_KEY="sk-..."')
+    lines.append("")
+    # Check Azure OpenAI
+    lines.append("Option 2: Set all Azure OpenAI variables")
+    azure_vars = [
+        ("AZURE_OPENAI_API_KEY", azure_api_key, '"your-azure-api-key"'),
+        ("AZURE_OPENAI_BASE_URL", azure_base_url, '"https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"'),
+        ("AZURE_OPENAI_API_VERSION", azure_api_version, '"2024-12-01-preview"'),
+    ]
+    for var_name, var_value, example in azure_vars:
+        if var_value:
+            lines.append(f"  ✓ {var_name} is set")
+        else:
+            lines.append(f"  ✗ {var_name} is not set")
+            lines.append(f"    Example: export {var_name}={example}")
+    return "\n".join(lines)
+def _check_azure_v1_api_url(base_url: str) -> None:
+    """Check if Azure OpenAI base URL uses the recommended v1 API format.
+    Issues a warning if the URL doesn't end with '/openai/v1/' to encourage
+    migration to the v1 API format as recommended by Microsoft.
+    Reference: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle
+    Args:
+        base_url (str): The Azure OpenAI base URL to check.
+    """
+    if base_url and not base_url.rstrip("/").endswith("/openai/v1"):
+        warnings.warn(
+            "⚠️  Azure OpenAI v1 API is recommended. Your base URL should end with '/openai/v1/'. "
+            f"Current URL: '{base_url}'. "
+            "Consider updating to: 'https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/' "
+            "for better performance and future compatibility. "
+            "See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle",
+            UserWarning,
+            stacklevel=3,
+        )
+def provide_openai_client() -> OpenAI:
+    """Provide OpenAI client based on environment variables.
+    Automatically detects and prioritizes OpenAI over Azure OpenAI configuration.
+    Checks the following environment variables in order:
+    1. OPENAI_API_KEY - if set, creates standard OpenAI client
+    2. Azure OpenAI variables (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL,
+       AZURE_OPENAI_API_VERSION) - if all set, creates Azure OpenAI client
+    Returns:
+        OpenAI: Configured OpenAI or AzureOpenAI client instance.
+    Raises:
+        ValueError: If no valid environment variables are found for either service.
+    """
+    openai_api_key = CONTAINER.resolve(OpenAIAPIKey)
+    if openai_api_key.value:
+        return OpenAI()
+    azure_api_key = CONTAINER.resolve(AzureOpenAIAPIKey)
+    azure_base_url = CONTAINER.resolve(AzureOpenAIBaseURL)
+    azure_api_version = CONTAINER.resolve(AzureOpenAIAPIVersion)
+    if all(param.value for param in [azure_api_key, azure_base_url, azure_api_version]):
+        # Type checker support: values are guaranteed non-None by the all() check above
+        assert azure_api_key.value is not None
+        assert azure_base_url.value is not None
+        assert azure_api_version.value is not None
+        _check_azure_v1_api_url(azure_base_url.value)
+        return AzureOpenAI(
+            api_key=azure_api_key.value,
+            base_url=azure_base_url.value,
+            api_version=azure_api_version.value,
+        )
+    raise ValueError(
+        _build_missing_credentials_error(
+            openai_api_key=openai_api_key.value,
+            azure_api_key=azure_api_key.value,
+            azure_base_url=azure_base_url.value,
+            azure_api_version=azure_api_version.value,
+        )
+    )
+def provide_async_openai_client() -> AsyncOpenAI:
+    """Provide asynchronous OpenAI client based on environment variables.
+    Automatically detects and prioritizes OpenAI over Azure OpenAI configuration.
+    Checks the following environment variables in order:
+    1. OPENAI_API_KEY - if set, creates standard AsyncOpenAI client
+    2. Azure OpenAI variables (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL,
+       AZURE_OPENAI_API_VERSION) - if all set, creates AsyncAzureOpenAI client
+    Returns:
+        AsyncOpenAI: Configured AsyncOpenAI or AsyncAzureOpenAI client instance.
+    Raises:
+        ValueError: If no valid environment variables are found for either service.
+    """
+    openai_api_key = CONTAINER.resolve(OpenAIAPIKey)
+    if openai_api_key.value:
+        return AsyncOpenAI()
+    azure_api_key = CONTAINER.resolve(AzureOpenAIAPIKey)
+    azure_base_url = CONTAINER.resolve(AzureOpenAIBaseURL)
+    azure_api_version = CONTAINER.resolve(AzureOpenAIAPIVersion)
+    if all(param.value for param in [azure_api_key, azure_base_url, azure_api_version]):
+        # Type checker support: values are guaranteed non-None by the all() check above
+        assert azure_api_key.value is not None
+        assert azure_base_url.value is not None
+        assert azure_api_version.value is not None
+        _check_azure_v1_api_url(azure_base_url.value)
+        return AsyncAzureOpenAI(
+            api_key=azure_api_key.value,
+            base_url=azure_base_url.value,
+            api_version=azure_api_version.value,
+        )
+    raise ValueError(
+        _build_missing_credentials_error(
+            openai_api_key=openai_api_key.value,
+            azure_api_key=azure_api_key.value,
+            azure_base_url=azure_base_url.value,
+            azure_api_version=azure_api_version.value,
+        )
+    )
+def set_default_registrations():
+    CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
+    CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))
+    CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
+    CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
+    CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
+    CONTAINER.register(
+        cls=AzureOpenAIAPIVersion,
+        provider=lambda: AzureOpenAIAPIVersion(os.getenv("AZURE_OPENAI_API_VERSION", "preview")),
+    )
+    CONTAINER.register(OpenAI, provide_openai_client)
+    CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
+    CONTAINER.register(tiktoken.Encoding, lambda: tiktoken.get_encoding("o200k_base"))
+    CONTAINER.register(TextChunker, lambda: TextChunker(CONTAINER.resolve(tiktoken.Encoding)))
+    CONTAINER.register(
+        SchemaInferer,
+        lambda: SchemaInferer(
+            client=CONTAINER.resolve(OpenAI),
+            model_name=CONTAINER.resolve(ResponsesModelName).value,
+        ),
+    )
+set_default_registrations()

openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl