PyPI - openaivec - Versions diffs - 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl - Mend

openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (45)

openaivec/__init__.py +13 -4
openaivec/_cache/__init__.py +12 -0
openaivec/_cache/optimize.py +109 -0
openaivec/_cache/proxy.py +806 -0
openaivec/_di.py +326 -0
openaivec/_embeddings.py +203 -0
openaivec/{log.py → _log.py} +2 -2
openaivec/_model.py +113 -0
openaivec/{prompt.py → _prompt.py} +95 -28
openaivec/_provider.py +207 -0
openaivec/_responses.py +511 -0
openaivec/_schema/__init__.py +9 -0
openaivec/_schema/infer.py +340 -0
openaivec/_schema/spec.py +350 -0
openaivec/_serialize.py +234 -0
openaivec/{util.py → _util.py} +25 -85
openaivec/pandas_ext.py +1635 -425
openaivec/spark.py +604 -335
openaivec/task/__init__.py +27 -29
openaivec/task/customer_support/__init__.py +9 -15
openaivec/task/customer_support/customer_sentiment.py +51 -41
openaivec/task/customer_support/inquiry_classification.py +86 -61
openaivec/task/customer_support/inquiry_summary.py +44 -45
openaivec/task/customer_support/intent_analysis.py +56 -41
openaivec/task/customer_support/response_suggestion.py +49 -43
openaivec/task/customer_support/urgency_analysis.py +76 -71
openaivec/task/nlp/__init__.py +4 -4
openaivec/task/nlp/dependency_parsing.py +19 -20
openaivec/task/nlp/keyword_extraction.py +22 -24
openaivec/task/nlp/morphological_analysis.py +25 -25
openaivec/task/nlp/named_entity_recognition.py +26 -28
openaivec/task/nlp/sentiment_analysis.py +29 -21
openaivec/task/nlp/translation.py +24 -30
openaivec/task/table/__init__.py +3 -0
openaivec/task/table/fillna.py +183 -0
openaivec-1.0.10.dist-info/METADATA +399 -0
openaivec-1.0.10.dist-info/RECORD +39 -0
{openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
openaivec/embeddings.py +0 -172
openaivec/responses.py +0 -392
openaivec/serialize.py +0 -225
openaivec/task/model.py +0 -84
openaivec-0.10.0.dist-info/METADATA +0 -546
openaivec-0.10.0.dist-info/RECORD +0 -29
{openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0

openaivec/task/nlp/named_entity_recognition.py CHANGED Viewed

@@ -5,22 +5,22 @@ identifies and classifies named entities in text using OpenAI's language models.
 Example:
     Basic usage with BatchResponses:
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import nlp
     client = OpenAI()
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=nlp.NAMED_ENTITY_RECOGNITION
     )
     texts = ["John works at Microsoft in Seattle", "The meeting is on March 15th"]
     analyses = analyzer.parse(texts)
     for analysis in analyses:
         print(f"Persons: {analysis.persons}")
         print(f"Organizations: {analysis.organizations}")
@@ -28,31 +28,29 @@ Example:
     ```
     With pandas integration:
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import nlp
     df = pd.DataFrame({"text": ["John works at Microsoft in Seattle", "The meeting is on March 15th"]})
     df["entities"] = df["text"].ai.task(nlp.NAMED_ENTITY_RECOGNITION)
     # Extract entity components
     extracted_df = df.ai.extract("entities")
     print(extracted_df[["text", "entities_persons", "entities_organizations", "entities_locations"]])
     ```
 Attributes:
-    NAMED_ENTITY_RECOGNITION (PreparedTask): A prepared task instance
-        configured for named entity recognition with temperature=0.0 and
-        top_p=1.0 for deterministic output.
+    NAMED_ENTITY_RECOGNITION (PreparedTask): A prepared task instance configured for named
+        entity recognition. Provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for
+        deterministic output.
 """
-from typing import List, Optional
-from pydantic import BaseModel
-from pydantic import Field
+from pydantic import BaseModel, Field
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 __all__ = ["NAMED_ENTITY_RECOGNITION"]
@@ -62,22 +60,22 @@ class NamedEntity(BaseModel):
     label: str = Field(description="Entity type label")
     start: int = Field(description="Start position in the original text")
     end: int = Field(description="End position in the original text")
-    confidence: Optional[float] = Field(description="Confidence score (0.0-1.0)")
+    confidence: float | None = Field(description="Confidence score (0.0-1.0)")
 class NamedEntityRecognition(BaseModel):
-    persons: List[NamedEntity] = Field(description="Person entities")
-    organizations: List[NamedEntity] = Field(description="Organization entities")
-    locations: List[NamedEntity] = Field(description="Location entities")
-    dates: List[NamedEntity] = Field(description="Date and time entities")
-    money: List[NamedEntity] = Field(description="Money and currency entities")
-    percentages: List[NamedEntity] = Field(description="Percentage entities")
-    miscellaneous: List[NamedEntity] = Field(description="Other named entities")
+    persons: list[NamedEntity] = Field(description="Person entities")
+    organizations: list[NamedEntity] = Field(description="Organization entities")
+    locations: list[NamedEntity] = Field(description="Location entities")
+    dates: list[NamedEntity] = Field(description="Date and time entities")
+    money: list[NamedEntity] = Field(description="Money and currency entities")
+    percentages: list[NamedEntity] = Field(description="Percentage entities")
+    miscellaneous: list[NamedEntity] = Field(description="Other named entities")
 NAMED_ENTITY_RECOGNITION = PreparedTask(
-    instructions="Identify and classify named entities in the following text. Extract persons, organizations, locations, dates, money, percentages, and other miscellaneous entities with their positions and confidence scores.",
+    instructions="Identify and classify named entities in the following text. Extract persons, "
+    "organizations, locations, dates, money, percentages, and other miscellaneous entities "
+    "with their positions and confidence scores.",
     response_format=NamedEntityRecognition,
-    temperature=0.0,
-    top_p=1.0
-)
+)

openaivec/task/nlp/sentiment_analysis.py CHANGED Viewed

@@ -5,22 +5,22 @@ sentiment and emotions in text using OpenAI's language models.
 Example:
     Basic usage with BatchResponses:
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import nlp
     client = OpenAI()
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=nlp.SENTIMENT_ANALYSIS
     )
     texts = ["I love this product!", "This is terrible and disappointing."]
     analyses = analyzer.parse(texts)
     for analysis in analyses:
         print(f"Sentiment: {analysis.sentiment}")
         print(f"Confidence: {analysis.confidence}")
@@ -28,46 +28,54 @@ Example:
     ```
     With pandas integration:
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import nlp
     df = pd.DataFrame({"text": ["I love this product!", "This is terrible and disappointing."]})
     df["sentiment"] = df["text"].ai.task(nlp.SENTIMENT_ANALYSIS)
     # Extract sentiment components
     extracted_df = df.ai.extract("sentiment")
     print(extracted_df[["text", "sentiment_sentiment", "sentiment_confidence", "sentiment_polarity"]])
     ```
 Attributes:
-    SENTIMENT_ANALYSIS (PreparedTask): A prepared task instance
-        configured for sentiment analysis with temperature=0.0 and
-        top_p=1.0 for deterministic output.
+    SENTIMENT_ANALYSIS (PreparedTask): A prepared task instance configured for sentiment
+        analysis. Provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for
+        deterministic output.
 """
-from typing import List, Literal
+from typing import Literal
 from pydantic import BaseModel, Field
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 __all__ = ["SENTIMENT_ANALYSIS"]
 class SentimentAnalysis(BaseModel):
-    sentiment: Literal["positive", "negative", "neutral"] = Field(description="Overall sentiment (positive, negative, neutral)")
+    sentiment: Literal["positive", "negative", "neutral"] = Field(
+        description="Overall sentiment (positive, negative, neutral)"
+    )
     confidence: float = Field(description="Confidence score for sentiment (0.0-1.0)")
-    emotions: List[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(description="Detected emotions (joy, sadness, anger, fear, surprise, disgust)")
-    emotion_scores: List[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
+    emotions: list[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(
+        description="Detected emotions (joy, sadness, anger, fear, surprise, disgust)"
+    )
+    emotion_scores: list[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
     polarity: float = Field(description="Polarity score from -1.0 (negative) to 1.0 (positive)")
     subjectivity: float = Field(description="Subjectivity score from 0.0 (objective) to 1.0 (subjective)")
 SENTIMENT_ANALYSIS = PreparedTask(
-    instructions="Analyze the sentiment and emotions in the following text. Provide overall sentiment classification, confidence scores, detected emotions, polarity, and subjectivity measures.\n\nIMPORTANT: Provide all analysis in the same language as the input text, except for the predefined categorical fields (sentiment, emotions) which must use the exact English values specified (positive/negative/neutral for sentiment, and joy/sadness/anger/fear/surprise/disgust for emotions).",
+    instructions="Analyze the sentiment and emotions in the following text. Provide overall "
+    "sentiment classification, confidence scores, detected emotions, polarity, and subjectivity "
+    "measures.\n\nIMPORTANT: Provide all analysis in the same language as the input text, except "
+    "for the predefined categorical fields (sentiment, emotions) which must use the exact "
+    "English values specified (positive/negative/neutral for sentiment, and "
+    "joy/sadness/anger/fear/surprise/disgust for emotions).",
     response_format=SentimentAnalysis,
-    temperature=0.0,
-    top_p=1.0
-)
+)

openaivec/task/nlp/translation.py CHANGED Viewed

@@ -10,22 +10,22 @@ provides structured output with consistent language code naming.
 Example:
     Basic usage with BatchResponses:
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import nlp
     client = OpenAI()
     translator = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=nlp.MULTILINGUAL_TRANSLATION
     )
     texts = ["Hello", "Good morning", "Thank you"]
     translations = translator.parse(texts)
     for translation in translations:
         print(f"English: {translation.en}")
         print(f"Japanese: {translation.ja}")
@@ -33,15 +33,15 @@ Example:
     ```
     With pandas integration:
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import nlp
     df = pd.DataFrame({"text": ["Hello", "Goodbye"]})
     df["translations"] = df["text"].ai.task(nlp.MULTILINGUAL_TRANSLATION)
     # Extract specific languages
     extracted_df = df.ai.extract("translations")
     print(extracted_df[["text", "translations_en", "translations_ja", "translations_fr"]])
@@ -49,8 +49,8 @@ Example:
 Attributes:
     MULTILINGUAL_TRANSLATION (PreparedTask): A prepared task instance configured
-        for multilingual translation with temperature=0.0 and top_p=1.0 for
-        deterministic output.
+        for multilingual translation. Provide ``temperature=0.0`` and ``top_p=1.0``
+        to the calling API wrapper for deterministic output.
 Note:
     The translation covers 58 languages across major language families. All field
@@ -72,10 +72,9 @@ Note:
     - Other: Basque, Maltese
 """
-from openai import BaseModel
-from pydantic import Field
+from pydantic import BaseModel, Field
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 __all__ = ["MULTILINGUAL_TRANSLATION"]
@@ -88,7 +87,7 @@ class TranslatedString(BaseModel):
     sv: str = Field(description="Translated text in Swedish")
     da: str = Field(description="Translated text in Danish")
     no: str = Field(description="Translated text in Norwegian")
     # Romance languages
     es: str = Field(description="Translated text in Spanish")
     fr: str = Field(description="Translated text in French")
@@ -96,7 +95,7 @@ class TranslatedString(BaseModel):
     pt: str = Field(description="Translated text in Portuguese")
     ro: str = Field(description="Translated text in Romanian")
     ca: str = Field(description="Translated text in Catalan")
     # Slavic languages
     ru: str = Field(description="Translated text in Russian")
     pl: str = Field(description="Translated text in Polish")
@@ -106,37 +105,37 @@ class TranslatedString(BaseModel):
     bg: str = Field(description="Translated text in Bulgarian")
     hr: str = Field(description="Translated text in Croatian")
     sr: str = Field(description="Translated text in Serbian")
     # East Asian languages
     ja: str = Field(description="Translated text in Japanese")
     ko: str = Field(description="Translated text in Korean")
     zh: str = Field(description="Translated text in Chinese (Simplified)")
     zh_tw: str = Field(description="Translated text in Chinese (Traditional)")
     # South Asian languages
     hi: str = Field(description="Translated text in Hindi")
     bn: str = Field(description="Translated text in Bengali")
     te: str = Field(description="Translated text in Telugu")
     ta: str = Field(description="Translated text in Tamil")
     ur: str = Field(description="Translated text in Urdu")
     # Southeast Asian languages
     th: str = Field(description="Translated text in Thai")
     vi: str = Field(description="Translated text in Vietnamese")
     id: str = Field(description="Translated text in Indonesian")
     ms: str = Field(description="Translated text in Malay")
     tl: str = Field(description="Translated text in Filipino")
     # Middle Eastern languages
     ar: str = Field(description="Translated text in Arabic")
     he: str = Field(description="Translated text in Hebrew")
     fa: str = Field(description="Translated text in Persian")
     tr: str = Field(description="Translated text in Turkish")
     # African languages
     sw: str = Field(description="Translated text in Swahili")
     am: str = Field(description="Translated text in Amharic")
     # Other European languages
     fi: str = Field(description="Translated text in Finnish")
     hu: str = Field(description="Translated text in Hungarian")
@@ -144,10 +143,10 @@ class TranslatedString(BaseModel):
     lv: str = Field(description="Translated text in Latvian")
     lt: str = Field(description="Translated text in Lithuanian")
     el: str = Field(description="Translated text in Greek")
     # Nordic languages
     is_: str = Field(description="Translated text in Icelandic")
     # Other languages
     eu: str = Field(description="Translated text in Basque")
     cy: str = Field(description="Translated text in Welsh")
@@ -157,9 +156,4 @@ class TranslatedString(BaseModel):
 instructions = "Translate the following text into multiple languages. "
-MULTILINGUAL_TRANSLATION = PreparedTask(
-    instructions=instructions,
-    response_format=TranslatedString,
-    temperature=0.0,
-    top_p=1.0
-)
+MULTILINGUAL_TRANSLATION = PreparedTask(instructions=instructions, response_format=TranslatedString)

openaivec/task/table/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .fillna import FillNaResponse, fillna
+__all__ = ["fillna", "FillNaResponse"]

openaivec/task/table/fillna.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""Missing value imputation task for DataFrame columns.
+This module provides functionality to intelligently fill missing values in DataFrame
+columns using AI-powered analysis. The task analyzes existing data patterns to
+generate contextually appropriate values for missing entries.
+Example:
+    Basic usage with pandas DataFrame:
+    ```python
+    import pandas as pd
+    from openaivec import pandas_ext  # Required for .ai accessor
+    from openaivec.task.table import fillna
+    # Create DataFrame with missing values
+    df = pd.DataFrame({
+        "name": ["Alice", "Bob", None, "David"],
+        "age": [25, 30, 35, None],
+        "city": ["New York", "London", "Tokyo", "Paris"],
+        "salary": [50000, 60000, 70000, None]
+    })
+    # Fill missing values in the 'salary' column
+    task = fillna(df, "salary")
+    filled_salaries = df[df["salary"].isna()].ai.task(task)
+    # Apply filled values back to DataFrame
+    for result in filled_salaries:
+        df.loc[result.index, "salary"] = result.output
+    ```
+    With BatchResponses for more control:
+    ```python
+    from openai import OpenAI
+    from openaivec import BatchResponses
+    from openaivec.task.table import fillna
+    client = OpenAI()
+    df = pd.DataFrame({...})  # Your DataFrame with missing values
+    # Create fillna task for target column
+    task = fillna(df, "target_column")
+    # Get rows with missing values in target column
+    missing_rows = df[df["target_column"].isna()]
+    # Process with BatchResponses
+    filler = BatchResponses.of_task(
+        client=client,
+        model_name="gpt-4.1-mini",
+        task=task
+    )
+    # Generate inputs for missing rows
+    inputs = []
+    for idx, row in missing_rows.iterrows():
+        inputs.append({
+            "index": idx,
+            "input": {k: v for k, v in row.items() if k != "target_column"}
+        })
+    filled_values = filler.parse(inputs)
+    ```
+"""
+import json
+import pandas as pd
+from pydantic import BaseModel, Field
+from openaivec._model import PreparedTask
+from openaivec._prompt import FewShotPromptBuilder
+__all__ = ["fillna", "FillNaResponse"]
+def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> list[dict]:
+    examples: list[dict] = []
+    samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
+    samples = samples.dropna(subset=[target_column_name])
+    for i, row in samples.head(max_examples).iterrows():
+        examples.append(
+            {
+                "index": i,
+                "input": {k: v for k, v in row.items() if k != target_column_name},
+                "output": row[target_column_name],
+            }
+        )
+    return examples
+def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: int) -> str:
+    examples = get_examples(df, target_column_name, max_examples)
+    builder = (
+        FewShotPromptBuilder()
+        .purpose("Fill missing values in the target column based on the context provided by other columns.")
+        .caution("Ensure that the filled values are consistent with the data in other columns.")
+    )
+    for row in examples:
+        builder.example(
+            input_value=json.dumps({"index": row["index"], "input": row["input"]}, ensure_ascii=False),
+            output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
+        )
+    return builder.improve().build()
+class FillNaResponse(BaseModel):
+    """Response model for missing value imputation results.
+    Contains the row index and the imputed value for a specific missing
+    entry in the target column.
+    """
+    index: int = Field(description="Index of the row in the original DataFrame")
+    output: int | float | str | bool | None = Field(
+        description="Filled value for the target column. This value should be JSON-compatible "
+        "and match the target column type in the original DataFrame."
+    )
+def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -> PreparedTask:
+    """Create a prepared task for filling missing values in a DataFrame column.
+    Analyzes the provided DataFrame to understand data patterns and creates
+    a configured task that can intelligently fill missing values in the
+    specified target column. The task uses few-shot learning with examples
+    extracted from non-null rows in the DataFrame.
+    Args:
+        df (pd.DataFrame): Source DataFrame containing the data with missing values.
+        target_column_name (str): Name of the column to fill missing values for.
+            This column should exist in the DataFrame and contain some
+            non-null values to serve as training examples.
+        max_examples (int): Maximum number of example rows to use for few-shot
+            learning. Defaults to 500. Higher values provide more context
+            but increase token usage and processing time.
+    Returns:
+        PreparedTask configured for missing value imputation with:
+        - Instructions based on DataFrame patterns
+        - FillNaResponse format for structured output
+        - No preset sampling parameters; provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for deterministic output
+    Raises:
+        ValueError: If target_column_name doesn't exist in DataFrame,
+            contains no non-null values for training examples, DataFrame is empty,
+            or max_examples is not a positive integer.
+    Example:
+        ```python
+        import pandas as pd
+        from openaivec.task.table import fillna
+        df = pd.DataFrame({
+            "product": ["laptop", "phone", "tablet", "laptop"],
+            "brand": ["Apple", "Samsung", None, "Dell"],
+            "price": [1200, 800, 600, 1000]
+        })
+        # Create task to fill missing brand values
+        task = fillna(df, "brand")
+        # Use with pandas AI accessor
+        missing_brands = df[df["brand"].isna()].ai.task(task)
+        ```
+    """
+    if df.empty:
+        raise ValueError("DataFrame is empty.")
+    if not isinstance(max_examples, int) or max_examples <= 0:
+        raise ValueError("max_examples must be a positive integer.")
+    if target_column_name not in df.columns:
+        raise ValueError(f"Column '{target_column_name}' does not exist in the DataFrame.")
+    if df[target_column_name].notna().sum() == 0:
+        raise ValueError(f"Column '{target_column_name}' contains no non-null values for training examples.")
+    instructions = get_instructions(df, target_column_name, max_examples)
+    return PreparedTask(instructions=instructions, response_format=FillNaResponse)

openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl