openaivec 0.12.5__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/{di.py → _di.py} +36 -12
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1496 -318
  18. openaivec/spark.py +485 -183
  19. openaivec/task/__init__.py +9 -7
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +17 -15
  22. openaivec/task/customer_support/inquiry_classification.py +23 -22
  23. openaivec/task/customer_support/inquiry_summary.py +14 -13
  24. openaivec/task/customer_support/intent_analysis.py +21 -19
  25. openaivec/task/customer_support/response_suggestion.py +16 -16
  26. openaivec/task/customer_support/urgency_analysis.py +24 -25
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +10 -12
  29. openaivec/task/nlp/keyword_extraction.py +11 -14
  30. openaivec/task/nlp/morphological_analysis.py +12 -14
  31. openaivec/task/nlp/named_entity_recognition.py +16 -18
  32. openaivec/task/nlp/sentiment_analysis.py +14 -11
  33. openaivec/task/nlp/translation.py +6 -9
  34. openaivec/task/table/__init__.py +2 -2
  35. openaivec/task/table/fillna.py +11 -11
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/model.py +0 -67
  41. openaivec/provider.py +0 -45
  42. openaivec/responses.py +0 -393
  43. openaivec/serialize.py +0 -225
  44. openaivec-0.12.5.dist-info/METADATA +0 -696
  45. openaivec-0.12.5.dist-info/RECORD +0 -33
  46. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,234 @@
1
+ """Refactored serialization utilities for Pydantic BaseModel classes.
2
+
3
+ This module provides utilities for converting Pydantic BaseModel classes
4
+ to and from JSON schema representations with simplified, maintainable code.
5
+ """
6
+
7
+ from typing import Any, Literal
8
+
9
+ from pydantic import BaseModel, Field, create_model
10
+
11
+ __all__ = []
12
+
13
+
14
def serialize_base_model(obj: type[BaseModel]) -> dict[str, Any]:
    """Convert a Pydantic model class into its JSON-schema dictionary."""
    schema = obj.model_json_schema()
    return schema
17
+
18
+
19
def dereference_json_schema(json_schema: dict[str, Any]) -> dict[str, Any]:
    """Resolve every ``$ref`` pointer in *json_schema* against its ``$defs``.

    Circular references are replaced by a placeholder object so expansion
    always terminates, and unknown references yield a similar placeholder
    instead of raising.
    """
    definitions = json_schema.get("$defs", {})

    def expand(node: Any, seen: tuple[str, ...] = ()) -> Any:
        if isinstance(node, dict):
            if "$ref" in node:
                name = node["$ref"].rsplit("/", 1)[-1]
                if name in seen:
                    # Break the cycle instead of recursing forever.
                    return {"type": "object", "description": f"Circular reference to {name}"}
                if name not in definitions:
                    return {"type": "object", "description": f"Invalid reference to {name}"}
                return expand(definitions[name], seen + (name,))
            return {key: expand(value, seen) for key, value in node.items()}
        if isinstance(node, list):
            return [expand(item, seen) for item in node]
        return node

    # The $defs section is consumed during expansion and dropped from the output.
    return {key: expand(value) for key, value in json_schema.items() if key != "$defs"}
57
+
58
+
59
+ # ============================================================================
60
+ # Type Resolution - Separated into focused functions
61
+ # ============================================================================
62
+
63
+
64
+ def _resolve_union_type(union_options: list[dict[str, Any]]) -> type:
65
+ """Resolve anyOf/oneOf to Union type."""
66
+ union_types = []
67
+ for option in union_options:
68
+ if option.get("type") == "null":
69
+ union_types.append(type(None))
70
+ else:
71
+ union_types.append(parse_field(option))
72
+
73
+ if len(union_types) == 1:
74
+ return union_types[0]
75
+ elif len(union_types) == 2 and type(None) in union_types:
76
+ # Optional type: T | None
77
+ non_none_type = next(t for t in union_types if t is not type(None))
78
+ return non_none_type | None # type: ignore[return-value]
79
+ else:
80
+ from typing import Union
81
+
82
+ return Union[tuple(union_types)] # type: ignore[return-value]
83
+
84
+
85
+ def _resolve_basic_type(type_name: str, field_def: dict[str, Any]) -> type:
86
+ """Resolve basic JSON schema types to Python types."""
87
+ type_mapping = {
88
+ "string": str,
89
+ "integer": int,
90
+ "number": float,
91
+ "boolean": bool,
92
+ "null": type(None),
93
+ }
94
+
95
+ if type_name in type_mapping:
96
+ return type_mapping[type_name] # type: ignore[return-value]
97
+ elif type_name == "object":
98
+ # Check if it's a nested model or generic dict
99
+ if "properties" in field_def:
100
+ return deserialize_base_model(field_def)
101
+ else:
102
+ return dict
103
+ elif type_name == "array":
104
+ if "items" in field_def:
105
+ inner_type = parse_field(field_def["items"])
106
+ return list[inner_type]
107
+ else:
108
+ return list[Any]
109
+ else:
110
+ raise ValueError(f"Unsupported type: {type_name}")
111
+
112
+
113
def parse_field(field_def: dict[str, Any]) -> type:
    """Translate one JSON-schema field definition into a Python type.

    Union constructs (``anyOf``/``oneOf``) take precedence; a definition
    with no explicit ``type`` falls back to :data:`typing.Any`.
    """
    for union_key in ("anyOf", "oneOf"):
        if union_key in field_def:
            return _resolve_union_type(field_def[union_key])

    if "type" not in field_def:
        return Any  # type: ignore[return-value]

    return _resolve_basic_type(field_def["type"], field_def)
129
+
130
+
131
+ # ============================================================================
132
+ # Field Information Creation - Centralized logic
133
+ # ============================================================================
134
+
135
+
136
def _create_field_info(description: str | None, default_value: Any, is_required: bool) -> Field:  # type: ignore[type-arg]
    """Build a pydantic ``Field`` for one schema property.

    A required field without a default yields a bare ``Field``; any other
    combination carries ``default_value`` as the field default.
    """
    kwargs: dict[str, Any] = {}
    if description:
        kwargs["description"] = description
    if not (is_required and default_value is None):
        # Optional field, or a field with an explicit default.
        kwargs["default"] = default_value
    return Field(**kwargs)
144
+
145
+
146
+ def _make_optional_if_needed(field_type: type, is_required: bool, has_default: bool) -> type:
147
+ """Make field type optional if needed."""
148
+ if is_required or has_default:
149
+ return field_type
150
+
151
+ # Check if already nullable
152
+ from typing import Union
153
+
154
+ if hasattr(field_type, "__origin__") and field_type.__origin__ is Union and type(None) in field_type.__args__:
155
+ return field_type
156
+
157
+ # Make optional
158
+ return field_type | None # type: ignore[return-value]
159
+
160
+
161
+ # ============================================================================
162
+ # Field Processing - Separated enum and regular field logic
163
+ # ============================================================================
164
+
165
+
166
def _process_enum_field(field_name: str, field_def: dict[str, Any], is_required: bool) -> tuple[type, Field]:  # type: ignore[type-arg]
    """Turn an ``enum`` schema property into a ``Literal`` type plus Field.

    ``field_name`` is accepted for signature symmetry with
    :func:`_process_regular_field` but is not used here.
    """
    enum_values = field_def["enum"]

    # A single value gets a scalar Literal; several values a Literal union.
    literal_type = Literal[enum_values[0]] if len(enum_values) == 1 else Literal[tuple(enum_values)]

    description = field_def.get("description")
    default_value = field_def.get("default")

    if not is_required and default_value is None:
        # Optional enum without an explicit default becomes nullable.
        literal_type = literal_type | None  # type: ignore[assignment]
        default_value = None

    return literal_type, _create_field_info(description, default_value, is_required)  # type: ignore[return-value]
187
+
188
+
189
def _process_regular_field(field_name: str, field_def: dict[str, Any], is_required: bool) -> tuple[type, Field]:  # type: ignore[type-arg]
    """Resolve a non-enum schema property into a Python type plus Field.

    ``field_name`` is accepted for signature symmetry with
    :func:`_process_enum_field` but is not used here.
    """
    description = field_def.get("description")
    default_value = field_def.get("default")
    has_default = default_value is not None

    resolved = _make_optional_if_needed(parse_field(field_def), is_required, has_default)

    if not is_required and not has_default:
        # Optional fields without a schema default get an explicit None.
        default_value = None

    return resolved, _create_field_info(description, default_value, is_required)
204
+
205
+
206
+ # ============================================================================
207
+ # Main Schema Processing - Clean and focused
208
+ # ============================================================================
209
+
210
+
211
def deserialize_base_model(json_schema: dict[str, Any]) -> type[BaseModel]:
    """Rebuild a Pydantic model class from a JSON schema.

    ``$ref`` pointers are expanded first, then every property is resolved
    into a ``(type, Field)`` pair and handed to :func:`pydantic.create_model`.
    """
    model_name = json_schema.get("title", "DynamicModel")
    schema = dereference_json_schema(json_schema)
    required = set(schema.get("required", []))

    fields: dict[str, tuple[type, Any]] = {}
    for name, definition in schema.get("properties", {}).items():
        # Enum properties need Literal handling; everything else is regular.
        processor = _process_enum_field if "enum" in definition else _process_regular_field
        fields[name] = processor(name, definition, name in required)

    return create_model(model_name, **fields)
@@ -2,12 +2,14 @@ import asyncio
2
2
  import functools
3
3
  import re
4
4
  import time
5
+ from collections.abc import Awaitable, Callable
5
6
  from dataclasses import dataclass
6
- from typing import Awaitable, Callable, Dict, List, TypeVar
7
+ from typing import TypeVar
7
8
 
8
9
  import numpy as np
9
10
  import tiktoken
10
11
 
12
+ __all__ = []
11
13
 
12
14
  T = TypeVar("T")
13
15
  U = TypeVar("U")
@@ -34,24 +36,28 @@ def get_exponential_with_cutoff(scale: float) -> float:
34
36
  return v
35
37
 
36
38
 
37
- def backoff(exception: type[Exception], scale: int | None = None, max_retries: int | None = None) -> Callable[..., V]:
39
+ def backoff(
40
+ exceptions: list[type[Exception]],
41
+ scale: int | None = None,
42
+ max_retries: int | None = None,
43
+ ) -> Callable[..., V]:
38
44
  """Decorator implementing exponential back‑off retry logic.
39
45
 
40
46
  Args:
41
- exception (type[Exception]): Exception type that triggers a retry.
47
+ exceptions (list[type[Exception]]): List of exception types that trigger a retry.
42
48
  scale (int | None): Initial scale parameter for the exponential jitter.
43
49
  This scale is used as the mean for the first delay's exponential
44
50
  distribution and doubles with each subsequent retry. If ``None``,
45
51
  an initial scale of 1.0 is used.
46
- max_retries (Optional[int]): Maximum number of retries. ``None`` means
52
+ max_retries (int | None): Maximum number of retries. ``None`` means
47
53
  retry indefinitely.
48
54
 
49
55
  Returns:
50
56
  Callable[..., V]: A decorated function that retries on the specified
51
- exception with exponential back‑off.
57
+ exceptions with exponential back‑off.
52
58
 
53
59
  Raises:
54
- exception: Re‑raised when the maximum number of retries is exceeded.
60
+ Exception: Re‑raised when the maximum number of retries is exceeded.
55
61
  """
56
62
 
57
63
  def decorator(func: Callable[..., V]) -> Callable[..., V]:
@@ -65,7 +71,7 @@ def backoff(exception: type[Exception], scale: int | None = None, max_retries: i
65
71
  while True:
66
72
  try:
67
73
  return func(*args, **kwargs)
68
- except exception:
74
+ except tuple(exceptions):
69
75
  attempt += 1
70
76
  if max_retries is not None and attempt >= max_retries:
71
77
  raise
@@ -79,16 +85,18 @@ def backoff(exception: type[Exception], scale: int | None = None, max_retries: i
79
85
 
80
86
  return wrapper
81
87
 
82
- return decorator
88
+ return decorator # type: ignore[return-value]
83
89
 
84
90
 
85
91
  def backoff_async(
86
- exception: type[Exception], scale: int | None = None, max_retries: int | None = None
92
+ exceptions: list[type[Exception]],
93
+ scale: int | None = None,
94
+ max_retries: int | None = None,
87
95
  ) -> Callable[..., Awaitable[V]]:
88
96
  """Asynchronous version of the backoff decorator.
89
97
 
90
98
  Args:
91
- exception (type[Exception]): Exception type that triggers a retry.
99
+ exceptions (list[type[Exception]]): List of exception types that trigger a retry.
92
100
  scale (int | None): Initial scale parameter for the exponential jitter.
93
101
  This scale is used as the mean for the first delay's exponential
94
102
  distribution and doubles with each subsequent retry. If ``None``,
@@ -98,10 +106,10 @@ def backoff_async(
98
106
 
99
107
  Returns:
100
108
  Callable[..., Awaitable[V]]: A decorated asynchronous function that
101
- retries on the specified exception with exponential back‑off.
109
+ retries on the specified exceptions with exponential back‑off.
102
110
 
103
111
  Raises:
104
- exception: Re‑raised when the maximum number of retries is exceeded.
112
+ Exception: Re‑raised when the maximum number of retries is exceeded.
105
113
  """
106
114
 
107
115
  def decorator(func: Callable[..., Awaitable[V]]) -> Callable[..., Awaitable[V]]:
@@ -115,7 +123,7 @@ def backoff_async(
115
123
  while True:
116
124
  try:
117
125
  return await func(*args, **kwargs)
118
- except exception:
126
+ except tuple(exceptions):
119
127
  attempt += 1
120
128
  if max_retries is not None and attempt >= max_retries:
121
129
  raise
@@ -129,7 +137,7 @@ def backoff_async(
129
137
 
130
138
  return wrapper
131
139
 
132
- return decorator
140
+ return decorator # type: ignore[return-value]
133
141
 
134
142
 
135
143
  @dataclass(frozen=True)
@@ -138,7 +146,7 @@ class TextChunker:
138
146
 
139
147
  enc: tiktoken.Encoding
140
148
 
141
- def split(self, original: str, max_tokens: int, sep: List[str]) -> List[str]:
149
+ def split(self, original: str, max_tokens: int, sep: list[str]) -> list[str]:
142
150
  """Token‑aware sentence segmentation.
143
151
 
144
152
  The text is first split by the given separators, then greedily packed
@@ -147,11 +155,11 @@ class TextChunker:
147
155
  Args:
148
156
  original (str): Original text to split.
149
157
  max_tokens (int): Maximum number of tokens allowed per chunk.
150
- sep (List[str]): List of separator patterns used by
158
+ sep (list[str]): List of separator patterns used by
151
159
  :pyfunc:`re.split`.
152
160
 
153
161
  Returns:
154
- List[str]: List of text chunks respecting the ``max_tokens`` limit.
162
+ list[str]: List of text chunks respecting the ``max_tokens`` limit.
155
163
  """
156
164
  sentences = re.split(f"({'|'.join(sep)})", original)
157
165
  sentences = [s.strip() for s in sentences if s.strip()]
@@ -174,71 +182,3 @@ class TextChunker:
174
182
  chunks.append(sentence)
175
183
 
176
184
  return chunks
177
-
178
-
179
async def map_async(inputs: list[T], f: Callable[[list[T]], Awaitable[list[U]]], batch_size: int = 128) -> list[U]:
    """Asynchronously map ``f`` over ``inputs`` in concurrent batches.

    Duplicate inputs (by their ``str`` representation) are computed only
    once and the cached result is fanned back out, so the returned list
    lines up one-to-one with ``inputs``.

    Args:
        inputs: Values to process.
        f: Asynchronous function mapping a batch of inputs to a
            same-length batch of outputs.
        batch_size: Maximum number of unique inputs per batch.

    Returns:
        Outputs corresponding to ``inputs``, in the original order.

    Raises:
        ValueError: If ``f`` returns a different number of outputs than
            it received inputs.
    """
    # Key on str(v) itself rather than hash(str(v)): two distinct inputs
    # whose string hashes collide would otherwise silently share a result.
    keys: list[str] = [str(v) for v in inputs]
    unique: dict[str, T] = dict(zip(keys, inputs))
    unique_values: list[T] = list(unique.values())
    batches: list[list[T]] = [unique_values[i : i + batch_size] for i in range(0, len(unique_values), batch_size)]
    # Launch all batches concurrently and flatten in submission order.
    output_batches: list[list[U]] = await asyncio.gather(*(f(batch) for batch in batches))
    flat: list[U] = [u for batch in output_batches for u in batch]
    if len(unique) != len(flat):
        raise ValueError(
            f"Number of unique inputs ({len(unique)}) does not match number of unique outputs ({len(flat)}). Check the function f."
        )
    by_key: dict[str, U] = dict(zip(unique.keys(), flat))
    return [by_key[k] for k in keys]
212
-
213
-
214
def map(inputs: list[T], f: Callable[[list[T]], list[U]], batch_size: int = 128) -> list[U]:
    """Map ``f`` over ``inputs`` in batches, deduplicating repeated values.

    Duplicate inputs (by their ``str`` representation) are computed only
    once and the cached result is fanned back out, so the returned list
    lines up one-to-one with ``inputs``.

    Args:
        inputs: Values to process.
        f: Function mapping a batch of inputs to a same-length batch of
            outputs.
        batch_size: Maximum number of unique inputs per batch.

    Returns:
        Outputs corresponding to ``inputs``, in the original order.

    Raises:
        ValueError: If ``f`` returns a different number of outputs than
            it received inputs.
    """
    # Key on str(v) itself rather than hash(str(v)): two distinct inputs
    # whose string hashes collide would otherwise silently share a result.
    keys: list[str] = [str(v) for v in inputs]
    unique: dict[str, T] = dict(zip(keys, inputs))
    unique_values: list[T] = list(unique.values())
    batches: list[list[T]] = [unique_values[i : i + batch_size] for i in range(0, len(unique_values), batch_size)]
    flat: list[U] = [u for batch in batches for u in f(batch)]
    if len(unique) != len(flat):
        raise ValueError(
            f"Number of unique inputs ({len(unique)}) does not match number of unique outputs ({len(flat)}). Check the function f."
        )
    by_key: dict[str, U] = dict(zip(unique.keys(), flat))
    return [by_key[k] for k in keys]