PyPI - openai-sdk-helpers - Versions diffs - 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

openai-sdk-helpers 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52)

openai_sdk_helpers/__init__.py +41 -7
openai_sdk_helpers/agent/__init__.py +1 -2
openai_sdk_helpers/agent/base.py +169 -190
openai_sdk_helpers/agent/configuration.py +12 -20
openai_sdk_helpers/agent/coordinator.py +14 -17
openai_sdk_helpers/agent/runner.py +3 -45
openai_sdk_helpers/agent/search/base.py +49 -71
openai_sdk_helpers/agent/search/vector.py +82 -110
openai_sdk_helpers/agent/search/web.py +103 -81
openai_sdk_helpers/agent/summarizer.py +20 -28
openai_sdk_helpers/agent/translator.py +17 -23
openai_sdk_helpers/agent/validator.py +17 -23
openai_sdk_helpers/errors.py +9 -0
openai_sdk_helpers/extract/__init__.py +23 -0
openai_sdk_helpers/extract/extractor.py +157 -0
openai_sdk_helpers/extract/generator.py +476 -0
openai_sdk_helpers/files_api.py +1 -0
openai_sdk_helpers/logging.py +12 -1
openai_sdk_helpers/prompt/extractor_config_agent_instructions.jinja +6 -0
openai_sdk_helpers/prompt/extractor_config_generator.jinja +37 -0
openai_sdk_helpers/prompt/extractor_config_generator_instructions.jinja +9 -0
openai_sdk_helpers/prompt/extractor_prompt_optimizer_agent_instructions.jinja +4 -0
openai_sdk_helpers/prompt/extractor_prompt_optimizer_request.jinja +11 -0
openai_sdk_helpers/response/__init__.py +2 -6
openai_sdk_helpers/response/base.py +233 -164
openai_sdk_helpers/response/configuration.py +39 -14
openai_sdk_helpers/response/files.py +41 -2
openai_sdk_helpers/response/runner.py +1 -48
openai_sdk_helpers/response/tool_call.py +0 -141
openai_sdk_helpers/response/vector_store.py +8 -5
openai_sdk_helpers/streamlit_app/app.py +1 -9
openai_sdk_helpers/structure/__init__.py +16 -0
openai_sdk_helpers/structure/base.py +239 -278
openai_sdk_helpers/structure/extraction.py +1228 -0
openai_sdk_helpers/structure/plan/plan.py +0 -20
openai_sdk_helpers/structure/plan/task.py +0 -33
openai_sdk_helpers/structure/prompt.py +16 -0
openai_sdk_helpers/structure/responses.py +2 -2
openai_sdk_helpers/structure/web_search.py +0 -10
openai_sdk_helpers/tools.py +346 -99
openai_sdk_helpers/utils/__init__.py +7 -0
openai_sdk_helpers/utils/json/base_model.py +315 -32
openai_sdk_helpers/utils/langextract.py +194 -0
openai_sdk_helpers/vector_storage/cleanup.py +7 -2
openai_sdk_helpers/vector_storage/storage.py +37 -7
{openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/METADATA +21 -6
openai_sdk_helpers-0.5.1.dist-info/RECORD +95 -0
openai_sdk_helpers/streamlit_app/streamlit_web_search.py +0 -75
openai_sdk_helpers-0.4.3.dist-info/RECORD +0 -86
{openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/WHEEL +0 -0
{openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/entry_points.txt +0 -0
{openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/licenses/LICENSE +0 -0

openai_sdk_helpers/response/configuration.py CHANGED Viewed

@@ -8,7 +8,8 @@ from typing import Generic, Optional, Sequence, Type, TypeVar
 from ..settings import OpenAISettings
 from ..structure.base import StructureBase
-from .base import ResponseBase, ToolHandler
+from .base import ResponseBase
+from ..tools import ToolHandlerRegistration
 from ..utils.json.data_class import DataclassJSONSerializable
 from ..utils.registry import RegistryBase
 from ..utils.instructions import resolve_instructions_from_path
@@ -23,6 +24,21 @@ class ResponseRegistry(RegistryBase["ResponseConfiguration"]):
     Inherits from RegistryBase to provide centralized storage and retrieval
     of response configurations, enabling reusable response specs across the application.
+    Methods
+    -------
+    register(configuration)
+        Add a configuration to the registry.
+    get(name)
+        Retrieve a configuration by name.
+    list_names()
+        Return all registered configuration names.
+    clear()
+        Remove all registered configurations.
+    save_to_directory(path)
+        Export all registered configurations to JSON files.
+    load_from_directory(path, config_class)
+        Load configurations from JSON files in a directory.
     Examples
     --------
     >>> registry = ResponseRegistry()
@@ -61,12 +77,11 @@ def get_default_registry() -> ResponseRegistry:
 @dataclass(frozen=True, slots=True)
 class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
-    """
-    Represent an immutable configuration describing input and output structures.
+    """Represent an immutable configuration describing input and output structures.
     Encapsulate all metadata required to define how a request is interpreted and
     how a response is structured, while enforcing strict type and runtime safety.
-    Inherits from DataclassJSONSerializable to support serialization to JSON format.
+    Inherit from DataclassJSONSerializable to support serialization to JSON format.
     Parameters
     ----------
@@ -87,9 +102,11 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
     system_vector_store : list[str], optional
         Optional list of vector store names to attach as system context.
         Default is None.
-    data_path : Path, str, or None, optional
-        Optional absolute directory path for storing artifacts. If not provided,
-        defaults to get_data_path(class_name). Default is None.
+    add_output_instructions : bool, optional
+        Whether to append output structure instructions to the prompt.
+        Default is False.
+    add_web_search_tool : bool, optional
+        Whether to append a web_search tool to the tool list. Default is False.
     Raises
     ------
@@ -108,6 +125,12 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
     -------
     __post_init__()
         Validate configuration invariants and enforce StructureBase subclassing.
+    get_resolved_instructions()
+        Return instructions with optional output structure guidance appended.
+    get_resolved_tools()
+        Return tools list with optional web_search tool appended.
+    gen_response(openai_settings, data_path=None, tool_handlers=None)
+        Build a ResponseBase instance from this configuration.
     to_json()
         Return a JSON-compatible dict representation (inherited from JSONSerializable).
     to_json_file(filepath)
@@ -119,7 +142,7 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
     Examples
     --------
-    >>> configuration = Configuration(
+    >>> configuration = ResponseConfiguration(
     ...     name="targeting_to_plan",
     ...     tools=None,
     ...     input_structure=PromptStructure,
@@ -139,8 +162,7 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
     add_web_search_tool: bool = False
     def __post_init__(self) -> None:
-        """
-        Validate configuration invariants after initialization.
+        """Validate configuration invariants after initialization.
         Enforce non-empty naming, correct typing of structures, and ensure that
         any declared structure subclasses StructureBase.
@@ -222,7 +244,7 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
         *,
         openai_settings: OpenAISettings,
         data_path: Optional[Path] = None,
-        tool_handlers: dict[str, ToolHandler] | None = None,
+        tool_handlers: dict[str, ToolHandlerRegistration] | None = None,
     ) -> ResponseBase[TOut]:
         """Generate a ResponseBase instance based on the configuration.
@@ -230,9 +252,12 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
         ----------
         openai_settings : OpenAISettings
             Authentication and model settings applied to the generated
-            :class:`ResponseBase`.
-        tool_handlers : dict[str, Callable], optional
-            Mapping of tool names to handler callables. Defaults to an empty
+            ResponseBase.
+        data_path : Path or None, default None
+            Optional override for the response artifact directory.
+        tool_handlers : dict[str, ToolHandlerRegistration], optional
+            Mapping of tool names to handler registrations. Registrations can include
+            ToolSpec metadata to parse tool outputs by name. Defaults to an empty
             dictionary when not provided.
         Returns

openai_sdk_helpers/response/files.py CHANGED Viewed

@@ -66,6 +66,12 @@ def process_files(
         2. Base64-encoded file content (ResponseInputFileContentParam)
         3. Base64-encoded image content (ResponseInputImageContentParam)
+    Notes
+    -----
+    Inline ``input_file`` attachments only support PDF documents. For other
+    document formats, use ``use_vector_store=True`` or convert to PDF before
+    calling this helper.
     Examples
     --------
     >>> from openai_sdk_helpers.response import process_files
@@ -93,6 +99,9 @@ def process_files(
         else:
             document_files.append(file_path)
+    if document_files and not use_vector_store:
+        _validate_inline_document_files(document_files)
     # Handle document files (vector store or base64)
     vector_file_refs: list[ResponseInputFileParam] = []
     base64_files: list[ResponseInputFileContentParam] = []
@@ -113,6 +122,34 @@ def process_files(
     return vector_file_refs, base64_files, image_contents
+def _validate_inline_document_files(document_files: list[str]) -> None:
+    """Validate document files for inline ``input_file`` usage.
+    Parameters
+    ----------
+    document_files : list[str]
+        Document file paths that will be sent as inline ``input_file``
+        attachments.
+    Raises
+    ------
+    ValueError
+        If any document file is not a PDF.
+    """
+    unsupported_files = [
+        file_path
+        for file_path in document_files
+        if Path(file_path).suffix.lower() != ".pdf"
+    ]
+    if unsupported_files:
+        filenames = ", ".join(Path(path).name for path in unsupported_files)
+        raise ValueError(
+            "Inline input_file attachments support PDFs only. "
+            f"Unsupported files: {filenames}. "
+            "Convert to PDF or set use_vector_store=True."
+        )
 def _upload_to_vector_store(
     response: ResponseBase[Any], document_files: list[str]
 ) -> list[ResponseInputFileParam]:
@@ -151,6 +188,8 @@ def _upload_to_vector_store(
             model=response._model,
         )
         user_vector_storage = cast(Any, response._user_vector_storage)
+        if response._tools is None:
+            response._tools = []
         if not any(tool.get("type") == "file_search" for tool in response._tools):
             response._tools.append(
                 {
@@ -281,7 +320,7 @@ def _encode_documents_base64_batch(
                     base64_files.append(result)
                 except Exception as exc:
                     file_path = future_to_file[future]
-                    log(f"Error encoding document {file_path}: {exc}")
+                    log(f"Error encoding document {file_path}: {exc}", exc=exc)
                     raise
     return base64_files
@@ -383,7 +422,7 @@ def _encode_images_base64_batch(
                     image_contents.append(result)
                 except Exception as exc:
                     image_path = future_to_file[future]
-                    log(f"Error encoding image {image_path}: {exc}")
+                    log(f"Error encoding image {image_path}: {exc}", exc=exc)
                     raise
     return image_contents

openai_sdk_helpers/response/runner.py CHANGED Viewed

@@ -7,12 +7,10 @@ They simplify common usage patterns for both synchronous and asynchronous contex
 from __future__ import annotations
-import asyncio
 from typing import Any, TypeVar
 from .base import ResponseBase
 R = TypeVar("R", bound=ResponseBase[Any])
@@ -100,49 +98,4 @@ async def run_async(
         response.close()
-def run_streamed(
-    response_cls: type[R],
-    *,
-    content: str,
-    response_kwargs: dict[str, Any] | None = None,
-) -> Any:
-    """Execute a response workflow and return the awaited result.
-    Provides API compatibility with agent interfaces. Streaming responses
-    are not currently fully supported, so this executes run_async and
-    awaits the result.
-    Parameters
-    ----------
-    response_cls : type[ResponseBase]
-        Response class to instantiate for the workflow.
-    content : str
-        Prompt text to send to the OpenAI API.
-    response_kwargs : dict[str, Any] or None, default None
-        Optional keyword arguments forwarded to response_cls constructor.
-    Returns
-    -------
-    Any
-        Parsed response from run_async, typically a structured output or None.
-    Notes
-    -----
-    This function exists for API consistency but does not currently provide
-    true streaming functionality.
-    Examples
-    --------
-    >>> from openai_sdk_helpers.response import run_streamed
-    >>> result = run_streamed(
-    ...     MyResponse,
-    ...     content="Process this text",
-    ...     response_kwargs={"openai_settings": settings}
-    ... )
-    """
-    return asyncio.run(
-        run_async(response_cls, content=content, response_kwargs=response_kwargs)
-    )
-__all__ = ["run_sync", "run_async", "run_streamed"]
+__all__ = ["run_sync", "run_async"]

openai_sdk_helpers/response/tool_call.py CHANGED Viewed

@@ -7,9 +7,6 @@ and robust argument parsing.
 from __future__ import annotations
-import ast
-import json
-import re
 from dataclasses import dataclass
 from openai.types.responses.response_function_tool_call_param import (
@@ -94,141 +91,3 @@ class ResponseToolCall(DataclassJSONSerializable):
             },
         )
         return function_call, function_call_output
-def _to_snake_case(name: str) -> str:
-    """Convert a PascalCase or camelCase string to snake_case.
-    Parameters
-    ----------
-    name : str
-        The name to convert.
-    Returns
-    -------
-    str
-        The snake_case version of the name.
-    Examples
-    --------
-    >>> _to_snake_case("ExampleStructure")
-    'example_structure'
-    >>> _to_snake_case("MyToolName")
-    'my_tool_name'
-    """
-    # First regex: Insert underscore before uppercase letters followed by
-    # lowercase letters (e.g., "Tool" in "ExampleTool" becomes "_Tool")
-    s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
-    # Second regex: Insert underscore between lowercase/digit and uppercase
-    # (e.g., "e3" followed by "T" becomes "e3_T")
-    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
-def _unwrap_arguments(parsed: dict, tool_name: str) -> dict:
-    """Unwrap arguments if wrapped in a single-key dict.
-    Some responses wrap arguments under a key matching the structure class
-    name (e.g., {"ExampleStructure": {...}}) or snake_case variant
-    (e.g., {"example_structure": {...}}). This function detects and unwraps
-    such wrappers to normalize the payload.
-    Parameters
-    ----------
-    parsed : dict
-        The parsed arguments dictionary.
-    tool_name : str
-        The tool name, used to match potential wrapper keys.
-    Returns
-    -------
-    dict
-        Unwrapped arguments dictionary, or original if no wrapper detected.
-    Examples
-    --------
-    >>> _unwrap_arguments({"ExampleTool": {"arg": "value"}}, "ExampleTool")
-    {'arg': 'value'}
-    >>> _unwrap_arguments({"example_tool": {"arg": "value"}}, "ExampleTool")
-    {'arg': 'value'}
-    >>> _unwrap_arguments({"arg": "value"}, "ExampleTool")
-    {'arg': 'value'}
-    """
-    # Only unwrap if dict has exactly one key
-    if not isinstance(parsed, dict) or len(parsed) != 1:
-        return parsed
-    wrapper_key = next(iter(parsed))
-    wrapped_value = parsed[wrapper_key]
-    # Only unwrap if the value is also a dict
-    if not isinstance(wrapped_value, dict):
-        return parsed
-    # Check if wrapper key matches tool name (case-insensitive or snake_case)
-    tool_name_lower = tool_name.lower()
-    tool_name_snake = _to_snake_case(tool_name)
-    wrapper_key_lower = wrapper_key.lower()
-    if wrapper_key_lower in (tool_name_lower, tool_name_snake):
-        return wrapped_value
-    return parsed
-def parse_tool_arguments(arguments: str, tool_name: str) -> dict:
-    """Parse tool call arguments with fallback for malformed JSON.
-    Attempts to parse arguments as JSON first, then falls back to
-    ast.literal_eval for cases where the OpenAI API returns minor
-    formatting issues like single quotes instead of double quotes.
-    Provides clear error context including tool name and raw payload.
-    Also handles unwrapping of arguments that are wrapped in a single-key
-    dictionary matching the tool name (e.g., {"ExampleStructure": {...}}).
-    Parameters
-    ----------
-    arguments : str
-        Raw argument string from a tool call, expected to be JSON.
-    tool_name : str
-        Tool name for improved error context (required).
-    Returns
-    -------
-    dict
-        Parsed dictionary of tool arguments, with wrapper unwrapped if present.
-    Raises
-    ------
-    ValueError
-        If the arguments cannot be parsed as valid JSON or Python literal.
-        Error message includes tool name and payload excerpt for debugging.
-    Examples
-    --------
-    >>> parse_tool_arguments('{"key": "value"}', tool_name="search")
-    {'key': 'value'}
-    >>> parse_tool_arguments("{'key': 'value'}", tool_name="search")
-    {'key': 'value'}
-    >>> parse_tool_arguments('{"ExampleTool": {"arg": "value"}}', "ExampleTool")
-    {'arg': 'value'}
-    """
-    try:
-        parsed = json.loads(arguments)
-    except json.JSONDecodeError:
-        try:
-            parsed = ast.literal_eval(arguments)
-        except Exception as exc:  # noqa: BLE001
-            # Build informative error message with context
-            payload_preview = (
-                arguments[:100] + "..." if len(arguments) > 100 else arguments
-            )
-            raise ValueError(
-                f"Failed to parse tool arguments for tool '{tool_name}'. "
-                f"Raw payload: {payload_preview}"
-            ) from exc
-    # Unwrap if wrapped in a single-key dict matching tool name
-    return _unwrap_arguments(parsed, tool_name)

openai_sdk_helpers/response/vector_store.py CHANGED Viewed

@@ -73,13 +73,16 @@ def attach_vector_store(
             raise ValueError(f"Vector store '{store}' not found.")
         if match not in resolved_ids:
             resolved_ids.append(match)
-    file_search_tool = next(
-        (tool for tool in response._tools if tool.get("type") == "file_search"),
-        None,
-    )
+    file_search_tool = None
+    if response._tools is not None:
+        file_search_tool = next(
+            (tool for tool in response._tools if tool.get("type") == "file_search"),
+            None,
+        )
     if file_search_tool is None:
+        if response._tools is None:
+            response._tools = []
         response._tools.append(
             {"type": "file_search", "vector_store_ids": resolved_ids}
         )

openai_sdk_helpers/streamlit_app/app.py CHANGED Viewed

@@ -32,20 +32,12 @@ from openai_sdk_helpers.utils import (
 # Supported file extensions for OpenAI Assistants file search and vision
 SUPPORTED_FILE_EXTENSIONS = (
-    ".csv",
-    ".docx",
     ".gif",
-    ".html",
-    ".json",
     ".jpeg",
     ".jpg",
-    ".md",
     ".pdf",
     ".png",
-    ".pptx",
-    ".txt",
     ".webp",
-    ".xlsx",
 )
@@ -177,7 +169,7 @@ def _render_summary(result: Any, response: ResponseBase[Any]) -> str:
     the result cannot be formatted directly.
     """
     if isinstance(result, StructureBase):
-        return result.print()
+        return str(result)
     if isinstance(result, str):
         return result
     if isinstance(result, dict):

openai_sdk_helpers/structure/__init__.py CHANGED Viewed

@@ -53,6 +53,10 @@ VectorSearchReportStructure
     Complete vector search report.
 ValidationResultStructure
     Validation results with pass/fail status.
+ExtractionItem
+    Extracted item with source span data.
+ExtractionResult
+    Structured extraction results for a document.
 Functions
 ---------
@@ -72,6 +76,13 @@ from __future__ import annotations
 from .agent_blueprint import AgentBlueprint
 from .base import *
+from .extraction import (
+    AnnotatedDocumentStructure,
+    AttributeStructure,
+    DocumentStructure,
+    ExampleDataStructure,
+    ExtractionStructure,
+)
 from .plan import *
 from .prompt import PromptStructure
 from .responses import *
@@ -109,6 +120,11 @@ __all__ = [
     "VectorSearchPlanStructure",
     "VectorSearchStructure",
     "ValidationResultStructure",
+    "AnnotatedDocumentStructure",
+    "AttributeStructure",
+    "DocumentStructure",
+    "ExampleDataStructure",
+    "ExtractionStructure",
     "assistant_tool_definition",
     "assistant_format",
     "response_tool_definition",

openai-sdk-helpers 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

openai-sdk-helpers 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl