openai-sdk-helpers: openai_sdk_helpers-0.4.3-py3-none-any.whl → openai_sdk_helpers-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. openai_sdk_helpers/__init__.py +41 -7
  2. openai_sdk_helpers/agent/__init__.py +1 -2
  3. openai_sdk_helpers/agent/base.py +169 -190
  4. openai_sdk_helpers/agent/configuration.py +12 -20
  5. openai_sdk_helpers/agent/coordinator.py +14 -17
  6. openai_sdk_helpers/agent/runner.py +3 -45
  7. openai_sdk_helpers/agent/search/base.py +49 -71
  8. openai_sdk_helpers/agent/search/vector.py +82 -110
  9. openai_sdk_helpers/agent/search/web.py +103 -81
  10. openai_sdk_helpers/agent/summarizer.py +20 -28
  11. openai_sdk_helpers/agent/translator.py +17 -23
  12. openai_sdk_helpers/agent/validator.py +17 -23
  13. openai_sdk_helpers/errors.py +9 -0
  14. openai_sdk_helpers/extract/__init__.py +23 -0
  15. openai_sdk_helpers/extract/extractor.py +157 -0
  16. openai_sdk_helpers/extract/generator.py +476 -0
  17. openai_sdk_helpers/files_api.py +1 -0
  18. openai_sdk_helpers/logging.py +12 -1
  19. openai_sdk_helpers/prompt/extractor_config_agent_instructions.jinja +6 -0
  20. openai_sdk_helpers/prompt/extractor_config_generator.jinja +37 -0
  21. openai_sdk_helpers/prompt/extractor_config_generator_instructions.jinja +9 -0
  22. openai_sdk_helpers/prompt/extractor_prompt_optimizer_agent_instructions.jinja +4 -0
  23. openai_sdk_helpers/prompt/extractor_prompt_optimizer_request.jinja +11 -0
  24. openai_sdk_helpers/response/__init__.py +2 -6
  25. openai_sdk_helpers/response/base.py +233 -164
  26. openai_sdk_helpers/response/configuration.py +39 -14
  27. openai_sdk_helpers/response/files.py +41 -2
  28. openai_sdk_helpers/response/runner.py +1 -48
  29. openai_sdk_helpers/response/tool_call.py +0 -141
  30. openai_sdk_helpers/response/vector_store.py +8 -5
  31. openai_sdk_helpers/streamlit_app/app.py +1 -9
  32. openai_sdk_helpers/structure/__init__.py +16 -0
  33. openai_sdk_helpers/structure/base.py +239 -278
  34. openai_sdk_helpers/structure/extraction.py +1228 -0
  35. openai_sdk_helpers/structure/plan/plan.py +0 -20
  36. openai_sdk_helpers/structure/plan/task.py +0 -33
  37. openai_sdk_helpers/structure/prompt.py +16 -0
  38. openai_sdk_helpers/structure/responses.py +2 -2
  39. openai_sdk_helpers/structure/web_search.py +0 -10
  40. openai_sdk_helpers/tools.py +346 -99
  41. openai_sdk_helpers/utils/__init__.py +7 -0
  42. openai_sdk_helpers/utils/json/base_model.py +315 -32
  43. openai_sdk_helpers/utils/langextract.py +194 -0
  44. openai_sdk_helpers/vector_storage/cleanup.py +7 -2
  45. openai_sdk_helpers/vector_storage/storage.py +37 -7
  46. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/METADATA +21 -6
  47. openai_sdk_helpers-0.5.1.dist-info/RECORD +95 -0
  48. openai_sdk_helpers/streamlit_app/streamlit_web_search.py +0 -75
  49. openai_sdk_helpers-0.4.3.dist-info/RECORD +0 -86
  50. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/WHEEL +0 -0
  51. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/entry_points.txt +0 -0
  52. {openai_sdk_helpers-0.4.3.dist-info → openai_sdk_helpers-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -8,7 +8,8 @@ from typing import Generic, Optional, Sequence, Type, TypeVar
8
8
 
9
9
  from ..settings import OpenAISettings
10
10
  from ..structure.base import StructureBase
11
- from .base import ResponseBase, ToolHandler
11
+ from .base import ResponseBase
12
+ from ..tools import ToolHandlerRegistration
12
13
  from ..utils.json.data_class import DataclassJSONSerializable
13
14
  from ..utils.registry import RegistryBase
14
15
  from ..utils.instructions import resolve_instructions_from_path
@@ -23,6 +24,21 @@ class ResponseRegistry(RegistryBase["ResponseConfiguration"]):
23
24
  Inherits from RegistryBase to provide centralized storage and retrieval
24
25
  of response configurations, enabling reusable response specs across the application.
25
26
 
27
+ Methods
28
+ -------
29
+ register(configuration)
30
+ Add a configuration to the registry.
31
+ get(name)
32
+ Retrieve a configuration by name.
33
+ list_names()
34
+ Return all registered configuration names.
35
+ clear()
36
+ Remove all registered configurations.
37
+ save_to_directory(path)
38
+ Export all registered configurations to JSON files.
39
+ load_from_directory(path, config_class)
40
+ Load configurations from JSON files in a directory.
41
+
26
42
  Examples
27
43
  --------
28
44
  >>> registry = ResponseRegistry()
@@ -61,12 +77,11 @@ def get_default_registry() -> ResponseRegistry:
61
77
 
62
78
  @dataclass(frozen=True, slots=True)
63
79
  class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
64
- """
65
- Represent an immutable configuration describing input and output structures.
80
+ """Represent an immutable configuration describing input and output structures.
66
81
 
67
82
  Encapsulate all metadata required to define how a request is interpreted and
68
83
  how a response is structured, while enforcing strict type and runtime safety.
69
- Inherits from DataclassJSONSerializable to support serialization to JSON format.
84
+ Inherit from DataclassJSONSerializable to support serialization to JSON format.
70
85
 
71
86
  Parameters
72
87
  ----------
@@ -87,9 +102,11 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
87
102
  system_vector_store : list[str], optional
88
103
  Optional list of vector store names to attach as system context.
89
104
  Default is None.
90
- data_path : Path, str, or None, optional
91
- Optional absolute directory path for storing artifacts. If not provided,
92
- defaults to get_data_path(class_name). Default is None.
105
+ add_output_instructions : bool, optional
106
+ Whether to append output structure instructions to the prompt.
107
+ Default is False.
108
+ add_web_search_tool : bool, optional
109
+ Whether to append a web_search tool to the tool list. Default is False.
93
110
 
94
111
  Raises
95
112
  ------
@@ -108,6 +125,12 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
108
125
  -------
109
126
  __post_init__()
110
127
  Validate configuration invariants and enforce StructureBase subclassing.
128
+ get_resolved_instructions()
129
+ Return instructions with optional output structure guidance appended.
130
+ get_resolved_tools()
131
+ Return tools list with optional web_search tool appended.
132
+ gen_response(openai_settings, data_path=None, tool_handlers=None)
133
+ Build a ResponseBase instance from this configuration.
111
134
  to_json()
112
135
  Return a JSON-compatible dict representation (inherited from JSONSerializable).
113
136
  to_json_file(filepath)
@@ -119,7 +142,7 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
119
142
 
120
143
  Examples
121
144
  --------
122
- >>> configuration = Configuration(
145
+ >>> configuration = ResponseConfiguration(
123
146
  ... name="targeting_to_plan",
124
147
  ... tools=None,
125
148
  ... input_structure=PromptStructure,
@@ -139,8 +162,7 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
139
162
  add_web_search_tool: bool = False
140
163
 
141
164
  def __post_init__(self) -> None:
142
- """
143
- Validate configuration invariants after initialization.
165
+ """Validate configuration invariants after initialization.
144
166
 
145
167
  Enforce non-empty naming, correct typing of structures, and ensure that
146
168
  any declared structure subclasses StructureBase.
@@ -222,7 +244,7 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
222
244
  *,
223
245
  openai_settings: OpenAISettings,
224
246
  data_path: Optional[Path] = None,
225
- tool_handlers: dict[str, ToolHandler] | None = None,
247
+ tool_handlers: dict[str, ToolHandlerRegistration] | None = None,
226
248
  ) -> ResponseBase[TOut]:
227
249
  """Generate a ResponseBase instance based on the configuration.
228
250
 
@@ -230,9 +252,12 @@ class ResponseConfiguration(DataclassJSONSerializable, Generic[TIn, TOut]):
230
252
  ----------
231
253
  openai_settings : OpenAISettings
232
254
  Authentication and model settings applied to the generated
233
- :class:`ResponseBase`.
234
- tool_handlers : dict[str, Callable], optional
235
- Mapping of tool names to handler callables. Defaults to an empty
255
+ ResponseBase.
256
+ data_path : Path or None, default None
257
+ Optional override for the response artifact directory.
258
+ tool_handlers : dict[str, ToolHandlerRegistration], optional
259
+ Mapping of tool names to handler registrations. Registrations can include
260
+ ToolSpec metadata to parse tool outputs by name. Defaults to an empty
236
261
  dictionary when not provided.
237
262
 
238
263
  Returns
@@ -66,6 +66,12 @@ def process_files(
66
66
  2. Base64-encoded file content (ResponseInputFileContentParam)
67
67
  3. Base64-encoded image content (ResponseInputImageContentParam)
68
68
 
69
+ Notes
70
+ -----
71
+ Inline ``input_file`` attachments only support PDF documents. For other
72
+ document formats, use ``use_vector_store=True`` or convert to PDF before
73
+ calling this helper.
74
+
69
75
  Examples
70
76
  --------
71
77
  >>> from openai_sdk_helpers.response import process_files
@@ -93,6 +99,9 @@ def process_files(
93
99
  else:
94
100
  document_files.append(file_path)
95
101
 
102
+ if document_files and not use_vector_store:
103
+ _validate_inline_document_files(document_files)
104
+
96
105
  # Handle document files (vector store or base64)
97
106
  vector_file_refs: list[ResponseInputFileParam] = []
98
107
  base64_files: list[ResponseInputFileContentParam] = []
@@ -113,6 +122,34 @@ def process_files(
113
122
  return vector_file_refs, base64_files, image_contents
114
123
 
115
124
 
125
+ def _validate_inline_document_files(document_files: list[str]) -> None:
126
+ """Validate document files for inline ``input_file`` usage.
127
+
128
+ Parameters
129
+ ----------
130
+ document_files : list[str]
131
+ Document file paths that will be sent as inline ``input_file``
132
+ attachments.
133
+
134
+ Raises
135
+ ------
136
+ ValueError
137
+ If any document file is not a PDF.
138
+ """
139
+ unsupported_files = [
140
+ file_path
141
+ for file_path in document_files
142
+ if Path(file_path).suffix.lower() != ".pdf"
143
+ ]
144
+ if unsupported_files:
145
+ filenames = ", ".join(Path(path).name for path in unsupported_files)
146
+ raise ValueError(
147
+ "Inline input_file attachments support PDFs only. "
148
+ f"Unsupported files: {filenames}. "
149
+ "Convert to PDF or set use_vector_store=True."
150
+ )
151
+
152
+
116
153
  def _upload_to_vector_store(
117
154
  response: ResponseBase[Any], document_files: list[str]
118
155
  ) -> list[ResponseInputFileParam]:
@@ -151,6 +188,8 @@ def _upload_to_vector_store(
151
188
  model=response._model,
152
189
  )
153
190
  user_vector_storage = cast(Any, response._user_vector_storage)
191
+ if response._tools is None:
192
+ response._tools = []
154
193
  if not any(tool.get("type") == "file_search" for tool in response._tools):
155
194
  response._tools.append(
156
195
  {
@@ -281,7 +320,7 @@ def _encode_documents_base64_batch(
281
320
  base64_files.append(result)
282
321
  except Exception as exc:
283
322
  file_path = future_to_file[future]
284
- log(f"Error encoding document {file_path}: {exc}")
323
+ log(f"Error encoding document {file_path}: {exc}", exc=exc)
285
324
  raise
286
325
 
287
326
  return base64_files
@@ -383,7 +422,7 @@ def _encode_images_base64_batch(
383
422
  image_contents.append(result)
384
423
  except Exception as exc:
385
424
  image_path = future_to_file[future]
386
- log(f"Error encoding image {image_path}: {exc}")
425
+ log(f"Error encoding image {image_path}: {exc}", exc=exc)
387
426
  raise
388
427
 
389
428
  return image_contents
@@ -7,12 +7,10 @@ They simplify common usage patterns for both synchronous and asynchronous contex
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- import asyncio
11
10
  from typing import Any, TypeVar
12
11
 
13
12
  from .base import ResponseBase
14
13
 
15
-
16
14
  R = TypeVar("R", bound=ResponseBase[Any])
17
15
 
18
16
 
@@ -100,49 +98,4 @@ async def run_async(
100
98
  response.close()
101
99
 
102
100
 
103
- def run_streamed(
104
- response_cls: type[R],
105
- *,
106
- content: str,
107
- response_kwargs: dict[str, Any] | None = None,
108
- ) -> Any:
109
- """Execute a response workflow and return the awaited result.
110
-
111
- Provides API compatibility with agent interfaces. Streaming responses
112
- are not currently fully supported, so this executes run_async and
113
- awaits the result.
114
-
115
- Parameters
116
- ----------
117
- response_cls : type[ResponseBase]
118
- Response class to instantiate for the workflow.
119
- content : str
120
- Prompt text to send to the OpenAI API.
121
- response_kwargs : dict[str, Any] or None, default None
122
- Optional keyword arguments forwarded to response_cls constructor.
123
-
124
- Returns
125
- -------
126
- Any
127
- Parsed response from run_async, typically a structured output or None.
128
-
129
- Notes
130
- -----
131
- This function exists for API consistency but does not currently provide
132
- true streaming functionality.
133
-
134
- Examples
135
- --------
136
- >>> from openai_sdk_helpers.response import run_streamed
137
- >>> result = run_streamed(
138
- ... MyResponse,
139
- ... content="Process this text",
140
- ... response_kwargs={"openai_settings": settings}
141
- ... )
142
- """
143
- return asyncio.run(
144
- run_async(response_cls, content=content, response_kwargs=response_kwargs)
145
- )
146
-
147
-
148
- __all__ = ["run_sync", "run_async", "run_streamed"]
101
+ __all__ = ["run_sync", "run_async"]
@@ -7,9 +7,6 @@ and robust argument parsing.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- import ast
11
- import json
12
- import re
13
10
  from dataclasses import dataclass
14
11
 
15
12
  from openai.types.responses.response_function_tool_call_param import (
@@ -94,141 +91,3 @@ class ResponseToolCall(DataclassJSONSerializable):
94
91
  },
95
92
  )
96
93
  return function_call, function_call_output
97
-
98
-
99
- def _to_snake_case(name: str) -> str:
100
- """Convert a PascalCase or camelCase string to snake_case.
101
-
102
- Parameters
103
- ----------
104
- name : str
105
- The name to convert.
106
-
107
- Returns
108
- -------
109
- str
110
- The snake_case version of the name.
111
-
112
- Examples
113
- --------
114
- >>> _to_snake_case("ExampleStructure")
115
- 'example_structure'
116
- >>> _to_snake_case("MyToolName")
117
- 'my_tool_name'
118
- """
119
- # First regex: Insert underscore before uppercase letters followed by
120
- # lowercase letters (e.g., "Tool" in "ExampleTool" becomes "_Tool")
121
- s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
122
- # Second regex: Insert underscore between lowercase/digit and uppercase
123
- # (e.g., "e3" followed by "T" becomes "e3_T")
124
- return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
125
-
126
-
127
- def _unwrap_arguments(parsed: dict, tool_name: str) -> dict:
128
- """Unwrap arguments if wrapped in a single-key dict.
129
-
130
- Some responses wrap arguments under a key matching the structure class
131
- name (e.g., {"ExampleStructure": {...}}) or snake_case variant
132
- (e.g., {"example_structure": {...}}). This function detects and unwraps
133
- such wrappers to normalize the payload.
134
-
135
- Parameters
136
- ----------
137
- parsed : dict
138
- The parsed arguments dictionary.
139
- tool_name : str
140
- The tool name, used to match potential wrapper keys.
141
-
142
- Returns
143
- -------
144
- dict
145
- Unwrapped arguments dictionary, or original if no wrapper detected.
146
-
147
- Examples
148
- --------
149
- >>> _unwrap_arguments({"ExampleTool": {"arg": "value"}}, "ExampleTool")
150
- {'arg': 'value'}
151
- >>> _unwrap_arguments({"example_tool": {"arg": "value"}}, "ExampleTool")
152
- {'arg': 'value'}
153
- >>> _unwrap_arguments({"arg": "value"}, "ExampleTool")
154
- {'arg': 'value'}
155
- """
156
- # Only unwrap if dict has exactly one key
157
- if not isinstance(parsed, dict) or len(parsed) != 1:
158
- return parsed
159
-
160
- wrapper_key = next(iter(parsed))
161
- wrapped_value = parsed[wrapper_key]
162
-
163
- # Only unwrap if the value is also a dict
164
- if not isinstance(wrapped_value, dict):
165
- return parsed
166
-
167
- # Check if wrapper key matches tool name (case-insensitive or snake_case)
168
- tool_name_lower = tool_name.lower()
169
- tool_name_snake = _to_snake_case(tool_name)
170
- wrapper_key_lower = wrapper_key.lower()
171
-
172
- if wrapper_key_lower in (tool_name_lower, tool_name_snake):
173
- return wrapped_value
174
-
175
- return parsed
176
-
177
-
178
- def parse_tool_arguments(arguments: str, tool_name: str) -> dict:
179
- """Parse tool call arguments with fallback for malformed JSON.
180
-
181
- Attempts to parse arguments as JSON first, then falls back to
182
- ast.literal_eval for cases where the OpenAI API returns minor
183
- formatting issues like single quotes instead of double quotes.
184
- Provides clear error context including tool name and raw payload.
185
-
186
- Also handles unwrapping of arguments that are wrapped in a single-key
187
- dictionary matching the tool name (e.g., {"ExampleStructure": {...}}).
188
-
189
- Parameters
190
- ----------
191
- arguments : str
192
- Raw argument string from a tool call, expected to be JSON.
193
- tool_name : str
194
- Tool name for improved error context (required).
195
-
196
- Returns
197
- -------
198
- dict
199
- Parsed dictionary of tool arguments, with wrapper unwrapped if present.
200
-
201
- Raises
202
- ------
203
- ValueError
204
- If the arguments cannot be parsed as valid JSON or Python literal.
205
- Error message includes tool name and payload excerpt for debugging.
206
-
207
- Examples
208
- --------
209
- >>> parse_tool_arguments('{"key": "value"}', tool_name="search")
210
- {'key': 'value'}
211
-
212
- >>> parse_tool_arguments("{'key': 'value'}", tool_name="search")
213
- {'key': 'value'}
214
-
215
- >>> parse_tool_arguments('{"ExampleTool": {"arg": "value"}}', "ExampleTool")
216
- {'arg': 'value'}
217
- """
218
- try:
219
- parsed = json.loads(arguments)
220
- except json.JSONDecodeError:
221
- try:
222
- parsed = ast.literal_eval(arguments)
223
- except Exception as exc: # noqa: BLE001
224
- # Build informative error message with context
225
- payload_preview = (
226
- arguments[:100] + "..." if len(arguments) > 100 else arguments
227
- )
228
- raise ValueError(
229
- f"Failed to parse tool arguments for tool '{tool_name}'. "
230
- f"Raw payload: {payload_preview}"
231
- ) from exc
232
-
233
- # Unwrap if wrapped in a single-key dict matching tool name
234
- return _unwrap_arguments(parsed, tool_name)
@@ -73,13 +73,16 @@ def attach_vector_store(
73
73
  raise ValueError(f"Vector store '{store}' not found.")
74
74
  if match not in resolved_ids:
75
75
  resolved_ids.append(match)
76
-
77
- file_search_tool = next(
78
- (tool for tool in response._tools if tool.get("type") == "file_search"),
79
- None,
80
- )
76
+ file_search_tool = None
77
+ if response._tools is not None:
78
+ file_search_tool = next(
79
+ (tool for tool in response._tools if tool.get("type") == "file_search"),
80
+ None,
81
+ )
81
82
 
82
83
  if file_search_tool is None:
84
+ if response._tools is None:
85
+ response._tools = []
83
86
  response._tools.append(
84
87
  {"type": "file_search", "vector_store_ids": resolved_ids}
85
88
  )
@@ -32,20 +32,12 @@ from openai_sdk_helpers.utils import (
32
32
 
33
33
  # Supported file extensions for OpenAI Assistants file search and vision
34
34
  SUPPORTED_FILE_EXTENSIONS = (
35
- ".csv",
36
- ".docx",
37
35
  ".gif",
38
- ".html",
39
- ".json",
40
36
  ".jpeg",
41
37
  ".jpg",
42
- ".md",
43
38
  ".pdf",
44
39
  ".png",
45
- ".pptx",
46
- ".txt",
47
40
  ".webp",
48
- ".xlsx",
49
41
  )
50
42
 
51
43
 
@@ -177,7 +169,7 @@ def _render_summary(result: Any, response: ResponseBase[Any]) -> str:
177
169
  the result cannot be formatted directly.
178
170
  """
179
171
  if isinstance(result, StructureBase):
180
- return result.print()
172
+ return str(result)
181
173
  if isinstance(result, str):
182
174
  return result
183
175
  if isinstance(result, dict):
@@ -53,6 +53,10 @@ VectorSearchReportStructure
53
53
  Complete vector search report.
54
54
  ValidationResultStructure
55
55
  Validation results with pass/fail status.
56
+ ExtractionItem
57
+ Extracted item with source span data.
58
+ ExtractionResult
59
+ Structured extraction results for a document.
56
60
 
57
61
  Functions
58
62
  ---------
@@ -72,6 +76,13 @@ from __future__ import annotations
72
76
 
73
77
  from .agent_blueprint import AgentBlueprint
74
78
  from .base import *
79
+ from .extraction import (
80
+ AnnotatedDocumentStructure,
81
+ AttributeStructure,
82
+ DocumentStructure,
83
+ ExampleDataStructure,
84
+ ExtractionStructure,
85
+ )
75
86
  from .plan import *
76
87
  from .prompt import PromptStructure
77
88
  from .responses import *
@@ -109,6 +120,11 @@ __all__ = [
109
120
  "VectorSearchPlanStructure",
110
121
  "VectorSearchStructure",
111
122
  "ValidationResultStructure",
123
+ "AnnotatedDocumentStructure",
124
+ "AttributeStructure",
125
+ "DocumentStructure",
126
+ "ExampleDataStructure",
127
+ "ExtractionStructure",
112
128
  "assistant_tool_definition",
113
129
  "assistant_format",
114
130
  "response_tool_definition",