openai-sdk-helpers 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
- openai_sdk_helpers/__init__.py +44 -7
- openai_sdk_helpers/agent/base.py +5 -1
- openai_sdk_helpers/agent/coordination.py +4 -5
- openai_sdk_helpers/agent/runner.py +4 -1
- openai_sdk_helpers/agent/search/base.py +1 -0
- openai_sdk_helpers/agent/search/vector.py +2 -0
- openai_sdk_helpers/cli.py +265 -0
- openai_sdk_helpers/config.py +93 -2
- openai_sdk_helpers/context_manager.py +1 -1
- openai_sdk_helpers/deprecation.py +167 -0
- openai_sdk_helpers/environment.py +3 -2
- openai_sdk_helpers/errors.py +0 -12
- openai_sdk_helpers/files_api.py +373 -0
- openai_sdk_helpers/logging_config.py +24 -95
- openai_sdk_helpers/prompt/base.py +1 -1
- openai_sdk_helpers/response/__init__.py +7 -3
- openai_sdk_helpers/response/base.py +217 -147
- openai_sdk_helpers/response/config.py +16 -1
- openai_sdk_helpers/response/files.py +392 -0
- openai_sdk_helpers/response/messages.py +1 -0
- openai_sdk_helpers/retry.py +1 -1
- openai_sdk_helpers/streamlit_app/app.py +97 -7
- openai_sdk_helpers/streamlit_app/streamlit_web_search.py +15 -8
- openai_sdk_helpers/structure/base.py +6 -6
- openai_sdk_helpers/structure/plan/helpers.py +1 -0
- openai_sdk_helpers/structure/plan/task.py +7 -7
- openai_sdk_helpers/tools.py +116 -13
- openai_sdk_helpers/utils/__init__.py +100 -35
- openai_sdk_helpers/{async_utils.py → utils/async_utils.py} +5 -6
- openai_sdk_helpers/utils/coercion.py +138 -0
- openai_sdk_helpers/utils/deprecation.py +167 -0
- openai_sdk_helpers/utils/encoding.py +189 -0
- openai_sdk_helpers/utils/json_utils.py +98 -0
- openai_sdk_helpers/utils/output_validation.py +448 -0
- openai_sdk_helpers/utils/path_utils.py +46 -0
- openai_sdk_helpers/{validation.py → utils/validation.py} +7 -3
- openai_sdk_helpers/vector_storage/storage.py +59 -28
- {openai_sdk_helpers-0.1.0.dist-info → openai_sdk_helpers-0.1.2.dist-info}/METADATA +152 -3
- openai_sdk_helpers-0.1.2.dist-info/RECORD +79 -0
- openai_sdk_helpers-0.1.2.dist-info/entry_points.txt +2 -0
- openai_sdk_helpers/utils/core.py +0 -596
- openai_sdk_helpers-0.1.0.dist-info/RECORD +0 -69
- {openai_sdk_helpers-0.1.0.dist-info → openai_sdk_helpers-0.1.2.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.1.0.dist-info → openai_sdk_helpers-0.1.2.dist-info}/licenses/LICENSE +0 -0

openai_sdk_helpers/response/files.py
ADDED

@@ -0,0 +1,392 @@
+"""File attachment utilities for responses.
+
+This module provides functions for processing file attachments, automatically
+detecting file types (images vs documents), and preparing them for the OpenAI API
+with appropriate encoding (base64 or vector store). Supports both individual and
+batch file processing.
+"""
+
+from __future__ import annotations
+
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, cast
+
+from openai.types.responses.response_input_file_content_param import (
+    ResponseInputFileContentParam,
+)
+from openai.types.responses.response_input_file_param import ResponseInputFileParam
+from openai.types.responses.response_input_image_content_param import (
+    ResponseInputImageContentParam,
+)
+
+from ..utils import create_file_data_url, create_image_data_url, is_image_file, log
+
+if TYPE_CHECKING:  # pragma: no cover
+    from .base import BaseResponse
+
+
+def process_files(
+    response: BaseResponse[Any],
+    files: list[str],
+    use_vector_store: bool = False,
+    batch_size: int = 10,
+    max_workers: int = 5,
+) -> tuple[
+    list[ResponseInputFileParam],
+    list[ResponseInputFileContentParam],
+    list[ResponseInputImageContentParam],
+]:
+    """Process file attachments and prepare them for OpenAI API.
+
+    Automatically categorizes files by type (images vs documents) and
+    processes them appropriately. Supports concurrent processing for efficient
+    handling of multiple files.
+
+    Parameters
+    ----------
+    response : BaseResponse[Any]
+        Response instance that will use the processed files.
+    files : list[str]
+        List of file paths to process.
+    use_vector_store : bool, default False
+        If True, non-image files are uploaded to a vector store for
+        RAG-enabled file search instead of inline base64 encoding.
+    batch_size : int, default 10
+        Maximum number of files to submit to thread pool at once.
+        Processes files in chunks to avoid overwhelming the executor.
+    max_workers : int, default 5
+        Maximum number of concurrent workers for processing.
+
+    Returns
+    -------
+    tuple[list, list, list]
+        Three lists containing:
+        1. Vector store file references (ResponseInputFileParam)
+        2. Base64-encoded file content (ResponseInputFileContentParam)
+        3. Base64-encoded image content (ResponseInputImageContentParam)
+
+    Examples
+    --------
+    >>> from openai_sdk_helpers.response import process_files
+    >>> vector_files, base64_files, images = process_files(
+    ...     response,
+    ...     ["photo.jpg", "document.pdf"],
+    ...     use_vector_store=False
+    ... )
+
+    >>> # Batch process many files
+    >>> vector_files, base64_files, images = process_files(
+    ...     response,
+    ...     ["file1.pdf", "file2.pdf", ...],  # Many files
+    ...     batch_size=20,
+    ...     max_workers=10
+    ... )
+    """
+    # Categorize files by type
+    image_files: list[str] = []
+    document_files: list[str] = []
+
+    for file_path in files:
+        if is_image_file(file_path):
+            image_files.append(file_path)
+        else:
+            document_files.append(file_path)
+
+    # Handle document files (vector store or base64)
+    vector_file_refs: list[ResponseInputFileParam] = []
+    base64_files: list[ResponseInputFileContentParam] = []
+
+    if document_files:
+        if use_vector_store:
+            # Upload to vector store (sequential for now)
+            vector_file_refs = _upload_to_vector_store(response, document_files)
+        else:
+            # Use batch processing for base64 encoding
+            base64_files = _encode_documents_base64_batch(
+                document_files, batch_size, max_workers
+            )
+
+    # Handle images (always base64) with batch processing
+    image_contents = _encode_images_base64_batch(image_files, batch_size, max_workers)
+
+    return vector_file_refs, base64_files, image_contents
+
+
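As a usage sketch, the three returned lists are meant to be merged back into the request input. The message shape below is an illustration only; the content-part layout is an assumption, not an API documented by this package:

    # Illustrative sketch: folding the three lists into one user message.
    # The surrounding message structure is assumed, not taken from the package.
    vector_files, base64_files, images = process_files(
        response,
        ["notes.pdf", "chart.png"],
    )
    content = [{"type": "input_text", "text": "Summarize the attachments."}]
    content.extend(images)         # inline base64 images
    content.extend(base64_files)   # inline base64 documents
    content.extend(vector_files)   # vector-store file references
    message = {"role": "user", "content": content}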
+def _upload_to_vector_store(
+    response: BaseResponse[Any], document_files: list[str]
+) -> list[ResponseInputFileParam]:
+    """Upload documents to vector store and return file references.
+
+    Uploads user files with purpose="user_data" for proper categorization
+    and cleanup according to OpenAI Files API conventions.
+
+    Parameters
+    ----------
+    response : BaseResponse[Any]
+        Response instance with vector storage.
+    document_files : list[str]
+        List of document file paths to upload.
+
+    Returns
+    -------
+    list[ResponseInputFileParam]
+        List of file references for vector store files.
+
+    Notes
+    -----
+    Files are uploaded with purpose="user_data" to distinguish them
+    from assistant files. All user files are automatically deleted
+    when the response is closed via the vector store cleanup.
+    """
+    file_refs: list[ResponseInputFileParam] = []
+
+    if response._user_vector_storage is None:
+        from openai_sdk_helpers.vector_storage import VectorStorage
+
+        store_name = f"{response.__class__.__name__.lower()}_{response._name}_{response.uuid}_user"
+        response._user_vector_storage = VectorStorage(
+            store_name=store_name,
+            client=response._client,
+            model=response._model,
+        )
+        user_vector_storage = cast(Any, response._user_vector_storage)
+        if not any(tool.get("type") == "file_search" for tool in response._tools):
+            response._tools.append(
+                {
+                    "type": "file_search",
+                    "vector_store_ids": [user_vector_storage.id],
+                }
+            )
+
+    user_vector_storage = cast(Any, response._user_vector_storage)
+    for file_path in document_files:
+        # Upload with purpose="user_data" for user-uploaded files
+        uploaded_file = user_vector_storage.upload_file(file_path, purpose="user_data")
+        file_refs.append(
+            ResponseInputFileParam(type="input_file", file_id=uploaded_file.id)
+        )
+
+        # Best-effort tracking with FilesAPIManager (if available on the response)
+        files_manager = getattr(response, "_files_manager", None)
+        if files_manager is not None:
+            # Prefer tracking by file ID; fall back to full object if needed.
+            try:
+                files_manager.track_file(uploaded_file.id)
+            except AttributeError:
+                try:
+                    files_manager.track_file(uploaded_file)
+                except AttributeError:
+                    # If the manager does not support tracking in either form,
+                    # silently skip to avoid breaking existing behavior.
+                    pass
+    return file_refs
+
+
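The upload above goes through VectorStorage.upload_file with purpose="user_data". Assuming that wrapper delegates to the official openai client (an assumption; its internals are not part of this diff), the underlying call is roughly:

    # Rough equivalent of the upload step, assuming VectorStorage.upload_file
    # wraps the official client's Files API.
    from openai import OpenAI

    client = OpenAI()
    with open("notes.pdf", "rb") as fh:
        uploaded = client.files.create(file=fh, purpose="user_data")
    print(uploaded.id)  # the id referenced by ResponseInputFileParam(file_id=...)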
+def _encode_documents_base64(
+    document_files: list[str],
+) -> list[ResponseInputFileContentParam]:
+    """Encode documents as base64 for inline attachment.
+
+    Parameters
+    ----------
+    document_files : list[str]
+        List of document file paths to encode.
+
+    Returns
+    -------
+    list[ResponseInputFileContentParam]
+        List of base64-encoded file content parameters.
+    """
+    base64_files: list[ResponseInputFileContentParam] = []
+
+    for file_path in document_files:
+        file_data_url = create_file_data_url(file_path)
+        filename = Path(file_path).name
+        base64_files.append(
+            ResponseInputFileContentParam(
+                type="input_file",
+                file_data=file_data_url,
+                filename=filename,
+            )
+        )
+
+    return base64_files
+
+
+def _encode_documents_base64_batch(
+    document_files: list[str],
+    batch_size: int = 10,
+    max_workers: int = 5,
+) -> list[ResponseInputFileContentParam]:
+    """Encode documents as base64 with batch processing.
+
+    Uses thread pool for concurrent encoding of multiple files.
+
+    Parameters
+    ----------
+    document_files : list[str]
+        List of document file paths to encode.
+    batch_size : int, default 10
+        Number of files to process in each batch.
+    max_workers : int, default 5
+        Maximum number of concurrent workers.
+
+    Returns
+    -------
+    list[ResponseInputFileContentParam]
+        List of base64-encoded file content parameters.
+    """
+    if not document_files:
+        return []
+
+    # If small number of files, process sequentially
+    if len(document_files) <= 3:
+        return _encode_documents_base64(document_files)
+
+    base64_files: list[ResponseInputFileContentParam] = []
+
+    def encode_single_document(file_path: str) -> ResponseInputFileContentParam:
+        """Encode a single document file."""
+        file_data_url = create_file_data_url(file_path)
+        filename = Path(file_path).name
+        return ResponseInputFileContentParam(
+            type="input_file",
+            file_data=file_data_url,
+            filename=filename,
+        )
+
+    # Process files concurrently in batches using thread pool
+    log(
+        f"Processing {len(document_files)} documents in batches of {batch_size} "
+        f"with {max_workers} workers"
+    )
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Process files in batches to avoid overwhelming the executor
+        for batch_start in range(0, len(document_files), batch_size):
+            batch_end = min(batch_start + batch_size, len(document_files))
+            batch = document_files[batch_start:batch_end]
+
+            # Submit this batch of tasks
+            future_to_file = {
+                executor.submit(encode_single_document, file_path): file_path
+                for file_path in batch
+            }
+
+            # Collect results as they complete
+            for future in as_completed(future_to_file):
+                try:
+                    result = future.result()
+                    base64_files.append(result)
+                except Exception as exc:
+                    file_path = future_to_file[future]
+                    log(f"Error encoding document {file_path}: {exc}")
+                    raise
+
+    return base64_files
+
+
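To make the batch slicing concrete: with batch_size=10, 25 documents are split into batches of 10, 10, and 5, and each batch is fanned out across at most max_workers threads before the next batch is submitted.

    # Worked example of the slicing used above: 25 files, batch_size=10.
    files = [f"doc_{i}.pdf" for i in range(25)]
    batch_size = 10
    batches = [files[start:start + batch_size] for start in range(0, len(files), batch_size)]
    print([len(b) for b in batches])  # [10, 10, 5]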
+def _encode_images_base64(
+    image_files: list[str],
+) -> list[ResponseInputImageContentParam]:
+    """Encode images as base64 for inline attachment.
+
+    Parameters
+    ----------
+    image_files : list[str]
+        List of image file paths to encode.
+
+    Returns
+    -------
+    list[ResponseInputImageContentParam]
+        List of base64-encoded image content parameters.
+    """
+    image_contents: list[ResponseInputImageContentParam] = []
+
+    for image_path in image_files:
+        image_url, detail = create_image_data_url(image_path, detail="auto")
+        image_contents.append(
+            ResponseInputImageContentParam(
+                type="input_image",
+                image_url=image_url,
+                detail=detail,
+            )
+        )
+
+    return image_contents
+
+
+def _encode_images_base64_batch(
+    image_files: list[str],
+    batch_size: int = 10,
+    max_workers: int = 5,
+) -> list[ResponseInputImageContentParam]:
+    """Encode images as base64 with batch processing.
+
+    Uses thread pool for concurrent encoding of multiple images.
+
+    Parameters
+    ----------
+    image_files : list[str]
+        List of image file paths to encode.
+    batch_size : int, default 10
+        Number of images to process in each batch.
+    max_workers : int, default 5
+        Maximum number of concurrent workers.
+
+    Returns
+    -------
+    list[ResponseInputImageContentParam]
+        List of base64-encoded image content parameters.
+    """
+    if not image_files:
+        return []
+
+    # If small number of files, process sequentially
+    if len(image_files) <= 3:
+        return _encode_images_base64(image_files)
+
+    image_contents: list[ResponseInputImageContentParam] = []
+
+    def encode_single_image(image_path: str) -> ResponseInputImageContentParam:
+        """Encode a single image file."""
+        image_url, detail = create_image_data_url(image_path, detail="auto")
+        return ResponseInputImageContentParam(
+            type="input_image",
+            image_url=image_url,
+            detail=detail,
+        )
+
+    # Process images concurrently in batches using thread pool
+    log(
+        f"Processing {len(image_files)} images in batches of {batch_size} "
+        f"with {max_workers} workers"
+    )
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Process images in batches to avoid overwhelming the executor
+        for batch_start in range(0, len(image_files), batch_size):
+            batch_end = min(batch_start + batch_size, len(image_files))
+            batch = image_files[batch_start:batch_end]
+
+            # Submit this batch of tasks
+            future_to_file = {
+                executor.submit(encode_single_image, image_path): image_path
+                for image_path in batch
+            }
+
+            # Collect results as they complete
+            for future in as_completed(future_to_file):
+                try:
+                    result = future.result()
+                    image_contents.append(result)
+                except Exception as exc:
+                    image_path = future_to_file[future]
+                    log(f"Error encoding image {image_path}: {exc}")
+                    raise
+
+    return image_contents
+
+
+__all__ = ["process_files"]

openai_sdk_helpers/retry.py
CHANGED

@@ -15,7 +15,7 @@ from typing import Any, Callable, ParamSpec, TypeVar
 from openai import APIError, RateLimitError

 from openai_sdk_helpers.errors import AsyncExecutionError
-from openai_sdk_helpers.
+from openai_sdk_helpers.logging_config import log

 P = ParamSpec("P")
 T = TypeVar("T")
openai_sdk_helpers/streamlit_app/app.py
CHANGED

@@ -8,6 +8,7 @@ rendering, response execution, and resource cleanup.
 from __future__ import annotations

 import json
+import tempfile
 from pathlib import Path
 from typing import Any

@@ -22,7 +23,42 @@
     _load_configuration,
 )
 from openai_sdk_helpers.structure.base import BaseStructure
-from openai_sdk_helpers.utils import
+from openai_sdk_helpers.utils import (
+    coerce_jsonable,
+    customJSONEncoder,
+    ensure_list,
+    log,
+)
+
+# Supported file extensions for OpenAI Assistants file search
+SUPPORTED_FILE_EXTENSIONS = (
+    ".csv",
+    ".docx",
+    ".html",
+    ".json",
+    ".md",
+    ".pdf",
+    ".pptx",
+    ".txt",
+    ".xlsx",
+)
+
+
+def _validate_file_type(filename: str) -> bool:
+    """Check if a file has a supported extension for vector storage.
+
+    Parameters
+    ----------
+    filename : str
+        Name of the file to validate.
+
+    Returns
+    -------
+    bool
+        True if the file extension is supported, False otherwise.
+    """
+    file_ext = Path(filename).suffix.lower()
+    return file_ext in SUPPORTED_FILE_EXTENSIONS


 def _extract_assistant_text(response: BaseResponse[Any]) -> str:
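The helper reduces to a case-insensitive suffix check:

    _validate_file_type("report.PDF")   # True: the suffix lower-cases to ".pdf"
    _validate_file_type("archive.zip")  # False: not in SUPPORTED_FILE_EXTENSIONS
    _validate_file_type("README")       # False: no suffix at all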
@@ -90,10 +126,16 @@ def _render_summary(result: Any, response: BaseResponse[Any]) -> str:
     """
     if isinstance(result, BaseStructure):
         return result.print()
+    if isinstance(result, str):
+        return result
     if isinstance(result, dict):
-        return json.dumps(result, indent=2)
+        return json.dumps(coerce_jsonable(result), indent=2, cls=customJSONEncoder)
     if result:
-
+        coerced = coerce_jsonable(result)
+        try:
+            return json.dumps(coerced, indent=2, cls=customJSONEncoder)
+        except TypeError:
+            return str(result)

     fallback_text = _extract_assistant_text(response)
     if fallback_text:
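The move away from bare json.dumps guards against values that are not JSON-native (datetimes, UUIDs, pydantic models, and the like). This diff does not show the bodies of coerce_jsonable or customJSONEncoder, but the stdlib failure mode they presumably address looks like this:

    import json
    from datetime import datetime

    payload = {"created": datetime(2024, 1, 1)}
    # json.dumps(payload) raises TypeError: Object of type datetime is not JSON serializable
    print(json.dumps(payload, default=str))  # {"created": "2024-01-01 00:00:00"}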
@@ -216,6 +258,8 @@ def _init_session_state() -> None:
     """
     if "chat_history" not in st.session_state:
         st.session_state["chat_history"] = []
+    if "uploaded_files" not in st.session_state:
+        st.session_state["uploaded_files"] = []


 def _render_chat_history() -> None:
@@ -241,9 +285,16 @@ def _render_chat_history() -> None:
             st.json(raw_output)
         else:
             st.markdown(message.get("content", ""))
+        attachments = message.get("attachments", [])
+        if attachments:
+            st.caption(
+                f"📎 {len(attachments)} file(s) attached: {', '.join(attachments)}"
+            )


-def _handle_user_message(
+def _handle_user_message(
+    prompt: str, config: StreamlitAppConfig, attachment_paths: list[str] | None = None
+) -> None:
     """Process user input and generate assistant response.

     Appends the user message to chat history, executes the response
@@ -256,6 +307,8 @@ def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:
         User-entered text to send to the assistant.
     config : StreamlitAppConfig
         Loaded configuration with response handler definition.
+    attachment_paths : list[str] or None, default None
+        Optional list of file paths to attach to the message.

     Notes
     -----
@@ -263,7 +316,12 @@ def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:
     chat transcript rather than crashing the application. The function
     triggers a Streamlit rerun after successful response generation.
     """
-
+    attachment_names = (
+        [Path(p).name for p in attachment_paths] if attachment_paths else []
+    )
+    st.session_state["chat_history"].append(
+        {"role": "user", "content": prompt, "attachments": attachment_names}
+    )
     try:
         response = _get_response_instance(config)
     except Exception as exc:  # pragma: no cover - surfaced in UI
@@ -272,7 +330,7 @@ def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:

     try:
         with st.spinner("Thinking..."):
-            result = response.run_sync(content=prompt)
+            result = response.run_sync(content=prompt, files=attachment_paths)
         summary = _render_summary(result, response)
         raw_output = _build_raw_output(result, response)
         st.session_state["chat_history"].append(
@@ -330,9 +388,41 @@ def main(config_path: Path) -> None:

     _render_chat_history()

+    # File uploader for attachments
+    uploaded_files = st.file_uploader(
+        "Attach files (optional)",
+        accept_multiple_files=True,
+        key="file_uploader",
+        help=f"Supported formats: {', '.join(sorted(SUPPORTED_FILE_EXTENSIONS))}",
+    )
+
+    # Save uploaded files to temporary directory and track paths
+    attachment_paths: list[str] = []
+    if uploaded_files:
+        invalid_files = []
+        for uploaded_file in uploaded_files:
+            if not _validate_file_type(uploaded_file.name):
+                invalid_files.append(uploaded_file.name)
+                continue
+            with tempfile.NamedTemporaryFile(
+                delete=False, suffix=Path(uploaded_file.name).suffix
+            ) as tmp_file:
+                tmp_file.write(uploaded_file.getbuffer())
+                attachment_paths.append(tmp_file.name)
+
+        if invalid_files:
+            st.warning(
+                f"⚠️ Unsupported file types skipped: {', '.join(invalid_files)}. "
+                f"Supported formats: {', '.join(sorted(SUPPORTED_FILE_EXTENSIONS))}"
+            )
+        if attachment_paths:
+            st.caption(f"📎 {len(attachment_paths)} file(s) ready to attach")
+
     prompt = st.chat_input("Message the assistant")
     if prompt:
-        _handle_user_message(
+        _handle_user_message(
+            prompt, config, attachment_paths if attachment_paths else None
+        )


 if __name__ == "__main__":
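Note that NamedTemporaryFile is created with delete=False, so the copies survive the Streamlit rerun and nothing in this diff removes them later. A hypothetical cleanup step (not part of the package) could look like:

    import os

    # Hypothetical: remove the temporary copies once the message has been handled.
    for path in attachment_paths:
        try:
            os.unlink(path)
        except OSError:
            pass  # already removed; safe to ignore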
openai_sdk_helpers/streamlit_app/streamlit_web_search.py
CHANGED

@@ -6,7 +6,8 @@ from openai_sdk_helpers.config import OpenAISettings
 from openai_sdk_helpers.response.base import BaseResponse
 from openai_sdk_helpers.structure.web_search import WebSearchStructure
 from openai_sdk_helpers.structure.prompt import PromptStructure
-from openai_sdk_helpers.
+from openai_sdk_helpers.tools import ToolSpec, build_tool_definitions
+from openai_sdk_helpers.utils import coerce_jsonable, customJSONEncoder
 from openai_sdk_helpers.environment import DEFAULT_MODEL


@@ -24,13 +25,18 @@ class StreamlitWebSearch(BaseResponse[WebSearchStructure]):
         if not settings.default_model:
             settings = settings.model_copy(update={"default_model": DEFAULT_MODEL})
         super().__init__(
+            name="streamlit_web_search",
             instructions="Perform web searches and generate reports.",
-            tools=
-
-
-
-
-
+            tools=build_tool_definitions(
+                [
+                    ToolSpec(
+                        structure=PromptStructure,
+                        tool_name="perform_search",
+                        tool_description="Tool to perform web searches and generate reports.",
+                        output_structure=WebSearchStructure,
+                    )
+                ]
+            ),
             output_structure=WebSearchStructure,
             tool_handlers={"perform_search": perform_search},
             openai_settings=settings,

@@ -43,7 +49,8 @@ async def perform_search(tool) -> str:
     web_result = await WebAgentSearch(default_model=DEFAULT_MODEL).run_web_agent_async(
         structured_data.prompt
     )
-
+    payload = coerce_jsonable(web_result)
+    return json.dumps(payload, cls=customJSONEncoder)


 APP_CONFIG = {
openai_sdk_helpers/structure/base.py
CHANGED

@@ -233,7 +233,7 @@ class BaseStructure(BaseModel):
         return prompt_lines

     @classmethod
-    def assistant_tool_definition(cls, name: str, description: str) -> dict:
+    def assistant_tool_definition(cls, name: str, *, description: str) -> dict:
         """Build an Assistant API function tool definition for this structure.

         Creates a tool definition compatible with the OpenAI Assistant API,

@@ -255,7 +255,7 @@ class BaseStructure(BaseModel):
         --------
         >>> tool = MyStructure.assistant_tool_definition(
         ...     "analyze_data",
-        ...     "Analyze the provided data"
+        ...     description="Analyze the provided data"
         ... )
         """
         from .responses import assistant_tool_definition

@@ -283,7 +283,7 @@ class BaseStructure(BaseModel):
         return assistant_format(cls)

     @classmethod
-    def response_tool_definition(cls, tool_name: str, tool_description: str) -> dict:
+    def response_tool_definition(cls, tool_name: str, *, tool_description: str) -> dict:
         """Build a chat completion tool definition for this structure.

         Creates a function tool definition compatible with the chat

@@ -305,7 +305,7 @@ class BaseStructure(BaseModel):
         --------
         >>> tool = MyStructure.response_tool_definition(
         ...     "process_data",
-        ...     "Process the input data"
+        ...     tool_description="Process the input data"
         ... )
         """
         from .responses import response_tool_definition

@@ -725,7 +725,7 @@ class BaseStructure(BaseModel):
         return cls.from_raw_input(structured_data)

     @staticmethod
-    def format_output(label: str, value: Any) -> str:
+    def format_output(label: str, *, value: Any) -> str:
         """
         Format a label and value for string output.


@@ -772,7 +772,7 @@ class BaseStructure(BaseModel):
         """
         return "\n".join(
             [
-                BaseStructure.format_output(field, value)
+                BaseStructure.format_output(field, value=value)
                 for field, value in self.model_dump().items()
             ]
         )