openai-sdk-helpers 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/__init__.py +3 -0
- openai_sdk_helpers/files_api.py +373 -0
- openai_sdk_helpers/response/__init__.py +7 -3
- openai_sdk_helpers/response/base.py +166 -65
- openai_sdk_helpers/response/config.py +16 -1
- openai_sdk_helpers/response/files.py +392 -0
- openai_sdk_helpers/streamlit_app/app.py +186 -12
- openai_sdk_helpers/utils/__init__.py +18 -0
- openai_sdk_helpers/utils/encoding.py +189 -0
- openai_sdk_helpers/vector_storage/storage.py +50 -22
- {openai_sdk_helpers-0.1.1.dist-info → openai_sdk_helpers-0.1.4.dist-info}/METADATA +94 -1
- {openai_sdk_helpers-0.1.1.dist-info → openai_sdk_helpers-0.1.4.dist-info}/RECORD +15 -12
- {openai_sdk_helpers-0.1.1.dist-info → openai_sdk_helpers-0.1.4.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.1.1.dist-info → openai_sdk_helpers-0.1.4.dist-info}/entry_points.txt +0 -0
- {openai_sdk_helpers-0.1.1.dist-info → openai_sdk_helpers-0.1.4.dist-info}/licenses/LICENSE +0 -0
openai_sdk_helpers/response/files.py (new file)

@@ -0,0 +1,392 @@
+"""File attachment utilities for responses.
+
+This module provides functions for processing file attachments, automatically
+detecting file types (images vs documents), and preparing them for the OpenAI API
+with appropriate encoding (base64 or vector store). Supports both individual and
+batch file processing.
+"""
+
+from __future__ import annotations
+
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, cast
+
+from openai.types.responses.response_input_file_content_param import (
+    ResponseInputFileContentParam,
+)
+from openai.types.responses.response_input_file_param import ResponseInputFileParam
+from openai.types.responses.response_input_image_content_param import (
+    ResponseInputImageContentParam,
+)
+
+from ..utils import create_file_data_url, create_image_data_url, is_image_file, log
+
+if TYPE_CHECKING:  # pragma: no cover
+    from .base import BaseResponse
+
+
+def process_files(
+    response: BaseResponse[Any],
+    files: list[str],
+    use_vector_store: bool = False,
+    batch_size: int = 10,
+    max_workers: int = 5,
+) -> tuple[
+    list[ResponseInputFileParam],
+    list[ResponseInputFileContentParam],
+    list[ResponseInputImageContentParam],
+]:
+    """Process file attachments and prepare them for OpenAI API.
+
+    Automatically categorizes files by type (images vs documents) and
+    processes them appropriately. Supports concurrent processing for efficient
+    handling of multiple files.
+
+    Parameters
+    ----------
+    response : BaseResponse[Any]
+        Response instance that will use the processed files.
+    files : list[str]
+        List of file paths to process.
+    use_vector_store : bool, default False
+        If True, non-image files are uploaded to a vector store for
+        RAG-enabled file search instead of inline base64 encoding.
+    batch_size : int, default 10
+        Maximum number of files to submit to thread pool at once.
+        Processes files in chunks to avoid overwhelming the executor.
+    max_workers : int, default 5
+        Maximum number of concurrent workers for processing.
+
+    Returns
+    -------
+    tuple[list, list, list]
+        Three lists containing:
+        1. Vector store file references (ResponseInputFileParam)
+        2. Base64-encoded file content (ResponseInputFileContentParam)
+        3. Base64-encoded image content (ResponseInputImageContentParam)
+
+    Examples
+    --------
+    >>> from openai_sdk_helpers.response import process_files
+    >>> vector_files, base64_files, images = process_files(
+    ...     response,
+    ...     ["photo.jpg", "document.pdf"],
+    ...     use_vector_store=False
+    ... )
+
+    >>> # Batch process many files
+    >>> vector_files, base64_files, images = process_files(
+    ...     response,
+    ...     ["file1.pdf", "file2.pdf", ...],  # Many files
+    ...     batch_size=20,
+    ...     max_workers=10
+    ... )
+    """
+    # Categorize files by type
+    image_files: list[str] = []
+    document_files: list[str] = []
+
+    for file_path in files:
+        if is_image_file(file_path):
+            image_files.append(file_path)
+        else:
+            document_files.append(file_path)
+
+    # Handle document files (vector store or base64)
+    vector_file_refs: list[ResponseInputFileParam] = []
+    base64_files: list[ResponseInputFileContentParam] = []
+
+    if document_files:
+        if use_vector_store:
+            # Upload to vector store (sequential for now)
+            vector_file_refs = _upload_to_vector_store(response, document_files)
+        else:
+            # Use batch processing for base64 encoding
+            base64_files = _encode_documents_base64_batch(
+                document_files, batch_size, max_workers
+            )
+
+    # Handle images (always base64) with batch processing
+    image_contents = _encode_images_base64_batch(image_files, batch_size, max_workers)
+
+    return vector_file_refs, base64_files, image_contents
+
+
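Reviewer note: the docstring examples above show the call itself, but not how the three returned lists feed back into a Responses API request. A minimal sketch, assuming the public `client.responses.create` shape with illustrative model and file names (the actual wiring lives in `response/base.py`, changed in this release but not shown in this hunk):

```python
# Sketch only: fold the three lists from process_files into one user message.
# The mixed content list (input_text / input_file / input_image parts) is the
# documented Responses API shape; "gpt-4o" and the file names are illustrative.
from openai import OpenAI

client = OpenAI()
vector_refs, inline_docs, inline_images = process_files(
    response, ["photo.jpg", "report.pdf"], use_vector_store=False
)

content = [{"type": "input_text", "text": "Summarize the attachments."}]
content.extend(inline_docs)    # base64 data-URL documents
content.extend(inline_images)  # base64 data-URL images
content.extend(vector_refs)    # file_id references (vector-store path)

result = client.responses.create(
    model="gpt-4o",
    input=[{"role": "user", "content": content}],
)
```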
+def _upload_to_vector_store(
+    response: BaseResponse[Any], document_files: list[str]
+) -> list[ResponseInputFileParam]:
+    """Upload documents to vector store and return file references.
+
+    Uploads user files with purpose="user_data" for proper categorization
+    and cleanup according to OpenAI Files API conventions.
+
+    Parameters
+    ----------
+    response : BaseResponse[Any]
+        Response instance with vector storage.
+    document_files : list[str]
+        List of document file paths to upload.
+
+    Returns
+    -------
+    list[ResponseInputFileParam]
+        List of file references for vector store files.
+
+    Notes
+    -----
+    Files are uploaded with purpose="user_data" to distinguish them
+    from assistant files. All user files are automatically deleted
+    when the response is closed via the vector store cleanup.
+    """
+    file_refs: list[ResponseInputFileParam] = []
+
+    if response._user_vector_storage is None:
+        from openai_sdk_helpers.vector_storage import VectorStorage
+
+        store_name = f"{response.__class__.__name__.lower()}_{response._name}_{response.uuid}_user"
+        response._user_vector_storage = VectorStorage(
+            store_name=store_name,
+            client=response._client,
+            model=response._model,
+        )
+        user_vector_storage = cast(Any, response._user_vector_storage)
+        if not any(tool.get("type") == "file_search" for tool in response._tools):
+            response._tools.append(
+                {
+                    "type": "file_search",
+                    "vector_store_ids": [user_vector_storage.id],
+                }
+            )
+
+    user_vector_storage = cast(Any, response._user_vector_storage)
+    for file_path in document_files:
+        # Upload with purpose="user_data" for user-uploaded files
+        uploaded_file = user_vector_storage.upload_file(file_path, purpose="user_data")
+        file_refs.append(
+            ResponseInputFileParam(type="input_file", file_id=uploaded_file.id)
+        )
+
+        # Best-effort tracking with FilesAPIManager (if available on the response)
+        files_manager = getattr(response, "_files_manager", None)
+        if files_manager is not None:
+            # Prefer tracking by file ID; fall back to full object if needed.
+            try:
+                files_manager.track_file(uploaded_file.id)
+            except AttributeError:
+                try:
+                    files_manager.track_file(uploaded_file)
+                except AttributeError:
+                    # If the manager does not support tracking in either form,
+                    # silently skip to avoid breaking existing behavior.
+                    pass
+    return file_refs
+
+
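Reviewer note: the `cast(Any, response._user_vector_storage)` appears both inside the initialization branch and again just before the upload loop; the second covers both paths, so the first is redundant but harmless. For orientation, the two artifacts this helper produces look roughly like this (IDs invented):

```python
# Illustrative payload shapes from the vector-store path; IDs are made up.
tools = [
    {
        "type": "file_search",
        "vector_store_ids": ["vs_abc123"],  # the per-response "user" store
    }
]
file_refs = [
    {"type": "input_file", "file_id": "file-xyz789"},  # one per uploaded document
]
```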
+def _encode_documents_base64(
+    document_files: list[str],
+) -> list[ResponseInputFileContentParam]:
+    """Encode documents as base64 for inline attachment.
+
+    Parameters
+    ----------
+    document_files : list[str]
+        List of document file paths to encode.
+
+    Returns
+    -------
+    list[ResponseInputFileContentParam]
+        List of base64-encoded file content parameters.
+    """
+    base64_files: list[ResponseInputFileContentParam] = []
+
+    for file_path in document_files:
+        file_data_url = create_file_data_url(file_path)
+        filename = Path(file_path).name
+        base64_files.append(
+            ResponseInputFileContentParam(
+                type="input_file",
+                file_data=file_data_url,
+                filename=filename,
+            )
+        )
+
+    return base64_files
+
+
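`create_file_data_url` comes from the new `utils/encoding.py` (+189 lines, not included in this section). A plausible minimal implementation is sketched below for orientation only; the package's actual helper may differ:

```python
import base64
import mimetypes
from pathlib import Path


def create_file_data_url(file_path: str) -> str:
    """Sketch of a base64 data-URL builder; not the package's actual code."""
    mime_type = mimetypes.guess_type(file_path)[0] or "application/octet-stream"
    encoded = base64.b64encode(Path(file_path).read_bytes()).decode("ascii")
    return f"data:{mime_type};base64,{encoded}"
```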
+def _encode_documents_base64_batch(
+    document_files: list[str],
+    batch_size: int = 10,
+    max_workers: int = 5,
+) -> list[ResponseInputFileContentParam]:
+    """Encode documents as base64 with batch processing.
+
+    Uses thread pool for concurrent encoding of multiple files.
+
+    Parameters
+    ----------
+    document_files : list[str]
+        List of document file paths to encode.
+    batch_size : int, default 10
+        Number of files to process in each batch.
+    max_workers : int, default 5
+        Maximum number of concurrent workers.
+
+    Returns
+    -------
+    list[ResponseInputFileContentParam]
+        List of base64-encoded file content parameters.
+    """
+    if not document_files:
+        return []
+
+    # If small number of files, process sequentially
+    if len(document_files) <= 3:
+        return _encode_documents_base64(document_files)
+
+    base64_files: list[ResponseInputFileContentParam] = []
+
+    def encode_single_document(file_path: str) -> ResponseInputFileContentParam:
+        """Encode a single document file."""
+        file_data_url = create_file_data_url(file_path)
+        filename = Path(file_path).name
+        return ResponseInputFileContentParam(
+            type="input_file",
+            file_data=file_data_url,
+            filename=filename,
+        )
+
+    # Process files concurrently in batches using thread pool
+    log(
+        f"Processing {len(document_files)} documents in batches of {batch_size} "
+        f"with {max_workers} workers"
+    )
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Process files in batches to avoid overwhelming the executor
+        for batch_start in range(0, len(document_files), batch_size):
+            batch_end = min(batch_start + batch_size, len(document_files))
+            batch = document_files[batch_start:batch_end]
+
+            # Submit this batch of tasks
+            future_to_file = {
+                executor.submit(encode_single_document, file_path): file_path
+                for file_path in batch
+            }
+
+            # Collect results as they complete
+            for future in as_completed(future_to_file):
+                try:
+                    result = future.result()
+                    base64_files.append(result)
+                except Exception as exc:
+                    file_path = future_to_file[future]
+                    log(f"Error encoding document {file_path}: {exc}")
+                    raise
+
+    return base64_files
+
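One behavioral detail worth flagging in the batch encoder: `as_completed` yields futures in completion order, so the returned list need not match the input file order. If attachment order ever matters, an order-preserving variant could use `executor.map`, sketched here:

```python
# Order-preserving alternative (sketch): executor.map yields results in
# input order and re-raises the first worker exception when iterated.
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    base64_files = list(executor.map(encode_single_document, document_files))
```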
+
+def _encode_images_base64(
+    image_files: list[str],
+) -> list[ResponseInputImageContentParam]:
+    """Encode images as base64 for inline attachment.
+
+    Parameters
+    ----------
+    image_files : list[str]
+        List of image file paths to encode.
+
+    Returns
+    -------
+    list[ResponseInputImageContentParam]
+        List of base64-encoded image content parameters.
+    """
+    image_contents: list[ResponseInputImageContentParam] = []
+
+    for image_path in image_files:
+        image_url, detail = create_image_data_url(image_path, detail="auto")
+        image_contents.append(
+            ResponseInputImageContentParam(
+                type="input_image",
+                image_url=image_url,
+                detail=detail,
+            )
+        )
+
+    return image_contents
+
+
+def _encode_images_base64_batch(
+    image_files: list[str],
+    batch_size: int = 10,
+    max_workers: int = 5,
+) -> list[ResponseInputImageContentParam]:
+    """Encode images as base64 with batch processing.
+
+    Uses thread pool for concurrent encoding of multiple images.
+
+    Parameters
+    ----------
+    image_files : list[str]
+        List of image file paths to encode.
+    batch_size : int, default 10
+        Number of images to process in each batch.
+    max_workers : int, default 5
+        Maximum number of concurrent workers.
+
+    Returns
+    -------
+    list[ResponseInputImageContentParam]
+        List of base64-encoded image content parameters.
+    """
+    if not image_files:
+        return []
+
+    # If small number of files, process sequentially
+    if len(image_files) <= 3:
+        return _encode_images_base64(image_files)
+
+    image_contents: list[ResponseInputImageContentParam] = []
+
+    def encode_single_image(image_path: str) -> ResponseInputImageContentParam:
+        """Encode a single image file."""
+        image_url, detail = create_image_data_url(image_path, detail="auto")
+        return ResponseInputImageContentParam(
+            type="input_image",
+            image_url=image_url,
+            detail=detail,
+        )
+
+    # Process images concurrently in batches using thread pool
+    log(
+        f"Processing {len(image_files)} images in batches of {batch_size} "
+        f"with {max_workers} workers"
+    )
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Process images in batches to avoid overwhelming the executor
+        for batch_start in range(0, len(image_files), batch_size):
+            batch_end = min(batch_start + batch_size, len(image_files))
+            batch = image_files[batch_start:batch_end]
+
+            # Submit this batch of tasks
+            future_to_file = {
+                executor.submit(encode_single_image, image_path): image_path
+                for image_path in batch
+            }
+
+            # Collect results as they complete
+            for future in as_completed(future_to_file):
+                try:
+                    result = future.result()
+                    image_contents.append(result)
+                except Exception as exc:
+                    image_path = future_to_file[future]
+                    log(f"Error encoding image {image_path}: {exc}")
+                    raise
+
+    return image_contents
+
+
+__all__ = ["process_files"]
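A small test makes the categorization contract above easy to verify. This is a hypothetical pytest sketch: it assumes `is_image_file` classifies by extension and that the encoding helpers read raw bytes, and it passes a bare `object()` because `response` is only touched on the vector-store path:

```python
def test_process_files_splits_images_from_documents(tmp_path):
    # tmp_path is the standard pytest fixture; file contents are dummy bytes.
    img = tmp_path / "a.png"
    doc = tmp_path / "b.pdf"
    img.write_bytes(b"\x89PNG\r\n\x1a\n")
    doc.write_bytes(b"%PDF-1.4")

    refs, docs, images = process_files(object(), [str(img), str(doc)])

    assert refs == []                                  # no vector-store uploads
    assert [d["filename"] for d in docs] == ["b.pdf"]  # TypedDicts index like dicts
    assert len(images) == 1
```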
openai_sdk_helpers/streamlit_app/app.py

@@ -8,6 +8,8 @@ rendering, response execution, and resource cleanup.
 from __future__ import annotations
 
 import json
+import os
+import tempfile
 from pathlib import Path
 from typing import Any
 
@@ -29,6 +31,70 @@ from openai_sdk_helpers.utils import (
     log,
 )
 
+# Supported file extensions for OpenAI Assistants file search and vision
+SUPPORTED_FILE_EXTENSIONS = (
+    ".csv",
+    ".docx",
+    ".gif",
+    ".html",
+    ".json",
+    ".jpeg",
+    ".jpg",
+    ".md",
+    ".pdf",
+    ".png",
+    ".pptx",
+    ".txt",
+    ".webp",
+    ".xlsx",
+)
+
+
+def _validate_file_type(filename: str) -> bool:
+    """Check if a file has a supported extension for upload.
+
+    Supports both document formats (for file search) and image formats
+    (for vision analysis).
+
+    Parameters
+    ----------
+    filename : str
+        Name of the file to validate.
+
+    Returns
+    -------
+    bool
+        True if the file extension is supported, False otherwise.
+    """
+    file_ext = Path(filename).suffix.lower()
+    return file_ext in SUPPORTED_FILE_EXTENSIONS
+
+
+def _cleanup_temp_files(file_paths: list[str] | None = None) -> None:
+    """Delete temporary files that were created for uploads.
+
+    Parameters
+    ----------
+    file_paths : list[str] or None, default None
+        Specific file paths to delete. If None, deletes all tracked
+        temporary files from session state.
+
+    Notes
+    -----
+    Silently ignores errors when deleting files that may have already
+    been removed or are inaccessible.
+    """
+    paths_to_delete = file_paths or st.session_state.get("temp_file_paths", [])
+    for path in paths_to_delete:
+        try:
+            if os.path.exists(path):
+                os.remove(path)
+        except (OSError, IOError):
+            pass  # Silently ignore if file already deleted or inaccessible
+
+    if file_paths is None:
+        st.session_state["temp_file_paths"] = []
+
 
 def _extract_assistant_text(response: BaseResponse[Any]) -> str:
     """Extract the latest assistant message as readable text.
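Review note on `_cleanup_temp_files`: `file_paths or st.session_state.get(...)` treats an explicitly passed empty list like `None` and falls back to deleting every tracked temp file. Callers in this diff never pass `[]`, so nothing breaks today, but an identity check would be more defensive, e.g.:

```python
# Defensive tweak (sketch): distinguish "no argument" from "empty list".
paths_to_delete = (
    st.session_state.get("temp_file_paths", []) if file_paths is None else file_paths
)
```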
@@ -55,15 +121,33 @@ def _extract_assistant_text(response: BaseResponse[Any]) -> str:
     if message is None:
         return ""
 
+    # Check if the message content has output_text attribute
+    output_text = getattr(message.content, "output_text", None)
+    if output_text:
+        return str(output_text)
+
     content = getattr(message.content, "content", None)
     if content is None:
         return ""
 
     text_parts: list[str] = []
     for part in ensure_list(content):
-
-
-
+        # Handle both dict-like parts and object-like parts
+        text_content = None
+        if hasattr(part, "text"):
+            text_content = getattr(part, "text", None)
+        elif isinstance(part, dict):
+            text_content = part.get("text")
+
+        if text_content:
+            # If text_content is a string, use it directly (dict-style)
+            if isinstance(text_content, str):
+                text_parts.append(text_content)
+            # If text_content is an object with a value attribute, extract that value (object-style)
+            else:
+                text_value = getattr(text_content, "value", None)
+                if text_value:
+                    text_parts.append(text_value)
     if text_parts:
         return "\n\n".join(text_parts)
     return ""
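The new branches in `_extract_assistant_text` cover two message-part shapes. Roughly, with illustrative values:

```python
# Dict-style part: "text" is already a plain string.
dict_part = {"type": "output_text", "text": "Hello"}


# Object-style part: "text" carries the string on a .value attribute,
# as in Assistants-style content blocks. Stub classes for illustration:
class _Text:
    value = "Hello"


class _Part:
    text = _Text()


# Both shapes yield "Hello" through the extraction logic above.
```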
@@ -192,7 +276,8 @@ def _reset_chat(close_response: bool = True) -> None:
     """Clear conversation and optionally close the response session.
 
     Saves the current conversation to disk, closes the response to clean
-    up resources, and clears the chat history from session state.
+    up resources, and clears the chat history from session state. Also
+    cleans up any temporary files that were created for uploads.
 
     Parameters
     ----------
@@ -203,13 +288,17 @@ def _reset_chat(close_response: bool = True) -> None:
     Notes
     -----
     This function mutates st.session_state in-place, clearing the
-    chat_history and
+    chat_history, response_instance, and temp_file_paths keys.
     """
     response = st.session_state.get("response_instance")
     if close_response and isinstance(response, BaseResponse):
         filepath = f"./data/{response.name}.{response.uuid}.json"
         response.save(filepath)
         response.close()
+
+    # Clean up temporary files
+    _cleanup_temp_files()
+
     st.session_state["chat_history"] = []
     st.session_state.pop("response_instance", None)
 
@@ -218,7 +307,8 @@ def _init_session_state() -> None:
     """Initialize Streamlit session state for chat functionality.
 
     Creates the chat_history list in session state if it doesn't exist,
-    enabling conversation persistence across Streamlit reruns.
+    enabling conversation persistence across Streamlit reruns. Also
+    initializes a list for tracking temporary file paths that need cleanup.
 
     Notes
     -----
@@ -227,6 +317,12 @@ def _init_session_state() -> None:
     """
     if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []
+    if "temp_file_paths" not in st.session_state:
+        st.session_state["temp_file_paths"] = []
+    if "current_attachments" not in st.session_state:
+        st.session_state["current_attachments"] = []
+    if "attachment_names" not in st.session_state:
+        st.session_state["attachment_names"] = []
 
 
 def _render_chat_history() -> None:
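`_init_session_state` now seeds four keys with the same empty-list default. An equivalent, slightly tighter formulation using `setdefault` (a refactor sketch, not what the package ships):

```python
_SESSION_DEFAULTS = (
    "chat_history",
    "temp_file_paths",
    "current_attachments",
    "attachment_names",
)


def _init_session_state() -> None:
    # setdefault is a no-op on reruns, matching the "if key not in" checks.
    for key in _SESSION_DEFAULTS:
        st.session_state.setdefault(key, [])
```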
@@ -252,9 +348,19 @@ def _render_chat_history() -> None:
             st.json(raw_output)
         else:
             st.markdown(message.get("content", ""))
-
-
-def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:
+        attachments = message.get("attachments", [])
+        if attachments:
+            st.caption(
+                f"📎 {len(attachments)} file(s) attached: {', '.join(attachments)}"
+            )
+
+
+def _handle_user_message(
+    prompt: str,
+    config: StreamlitAppConfig,
+    attachment_paths: list[str] | None = None,
+    attachment_names: list[str] | None = None,
+) -> None:
     """Process user input and generate assistant response.
 
     Appends the user message to chat history, executes the response
@@ -267,6 +373,10 @@ def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:
         User-entered text to send to the assistant.
     config : StreamlitAppConfig
         Loaded configuration with response handler definition.
+    attachment_paths : list[str] or None, default None
+        Optional list of file paths to attach to the message.
+    attachment_names : list[str] or None, default None
+        Optional list of original filenames for display purposes.
 
     Notes
     -----
@@ -274,7 +384,16 @@ def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:
     chat transcript rather than crashing the application. The function
     triggers a Streamlit rerun after successful response generation.
     """
-    st.session_state["chat_history"].append({"role": "user", "content": prompt})
+    # Use provided display names or fall back to extracting from paths
+    display_names = (
+        attachment_names
+        if attachment_names
+        else [Path(p).name for p in attachment_paths] if attachment_paths else []
+    )
+
+    st.session_state["chat_history"].append(
+        {"role": "user", "content": prompt, "attachments": display_names}
+    )
     try:
         response = _get_response_instance(config)
     except Exception as exc:  # pragma: no cover - surfaced in UI
@@ -283,7 +402,7 @@ def _handle_user_message(prompt: str, config: StreamlitAppConfig) -> None:
 
     try:
         with st.spinner("Thinking..."):
-            result = response.run_sync(content=prompt)
+            result = response.run_sync(content=prompt, files=attachment_paths)
         summary = _render_summary(result, response)
         raw_output = _build_raw_output(result, response)
         st.session_state["chat_history"].append(
@@ -341,9 +460,64 @@ def main(config_path: Path) -> None:
 
     _render_chat_history()
 
+    # File uploader form - auto-clears on submit
+    with st.form("file_upload_form", clear_on_submit=True):
+        uploaded_files = st.file_uploader(
+            "Attach files (optional)",
+            accept_multiple_files=True,
+            help=f"Supported formats: {', '.join(sorted(SUPPORTED_FILE_EXTENSIONS))}",
+        )
+        submit_files = st.form_submit_button("Attach files")
+
+    # Process uploaded files if form was submitted
+    attachment_paths: list[str] = []
+    original_filenames: list[str] = []
+    if submit_files and uploaded_files:
+        invalid_files = []
+        for uploaded_file in uploaded_files:
+            if not _validate_file_type(uploaded_file.name):
+                invalid_files.append(uploaded_file.name)
+                continue
+
+            # Create temporary file with the uploaded content
+            with tempfile.NamedTemporaryFile(
+                delete=False, suffix=Path(uploaded_file.name).suffix
+            ) as tmp_file:
+                tmp_file.write(uploaded_file.getbuffer())
+                tmp_file.flush()
+                attachment_paths.append(tmp_file.name)
+                original_filenames.append(uploaded_file.name)
+                # Track for cleanup
+                if tmp_file.name not in st.session_state.get("temp_file_paths", []):
+                    st.session_state["temp_file_paths"].append(tmp_file.name)
+
+        if invalid_files:
+            st.warning(
+                f"⚠️ Unsupported file types: {', '.join(invalid_files)}. "
+                f"Supported: {', '.join(sorted(SUPPORTED_FILE_EXTENSIONS))}"
+            )
+        if attachment_paths:
+            st.session_state["current_attachments"] = attachment_paths
+            st.session_state["attachment_names"] = original_filenames
+            st.info(f"📎 {len(attachment_paths)} file(s) attached")
+
+    # Get attachment paths from session state if they were previously attached
+    attachment_paths = st.session_state.get("current_attachments", [])
+    attachment_display_names = st.session_state.get("attachment_names", [])
+    if attachment_paths:
+        st.caption(f"Ready to send: {', '.join(attachment_display_names)}")
+
     prompt = st.chat_input("Message the assistant")
     if prompt:
-        _handle_user_message(prompt, config)
+        # Clear attachments before rerun to prevent them from being sent again
+        st.session_state["current_attachments"] = []
+        st.session_state["attachment_names"] = []
+        _handle_user_message(
+            prompt,
+            config,
+            attachment_paths or None,
+            attachment_display_names or None,
+        )
 
 
 if __name__ == "__main__":
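Taken together, the 0.1.4 attachment flow is: uploaded bytes are staged to disk via `tempfile`, validated by extension, parked in session state, handed to `run_sync(files=...)`, routed through `process_files`, and the staged copies are deleted on reset. The non-UI core reduces to a few lines; the names below come from this diff, while the config/response construction is elided and the paths are illustrative:

```python
# Sketch of the flow outside Streamlit; the files are staged temp copies.
response = _get_response_instance(config)
result = response.run_sync(
    content="What's in these files?",
    files=["/tmp/tmpab12cd.pdf", "/tmp/tmpef34gh.png"],
)
```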