openai-sdk-helpers 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openai_sdk_helpers/__init__.py +85 -10
- openai_sdk_helpers/agent/__init__.py +8 -4
- openai_sdk_helpers/agent/base.py +81 -46
- openai_sdk_helpers/agent/config.py +6 -4
- openai_sdk_helpers/agent/{project_manager.py → coordination.py} +29 -45
- openai_sdk_helpers/agent/prompt_utils.py +7 -1
- openai_sdk_helpers/agent/runner.py +67 -141
- openai_sdk_helpers/agent/search/__init__.py +33 -0
- openai_sdk_helpers/agent/search/base.py +297 -0
- openai_sdk_helpers/agent/{vector_search.py → search/vector.py} +89 -157
- openai_sdk_helpers/agent/{web_search.py → search/web.py} +82 -162
- openai_sdk_helpers/agent/summarizer.py +29 -8
- openai_sdk_helpers/agent/translator.py +40 -13
- openai_sdk_helpers/agent/validation.py +32 -8
- openai_sdk_helpers/async_utils.py +132 -0
- openai_sdk_helpers/config.py +74 -36
- openai_sdk_helpers/context_manager.py +241 -0
- openai_sdk_helpers/enums/__init__.py +9 -1
- openai_sdk_helpers/enums/base.py +67 -8
- openai_sdk_helpers/environment.py +33 -6
- openai_sdk_helpers/errors.py +133 -0
- openai_sdk_helpers/logging_config.py +105 -0
- openai_sdk_helpers/prompt/__init__.py +10 -71
- openai_sdk_helpers/prompt/base.py +172 -0
- openai_sdk_helpers/response/__init__.py +37 -5
- openai_sdk_helpers/response/base.py +427 -189
- openai_sdk_helpers/response/config.py +176 -0
- openai_sdk_helpers/response/messages.py +104 -40
- openai_sdk_helpers/response/runner.py +79 -35
- openai_sdk_helpers/response/tool_call.py +75 -12
- openai_sdk_helpers/response/vector_store.py +29 -16
- openai_sdk_helpers/retry.py +175 -0
- openai_sdk_helpers/streamlit_app/__init__.py +30 -0
- openai_sdk_helpers/streamlit_app/app.py +345 -0
- openai_sdk_helpers/streamlit_app/config.py +502 -0
- openai_sdk_helpers/streamlit_app/streamlit_web_search.py +68 -0
- openai_sdk_helpers/structure/__init__.py +69 -3
- openai_sdk_helpers/structure/agent_blueprint.py +82 -19
- openai_sdk_helpers/structure/base.py +245 -91
- openai_sdk_helpers/structure/plan/__init__.py +15 -1
- openai_sdk_helpers/structure/plan/enum.py +41 -5
- openai_sdk_helpers/structure/plan/plan.py +101 -45
- openai_sdk_helpers/structure/plan/task.py +38 -6
- openai_sdk_helpers/structure/prompt.py +21 -2
- openai_sdk_helpers/structure/responses.py +52 -11
- openai_sdk_helpers/structure/summary.py +55 -7
- openai_sdk_helpers/structure/validation.py +34 -6
- openai_sdk_helpers/structure/vector_search.py +132 -18
- openai_sdk_helpers/structure/web_search.py +128 -12
- openai_sdk_helpers/types.py +57 -0
- openai_sdk_helpers/utils/__init__.py +32 -1
- openai_sdk_helpers/utils/core.py +200 -32
- openai_sdk_helpers/validation.py +302 -0
- openai_sdk_helpers/vector_storage/__init__.py +21 -1
- openai_sdk_helpers/vector_storage/cleanup.py +25 -13
- openai_sdk_helpers/vector_storage/storage.py +124 -66
- openai_sdk_helpers/vector_storage/types.py +20 -19
- openai_sdk_helpers-0.0.9.dist-info/METADATA +550 -0
- openai_sdk_helpers-0.0.9.dist-info/RECORD +66 -0
- openai_sdk_helpers-0.0.7.dist-info/METADATA +0 -193
- openai_sdk_helpers-0.0.7.dist-info/RECORD +0 -51
- {openai_sdk_helpers-0.0.7.dist-info → openai_sdk_helpers-0.0.9.dist-info}/WHEEL +0 -0
- {openai_sdk_helpers-0.0.7.dist-info → openai_sdk_helpers-0.0.9.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
"""Wrapper utilities for managing OpenAI vector stores.
|
|
1
|
+
"""Wrapper utilities for managing OpenAI vector stores.
|
|
2
|
+
|
|
3
|
+
This module provides the VectorStorage class for high-level management of
|
|
4
|
+
OpenAI vector stores, including concurrent file uploads, semantic search,
|
|
5
|
+
and batch operations.
|
|
6
|
+
"""
|
|
2
7
|
|
|
3
8
|
from __future__ import annotations
|
|
4
9
|
|
|
@@ -7,7 +12,7 @@ import logging
|
|
|
7
12
|
import mimetypes
|
|
8
13
|
import os
|
|
9
14
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
10
|
-
from typing import
|
|
15
|
+
from typing import cast
|
|
11
16
|
|
|
12
17
|
from openai import OpenAI
|
|
13
18
|
from openai.pagination import SyncPage
|
|
@@ -15,6 +20,7 @@ from openai.types.vector_store import VectorStore
|
|
|
15
20
|
from openai.types.vector_store_search_response import VectorStoreSearchResponse
|
|
16
21
|
from tqdm import tqdm
|
|
17
22
|
|
|
23
|
+
from ..types import OpenAIClient
|
|
18
24
|
from ..utils import ensure_list, log
|
|
19
25
|
from .types import VectorStorageFileInfo, VectorStorageFileStats
|
|
20
26
|
|
|
@@ -43,6 +49,29 @@ ALLOWED_TEXT_MIME_TYPES = {
|
|
|
43
49
|
class VectorStorage:
|
|
44
50
|
"""Manage an OpenAI vector store.
|
|
45
51
|
|
|
52
|
+
This class provides a high-level interface for managing OpenAI vector stores,
|
|
53
|
+
including file uploads, deletions, and semantic search operations. It handles
|
|
54
|
+
file caching, concurrent uploads, and automatic store creation.
|
|
55
|
+
|
|
56
|
+
Examples
|
|
57
|
+
--------
|
|
58
|
+
Basic usage:
|
|
59
|
+
|
|
60
|
+
>>> from openai_sdk_helpers.vector_storage import VectorStorage
|
|
61
|
+
>>> storage = VectorStorage(store_name="documents")
|
|
62
|
+
>>> storage.upload_file("research.pdf")
|
|
63
|
+
>>> results = storage.search("machine learning algorithms", top_k=5)
|
|
64
|
+
|
|
65
|
+
Batch file upload:
|
|
66
|
+
|
|
67
|
+
>>> patterns = ["docs/*.pdf", "papers/*.txt"]
|
|
68
|
+
>>> stats = storage.upload_files(patterns, overwrite=False)
|
|
69
|
+
>>> print(f"Uploaded {stats.uploaded} files")
|
|
70
|
+
|
|
71
|
+
Clean up:
|
|
72
|
+
|
|
73
|
+
>>> storage.delete() # Delete entire store and files
|
|
74
|
+
|
|
46
75
|
Methods
|
|
47
76
|
-------
|
|
48
77
|
id()
|
|
@@ -68,19 +97,24 @@ class VectorStorage:
|
|
|
68
97
|
def __init__(
|
|
69
98
|
self,
|
|
70
99
|
store_name: str,
|
|
71
|
-
client:
|
|
72
|
-
model:
|
|
100
|
+
client: OpenAIClient | None = None,
|
|
101
|
+
model: str | None = None,
|
|
73
102
|
) -> None:
|
|
74
103
|
"""Initialize the vector store helper.
|
|
75
104
|
|
|
105
|
+
Creates or connects to a named vector store using the OpenAI API.
|
|
106
|
+
Requires either a preconfigured client or OPENAI_API_KEY environment
|
|
107
|
+
variable.
|
|
108
|
+
|
|
76
109
|
Parameters
|
|
77
110
|
----------
|
|
78
|
-
store_name
|
|
111
|
+
store_name : str
|
|
79
112
|
Name of the vector store to create or connect to.
|
|
80
|
-
client
|
|
81
|
-
|
|
82
|
-
model
|
|
83
|
-
Embedding model identifier.
|
|
113
|
+
client : OpenAIClient or None, optional
|
|
114
|
+
Preconfigured OpenAI-compatible client, by default None.
|
|
115
|
+
model : str or None, optional
|
|
116
|
+
Embedding model identifier. Reads OPENAI_MODEL env var if None,
|
|
117
|
+
by default None.
|
|
84
118
|
|
|
85
119
|
Raises
|
|
86
120
|
------
|
|
@@ -89,6 +123,7 @@ class VectorStorage:
|
|
|
89
123
|
RuntimeError
|
|
90
124
|
If the OpenAI client cannot be initialized.
|
|
91
125
|
"""
|
|
126
|
+
self._client: OpenAIClient
|
|
92
127
|
if client is None:
|
|
93
128
|
api_key = os.getenv("OPENAI_API_KEY")
|
|
94
129
|
if api_key is None:
|
|
@@ -105,7 +140,7 @@ class VectorStorage:
|
|
|
105
140
|
raise ValueError("OpenAI model is required")
|
|
106
141
|
|
|
107
142
|
self._vector_storage = self._get_or_create_vector_storage(store_name)
|
|
108
|
-
self._existing_files:
|
|
143
|
+
self._existing_files: dict[str, str] | None = {}
|
|
109
144
|
|
|
110
145
|
@property
|
|
111
146
|
def id(self) -> str:
|
|
@@ -121,9 +156,12 @@ class VectorStorage:
|
|
|
121
156
|
def _get_or_create_vector_storage(self, store_name: str) -> VectorStore:
|
|
122
157
|
"""Retrieve an existing vector store or create one if it does not exist.
|
|
123
158
|
|
|
159
|
+
Searches for an existing vector store with the specified name. If not
|
|
160
|
+
found, creates a new one.
|
|
161
|
+
|
|
124
162
|
Parameters
|
|
125
163
|
----------
|
|
126
|
-
store_name
|
|
164
|
+
store_name : str
|
|
127
165
|
Desired name of the vector store.
|
|
128
166
|
|
|
129
167
|
Returns
|
|
@@ -191,28 +229,30 @@ class VectorStorage:
|
|
|
191
229
|
self,
|
|
192
230
|
file_path: str,
|
|
193
231
|
purpose: str = "assistants",
|
|
194
|
-
attributes:
|
|
232
|
+
attributes: dict[str, str | float | bool] | None = None,
|
|
195
233
|
overwrite: bool = False,
|
|
196
234
|
refresh_cache: bool = False,
|
|
197
235
|
) -> VectorStorageFileInfo:
|
|
198
236
|
"""Upload a single file to the vector store.
|
|
199
237
|
|
|
238
|
+
Handles text and binary files with automatic encoding detection.
|
|
239
|
+
Skips upload if file already exists unless overwrite is True.
|
|
240
|
+
|
|
200
241
|
Parameters
|
|
201
242
|
----------
|
|
202
|
-
file_path
|
|
243
|
+
file_path : str
|
|
203
244
|
Local path to the file to upload.
|
|
204
|
-
purpose
|
|
205
|
-
Purpose of the file (
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
checking for duplicates. Default ``False``.
|
|
245
|
+
purpose : str, optional
|
|
246
|
+
Purpose of the file (e.g., "assistants"), by default "assistants".
|
|
247
|
+
attributes : dict[str, str | float | bool] or None, optional
|
|
248
|
+
Custom attributes to associate with the file. The file_name
|
|
249
|
+
attribute is added automatically, by default None.
|
|
250
|
+
overwrite : bool, optional
|
|
251
|
+
When True, re-upload even if a file with the same name exists,
|
|
252
|
+
by default False.
|
|
253
|
+
refresh_cache : bool, optional
|
|
254
|
+
When True, refresh the local cache of existing files before
|
|
255
|
+
checking for duplicates, by default False.
|
|
216
256
|
|
|
217
257
|
Returns
|
|
218
258
|
-------
|
|
@@ -275,25 +315,28 @@ class VectorStorage:
|
|
|
275
315
|
|
|
276
316
|
def upload_files(
|
|
277
317
|
self,
|
|
278
|
-
file_patterns:
|
|
318
|
+
file_patterns: str | list[str],
|
|
279
319
|
purpose: str = "assistants",
|
|
280
|
-
attributes:
|
|
320
|
+
attributes: dict[str, str | float | bool] | None = None,
|
|
281
321
|
overwrite: bool = False,
|
|
282
322
|
) -> VectorStorageFileStats:
|
|
283
|
-
"""Upload files matching glob patterns
|
|
323
|
+
"""Upload files matching glob patterns using a thread pool.
|
|
324
|
+
|
|
325
|
+
Expands glob patterns to find matching files and uploads them
|
|
326
|
+
concurrently using up to 10 worker threads. Shows progress bar
|
|
327
|
+
during upload.
|
|
284
328
|
|
|
285
329
|
Parameters
|
|
286
330
|
----------
|
|
287
|
-
file_patterns
|
|
288
|
-
Glob pattern or list of patterns (
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
already exist. Default ``False``.
|
|
331
|
+
file_patterns : str or list[str]
|
|
332
|
+
Glob pattern or list of patterns (e.g., '/path/**/*.txt').
|
|
333
|
+
purpose : str, optional
|
|
334
|
+
Purpose assigned to uploaded files, by default "assistants".
|
|
335
|
+
attributes : dict[str, str | float | bool] or None, optional
|
|
336
|
+
Custom attributes to associate with each file, by default None.
|
|
337
|
+
overwrite : bool, optional
|
|
338
|
+
When True, re-upload files even if files with the same name
|
|
339
|
+
exist, by default False.
|
|
297
340
|
|
|
298
341
|
Returns
|
|
299
342
|
-------
|
|
@@ -349,16 +392,19 @@ class VectorStorage:
|
|
|
349
392
|
def delete_file(self, file_id: str) -> VectorStorageFileInfo:
|
|
350
393
|
"""Delete a specific file from the vector store.
|
|
351
394
|
|
|
395
|
+
Removes the file from the vector store and updates the local cache.
|
|
396
|
+
The operation is irreversible.
|
|
397
|
+
|
|
352
398
|
Parameters
|
|
353
399
|
----------
|
|
354
|
-
file_id
|
|
400
|
+
file_id : str
|
|
355
401
|
Identifier of the file to delete.
|
|
356
402
|
|
|
357
403
|
Returns
|
|
358
404
|
-------
|
|
359
405
|
VectorStorageFileInfo
|
|
360
406
|
Information about the deletion operation with status
|
|
361
|
-
|
|
407
|
+
"success" or "failed".
|
|
362
408
|
"""
|
|
363
409
|
try:
|
|
364
410
|
self._client.vector_stores.files.delete(
|
|
@@ -378,12 +424,16 @@ class VectorStorage:
|
|
|
378
424
|
name="", id=file_id, status="failed", error=str(exc)
|
|
379
425
|
)
|
|
380
426
|
|
|
381
|
-
def delete_files(self, file_ids:
|
|
382
|
-
"""Delete multiple files
|
|
427
|
+
def delete_files(self, file_ids: list[str]) -> VectorStorageFileStats:
|
|
428
|
+
"""Delete multiple files using a thread pool.
|
|
429
|
+
|
|
430
|
+
Performs concurrent deletions using up to 10 worker threads with
|
|
431
|
+
progress tracking. Updates the local cache for each successful
|
|
432
|
+
deletion.
|
|
383
433
|
|
|
384
434
|
Parameters
|
|
385
435
|
----------
|
|
386
|
-
file_ids
|
|
436
|
+
file_ids : list[str]
|
|
387
437
|
List of file IDs to delete.
|
|
388
438
|
|
|
389
439
|
Returns
|
|
@@ -413,13 +463,11 @@ class VectorStorage:
|
|
|
413
463
|
def delete(self) -> None:
|
|
414
464
|
"""Delete the entire vector store and all associated files.
|
|
415
465
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
deleting the store itself.
|
|
466
|
+
Removes each file individually before deleting the store itself.
|
|
467
|
+
The local cache is cleared after deletion.
|
|
419
468
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
None
|
|
469
|
+
Warning: This operation is irreversible and will permanently delete
|
|
470
|
+
the vector store and all its files.
|
|
423
471
|
"""
|
|
424
472
|
try:
|
|
425
473
|
existing_files = list(self.existing_files.items())
|
|
@@ -440,11 +488,14 @@ class VectorStorage:
|
|
|
440
488
|
def download_files(self, output_dir: str) -> VectorStorageFileStats:
|
|
441
489
|
"""Download every file in the vector store to a local directory.
|
|
442
490
|
|
|
491
|
+
Creates the output directory if needed. Uses file names from
|
|
492
|
+
attributes or falls back to file IDs.
|
|
493
|
+
|
|
443
494
|
Parameters
|
|
444
495
|
----------
|
|
445
|
-
output_dir
|
|
446
|
-
Destination directory where
|
|
447
|
-
|
|
496
|
+
output_dir : str
|
|
497
|
+
Destination directory where files will be written. Created if
|
|
498
|
+
it does not exist.
|
|
448
499
|
|
|
449
500
|
Returns
|
|
450
501
|
-------
|
|
@@ -502,20 +553,23 @@ class VectorStorage:
|
|
|
502
553
|
|
|
503
554
|
def search(
|
|
504
555
|
self, query: str, top_k: int = 5
|
|
505
|
-
) ->
|
|
506
|
-
"""Perform a search within the vector store.
|
|
556
|
+
) -> SyncPage[VectorStoreSearchResponse] | None:
|
|
557
|
+
"""Perform a semantic search within the vector store.
|
|
558
|
+
|
|
559
|
+
Uses the configured embedding model to find the most relevant
|
|
560
|
+
documents matching the query.
|
|
507
561
|
|
|
508
562
|
Parameters
|
|
509
563
|
----------
|
|
510
|
-
query
|
|
564
|
+
query : str
|
|
511
565
|
Search query string.
|
|
512
|
-
top_k
|
|
513
|
-
Maximum number of results to return
|
|
566
|
+
top_k : int, optional
|
|
567
|
+
Maximum number of results to return, by default 5.
|
|
514
568
|
|
|
515
569
|
Returns
|
|
516
570
|
-------
|
|
517
|
-
|
|
518
|
-
Page of search results from the OpenAI API, or
|
|
571
|
+
SyncPage[VectorStoreSearchResponse] or None
|
|
572
|
+
Page of search results from the OpenAI API, or None if an
|
|
519
573
|
error occurs.
|
|
520
574
|
"""
|
|
521
575
|
try:
|
|
@@ -529,26 +583,30 @@ class VectorStorage:
|
|
|
529
583
|
log(f"Error searching vector store: {str(exc)}", level=logging.ERROR)
|
|
530
584
|
return None
|
|
531
585
|
|
|
532
|
-
def summarize(self, query: str, top_k: int = 15) ->
|
|
586
|
+
def summarize(self, query: str, top_k: int = 15) -> str | None:
|
|
533
587
|
"""Perform a semantic search and summarize results by topic.
|
|
534
588
|
|
|
589
|
+
Retrieves top search results and generates a summary. This method
|
|
590
|
+
is designed to be overridden in application-specific wrappers.
|
|
591
|
+
|
|
535
592
|
Parameters
|
|
536
593
|
----------
|
|
537
|
-
query
|
|
594
|
+
query : str
|
|
538
595
|
Search query string used for summarization.
|
|
539
|
-
top_k
|
|
540
|
-
Number of top search results to retrieve and summarize
|
|
596
|
+
top_k : int, optional
|
|
597
|
+
Number of top search results to retrieve and summarize,
|
|
598
|
+
by default 15.
|
|
541
599
|
|
|
542
600
|
Returns
|
|
543
601
|
-------
|
|
544
|
-
|
|
545
|
-
Summary generated by the OpenAI model or
|
|
602
|
+
str or None
|
|
603
|
+
Summary generated by the OpenAI model, or None when no results
|
|
546
604
|
are available or an error occurs.
|
|
547
605
|
|
|
548
606
|
Raises
|
|
549
607
|
------
|
|
550
608
|
RuntimeError
|
|
551
|
-
If no summarizer is configured
|
|
609
|
+
If no summarizer is configured (default behavior).
|
|
552
610
|
"""
|
|
553
611
|
response = self.search(query, top_k=top_k)
|
|
554
612
|
if not response or not response.data:
|
|
@@ -1,15 +1,21 @@
|
|
|
1
|
-
"""Type definitions for vector storage.
|
|
1
|
+
"""Type definitions for vector storage.
|
|
2
|
+
|
|
3
|
+
This module defines data structures for tracking file operations and
|
|
4
|
+
statistics when working with OpenAI vector stores.
|
|
5
|
+
"""
|
|
2
6
|
|
|
3
7
|
from __future__ import annotations
|
|
4
8
|
|
|
5
9
|
from dataclasses import dataclass, field
|
|
6
|
-
from typing import List, Optional
|
|
7
10
|
|
|
8
11
|
|
|
9
12
|
@dataclass
|
|
10
13
|
class VectorStorageFileInfo:
|
|
11
14
|
"""Information about a file stored in a vector store.
|
|
12
15
|
|
|
16
|
+
Tracks the status and details of a single file operation within a
|
|
17
|
+
vector store, including upload, download, or deletion outcomes.
|
|
18
|
+
|
|
13
19
|
Attributes
|
|
14
20
|
----------
|
|
15
21
|
name : str
|
|
@@ -17,42 +23,37 @@ class VectorStorageFileInfo:
|
|
|
17
23
|
id : str
|
|
18
24
|
Unique identifier of the file in the vector store.
|
|
19
25
|
status : str
|
|
20
|
-
Outcome of the operation (
|
|
21
|
-
error : str, optional
|
|
22
|
-
Error message when the operation fails
|
|
23
|
-
|
|
24
|
-
Methods
|
|
25
|
-
-------
|
|
26
|
-
None
|
|
26
|
+
Outcome of the operation (e.g., "success", "error", "existing").
|
|
27
|
+
error : str or None, optional
|
|
28
|
+
Error message when the operation fails, by default None.
|
|
27
29
|
"""
|
|
28
30
|
|
|
29
31
|
name: str
|
|
30
32
|
id: str
|
|
31
33
|
status: str
|
|
32
|
-
error:
|
|
34
|
+
error: str | None = None
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
@dataclass
|
|
36
38
|
class VectorStorageFileStats:
|
|
37
39
|
"""Aggregate statistics about batch file operations.
|
|
38
40
|
|
|
41
|
+
Tracks outcomes for batch upload, download, or deletion operations
|
|
42
|
+
across multiple files in a vector store.
|
|
43
|
+
|
|
39
44
|
Attributes
|
|
40
45
|
----------
|
|
41
46
|
total : int
|
|
42
|
-
Total number of files processed.
|
|
47
|
+
Total number of files processed, by default 0.
|
|
43
48
|
success : int
|
|
44
|
-
Number of files successfully handled.
|
|
49
|
+
Number of files successfully handled, by default 0.
|
|
45
50
|
fail : int
|
|
46
|
-
Number of files that failed to process.
|
|
51
|
+
Number of files that failed to process, by default 0.
|
|
47
52
|
errors : list[VectorStorageFileInfo]
|
|
48
|
-
Details for each failed file.
|
|
49
|
-
|
|
50
|
-
Methods
|
|
51
|
-
-------
|
|
52
|
-
None
|
|
53
|
+
Details for each failed file, by default empty list.
|
|
53
54
|
"""
|
|
54
55
|
|
|
55
56
|
total: int = 0
|
|
56
57
|
success: int = 0
|
|
57
58
|
fail: int = 0
|
|
58
|
-
errors:
|
|
59
|
+
errors: list[VectorStorageFileInfo] = field(default_factory=list)
|