openai-sdk-helpers 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. openai_sdk_helpers/__init__.py +85 -10
  2. openai_sdk_helpers/agent/__init__.py +8 -4
  3. openai_sdk_helpers/agent/base.py +81 -46
  4. openai_sdk_helpers/agent/config.py +6 -4
  5. openai_sdk_helpers/agent/{project_manager.py → coordination.py} +29 -45
  6. openai_sdk_helpers/agent/prompt_utils.py +7 -1
  7. openai_sdk_helpers/agent/runner.py +67 -141
  8. openai_sdk_helpers/agent/search/__init__.py +33 -0
  9. openai_sdk_helpers/agent/search/base.py +297 -0
  10. openai_sdk_helpers/agent/{vector_search.py → search/vector.py} +89 -157
  11. openai_sdk_helpers/agent/{web_search.py → search/web.py} +82 -162
  12. openai_sdk_helpers/agent/summarizer.py +29 -8
  13. openai_sdk_helpers/agent/translator.py +40 -13
  14. openai_sdk_helpers/agent/validation.py +32 -8
  15. openai_sdk_helpers/async_utils.py +132 -0
  16. openai_sdk_helpers/config.py +74 -36
  17. openai_sdk_helpers/context_manager.py +241 -0
  18. openai_sdk_helpers/enums/__init__.py +9 -1
  19. openai_sdk_helpers/enums/base.py +67 -8
  20. openai_sdk_helpers/environment.py +33 -6
  21. openai_sdk_helpers/errors.py +133 -0
  22. openai_sdk_helpers/logging_config.py +105 -0
  23. openai_sdk_helpers/prompt/__init__.py +10 -71
  24. openai_sdk_helpers/prompt/base.py +172 -0
  25. openai_sdk_helpers/response/__init__.py +37 -5
  26. openai_sdk_helpers/response/base.py +427 -189
  27. openai_sdk_helpers/response/config.py +176 -0
  28. openai_sdk_helpers/response/messages.py +104 -40
  29. openai_sdk_helpers/response/runner.py +79 -35
  30. openai_sdk_helpers/response/tool_call.py +75 -12
  31. openai_sdk_helpers/response/vector_store.py +29 -16
  32. openai_sdk_helpers/retry.py +175 -0
  33. openai_sdk_helpers/streamlit_app/__init__.py +30 -0
  34. openai_sdk_helpers/streamlit_app/app.py +345 -0
  35. openai_sdk_helpers/streamlit_app/config.py +502 -0
  36. openai_sdk_helpers/streamlit_app/streamlit_web_search.py +68 -0
  37. openai_sdk_helpers/structure/__init__.py +69 -3
  38. openai_sdk_helpers/structure/agent_blueprint.py +82 -19
  39. openai_sdk_helpers/structure/base.py +245 -91
  40. openai_sdk_helpers/structure/plan/__init__.py +15 -1
  41. openai_sdk_helpers/structure/plan/enum.py +41 -5
  42. openai_sdk_helpers/structure/plan/plan.py +101 -45
  43. openai_sdk_helpers/structure/plan/task.py +38 -6
  44. openai_sdk_helpers/structure/prompt.py +21 -2
  45. openai_sdk_helpers/structure/responses.py +52 -11
  46. openai_sdk_helpers/structure/summary.py +55 -7
  47. openai_sdk_helpers/structure/validation.py +34 -6
  48. openai_sdk_helpers/structure/vector_search.py +132 -18
  49. openai_sdk_helpers/structure/web_search.py +128 -12
  50. openai_sdk_helpers/types.py +57 -0
  51. openai_sdk_helpers/utils/__init__.py +32 -1
  52. openai_sdk_helpers/utils/core.py +200 -32
  53. openai_sdk_helpers/validation.py +302 -0
  54. openai_sdk_helpers/vector_storage/__init__.py +21 -1
  55. openai_sdk_helpers/vector_storage/cleanup.py +25 -13
  56. openai_sdk_helpers/vector_storage/storage.py +124 -66
  57. openai_sdk_helpers/vector_storage/types.py +20 -19
  58. openai_sdk_helpers-0.0.9.dist-info/METADATA +550 -0
  59. openai_sdk_helpers-0.0.9.dist-info/RECORD +66 -0
  60. openai_sdk_helpers-0.0.7.dist-info/METADATA +0 -193
  61. openai_sdk_helpers-0.0.7.dist-info/RECORD +0 -51
  62. {openai_sdk_helpers-0.0.7.dist-info → openai_sdk_helpers-0.0.9.dist-info}/WHEEL +0 -0
  63. {openai_sdk_helpers-0.0.7.dist-info → openai_sdk_helpers-0.0.9.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,9 @@
1
- """Wrapper utilities for managing OpenAI vector stores."""
1
+ """Wrapper utilities for managing OpenAI vector stores.
2
+
3
+ This module provides the VectorStorage class for high-level management of
4
+ OpenAI vector stores, including concurrent file uploads, semantic search,
5
+ and batch operations.
6
+ """
2
7
 
3
8
  from __future__ import annotations
4
9
 
@@ -7,7 +12,7 @@ import logging
7
12
  import mimetypes
8
13
  import os
9
14
  from concurrent.futures import ThreadPoolExecutor, as_completed
10
- from typing import List, Optional, Union, cast
15
+ from typing import cast
11
16
 
12
17
  from openai import OpenAI
13
18
  from openai.pagination import SyncPage
@@ -15,6 +20,7 @@ from openai.types.vector_store import VectorStore
15
20
  from openai.types.vector_store_search_response import VectorStoreSearchResponse
16
21
  from tqdm import tqdm
17
22
 
23
+ from ..types import OpenAIClient
18
24
  from ..utils import ensure_list, log
19
25
  from .types import VectorStorageFileInfo, VectorStorageFileStats
20
26
 
@@ -43,6 +49,29 @@ ALLOWED_TEXT_MIME_TYPES = {
43
49
  class VectorStorage:
44
50
  """Manage an OpenAI vector store.
45
51
 
52
+ This class provides a high-level interface for managing OpenAI vector stores,
53
+ including file uploads, deletions, and semantic search operations. It handles
54
+ file caching, concurrent uploads, and automatic store creation.
55
+
56
+ Examples
57
+ --------
58
+ Basic usage:
59
+
60
+ >>> from openai_sdk_helpers.vector_storage import VectorStorage
61
+ >>> storage = VectorStorage(store_name="documents")
62
+ >>> storage.upload_file("research.pdf")
63
+ >>> results = storage.search("machine learning algorithms", top_k=5)
64
+
65
+ Batch file upload:
66
+
67
+ >>> patterns = ["docs/*.pdf", "papers/*.txt"]
68
+ >>> stats = storage.upload_files(patterns, overwrite=False)
69
+ >>> print(f"Uploaded {stats.success} files")
70
+
71
+ Clean up:
72
+
73
+ >>> storage.delete() # Delete entire store and files
74
+
46
75
  Methods
47
76
  -------
48
77
  id()
@@ -68,19 +97,24 @@ class VectorStorage:
68
97
  def __init__(
69
98
  self,
70
99
  store_name: str,
71
- client: Optional[OpenAI] = None,
72
- model: Optional[str] = None,
100
+ client: OpenAIClient | None = None,
101
+ model: str | None = None,
73
102
  ) -> None:
74
103
  """Initialize the vector store helper.
75
104
 
105
+ Creates or connects to a named vector store using the OpenAI API.
106
+ Requires either a preconfigured client or OPENAI_API_KEY environment
107
+ variable.
108
+
76
109
  Parameters
77
110
  ----------
78
- store_name
111
+ store_name : str
79
112
  Name of the vector store to create or connect to.
80
- client
81
- Optional preconfigured ``OpenAI`` client. Default ``None``.
82
- model
83
- Embedding model identifier. Default ``None`` to read ``OPENAI_MODEL``.
113
+ client : OpenAIClient or None, optional
114
+ Preconfigured OpenAI-compatible client, by default None.
115
+ model : str or None, optional
116
+ Embedding model identifier. Reads OPENAI_MODEL env var if None,
117
+ by default None.
84
118
 
85
119
  Raises
86
120
  ------
@@ -89,6 +123,7 @@ class VectorStorage:
89
123
  RuntimeError
90
124
  If the OpenAI client cannot be initialized.
91
125
  """
126
+ self._client: OpenAIClient
92
127
  if client is None:
93
128
  api_key = os.getenv("OPENAI_API_KEY")
94
129
  if api_key is None:
@@ -105,7 +140,7 @@ class VectorStorage:
105
140
  raise ValueError("OpenAI model is required")
106
141
 
107
142
  self._vector_storage = self._get_or_create_vector_storage(store_name)
108
- self._existing_files: Optional[dict[str, str]] = {}
143
+ self._existing_files: dict[str, str] | None = {}
109
144
 
110
145
  @property
111
146
  def id(self) -> str:
@@ -121,9 +156,12 @@ class VectorStorage:
121
156
  def _get_or_create_vector_storage(self, store_name: str) -> VectorStore:
122
157
  """Retrieve an existing vector store or create one if it does not exist.
123
158
 
159
+ Searches for an existing vector store with the specified name. If not
160
+ found, creates a new one.
161
+
124
162
  Parameters
125
163
  ----------
126
- store_name
164
+ store_name : str
127
165
  Desired name of the vector store.
128
166
 
129
167
  Returns
@@ -191,28 +229,30 @@ class VectorStorage:
191
229
  self,
192
230
  file_path: str,
193
231
  purpose: str = "assistants",
194
- attributes: Optional[dict[str, str | float | bool]] = None,
232
+ attributes: dict[str, str | float | bool] | None = None,
195
233
  overwrite: bool = False,
196
234
  refresh_cache: bool = False,
197
235
  ) -> VectorStorageFileInfo:
198
236
  """Upload a single file to the vector store.
199
237
 
238
+ Handles text and binary files with automatic encoding detection.
239
+ Skips upload if file already exists unless overwrite is True.
240
+
200
241
  Parameters
201
242
  ----------
202
- file_path
243
+ file_path : str
203
244
  Local path to the file to upload.
204
- purpose
205
- Purpose of the file (for example ``"assistants"``). Default
206
- ``"assistants"``.
207
- attributes
208
- Custom attributes to associate with the file. The ``file_name``
209
- attribute is added automatically. Default ``None``.
210
- overwrite
211
- When ``True``, re-upload even if a file with the same name already
212
- exists. Default ``False``.
213
- refresh_cache
214
- When ``True``, refresh the local cache of existing files before
215
- checking for duplicates. Default ``False``.
245
+ purpose : str, optional
246
+ Purpose of the file (e.g., "assistants"), by default "assistants".
247
+ attributes : dict[str, str | float | bool] or None, optional
248
+ Custom attributes to associate with the file. The file_name
249
+ attribute is added automatically, by default None.
250
+ overwrite : bool, optional
251
+ When True, re-upload even if a file with the same name exists,
252
+ by default False.
253
+ refresh_cache : bool, optional
254
+ When True, refresh the local cache of existing files before
255
+ checking for duplicates, by default False.
216
256
 
217
257
  Returns
218
258
  -------
@@ -275,25 +315,28 @@ class VectorStorage:
275
315
 
276
316
  def upload_files(
277
317
  self,
278
- file_patterns: Union[str, List[str]],
318
+ file_patterns: str | list[str],
279
319
  purpose: str = "assistants",
280
- attributes: Optional[dict[str, str | float | bool]] = None,
320
+ attributes: dict[str, str | float | bool] | None = None,
281
321
  overwrite: bool = False,
282
322
  ) -> VectorStorageFileStats:
283
- """Upload files matching glob patterns to the vector store using a thread pool.
323
+ """Upload files matching glob patterns using a thread pool.
324
+
325
+ Expands glob patterns to find matching files and uploads them
326
+ concurrently using up to 10 worker threads. Shows progress bar
327
+ during upload.
284
328
 
285
329
  Parameters
286
330
  ----------
287
- file_patterns
288
- Glob pattern or list of patterns (for example
289
- ``'/path/to/files/**/*.txt'``).
290
- purpose
291
- Purpose assigned to uploaded files. Default ``"assistants"``.
292
- attributes
293
- Custom attributes to associate with each file. Default ``None``.
294
- overwrite
295
- When ``True``, re-upload files even if files with the same name
296
- already exist. Default ``False``.
331
+ file_patterns : str or list[str]
332
+ Glob pattern or list of patterns (e.g., '/path/**/*.txt').
333
+ purpose : str, optional
334
+ Purpose assigned to uploaded files, by default "assistants".
335
+ attributes : dict[str, str | float | bool] or None, optional
336
+ Custom attributes to associate with each file, by default None.
337
+ overwrite : bool, optional
338
+ When True, re-upload files even if files with the same name
339
+ exist, by default False.
297
340
 
298
341
  Returns
299
342
  -------
@@ -349,16 +392,19 @@ class VectorStorage:
349
392
  def delete_file(self, file_id: str) -> VectorStorageFileInfo:
350
393
  """Delete a specific file from the vector store.
351
394
 
395
+ Removes the file from the vector store and updates the local cache.
396
+ The operation is irreversible.
397
+
352
398
  Parameters
353
399
  ----------
354
- file_id
400
+ file_id : str
355
401
  Identifier of the file to delete.
356
402
 
357
403
  Returns
358
404
  -------
359
405
  VectorStorageFileInfo
360
406
  Information about the deletion operation with status
361
- ``"success"`` or ``"failed"``.
407
+ "success" or "failed".
362
408
  """
363
409
  try:
364
410
  self._client.vector_stores.files.delete(
@@ -378,12 +424,16 @@ class VectorStorage:
378
424
  name="", id=file_id, status="failed", error=str(exc)
379
425
  )
380
426
 
381
- def delete_files(self, file_ids: List[str]) -> VectorStorageFileStats:
382
- """Delete multiple files from the vector store using a thread pool.
427
+ def delete_files(self, file_ids: list[str]) -> VectorStorageFileStats:
428
+ """Delete multiple files using a thread pool.
429
+
430
+ Performs concurrent deletions using up to 10 worker threads with
431
+ progress tracking. Updates the local cache for each successful
432
+ deletion.
383
433
 
384
434
  Parameters
385
435
  ----------
386
- file_ids
436
+ file_ids : list[str]
387
437
  List of file IDs to delete.
388
438
 
389
439
  Returns
@@ -413,13 +463,11 @@ class VectorStorage:
413
463
  def delete(self) -> None:
414
464
  """Delete the entire vector store and all associated files.
415
465
 
416
- This operation is irreversible. It first attempts to delete each file
417
- individually from the store (and updates the local cache) before
418
- deleting the store itself.
466
+ Removes each file individually before deleting the store itself.
467
+ The local cache is cleared after deletion.
419
468
 
420
- Returns
421
- -------
422
- None
469
+ Warning: This operation is irreversible and will permanently delete
470
+ the vector store and all its files.
423
471
  """
424
472
  try:
425
473
  existing_files = list(self.existing_files.items())
@@ -440,11 +488,14 @@ class VectorStorage:
440
488
  def download_files(self, output_dir: str) -> VectorStorageFileStats:
441
489
  """Download every file in the vector store to a local directory.
442
490
 
491
+ Creates the output directory if needed. Uses file names from
492
+ attributes or falls back to file IDs.
493
+
443
494
  Parameters
444
495
  ----------
445
- output_dir
446
- Destination directory where the files will be written. The
447
- directory is created when it does not already exist.
496
+ output_dir : str
497
+ Destination directory where files will be written. Created if
498
+ it does not exist.
448
499
 
449
500
  Returns
450
501
  -------
@@ -502,20 +553,23 @@ class VectorStorage:
502
553
 
503
554
  def search(
504
555
  self, query: str, top_k: int = 5
505
- ) -> Optional[SyncPage[VectorStoreSearchResponse]]:
506
- """Perform a search within the vector store.
556
+ ) -> SyncPage[VectorStoreSearchResponse] | None:
557
+ """Perform a semantic search within the vector store.
558
+
559
+ Uses the configured embedding model to find the most relevant
560
+ documents matching the query.
507
561
 
508
562
  Parameters
509
563
  ----------
510
- query
564
+ query : str
511
565
  Search query string.
512
- top_k
513
- Maximum number of results to return. Default ``5``.
566
+ top_k : int, optional
567
+ Maximum number of results to return, by default 5.
514
568
 
515
569
  Returns
516
570
  -------
517
- Optional[SyncPage[VectorStoreSearchResponse]]
518
- Page of search results from the OpenAI API, or ``None`` if an
571
+ SyncPage[VectorStoreSearchResponse] or None
572
+ Page of search results from the OpenAI API, or None if an
519
573
  error occurs.
520
574
  """
521
575
  try:
@@ -529,26 +583,30 @@ class VectorStorage:
529
583
  log(f"Error searching vector store: {str(exc)}", level=logging.ERROR)
530
584
  return None
531
585
 
532
- def summarize(self, query: str, top_k: int = 15) -> Optional[str]:
586
+ def summarize(self, query: str, top_k: int = 15) -> str | None:
533
587
  """Perform a semantic search and summarize results by topic.
534
588
 
589
+ Retrieves top search results and generates a summary. This method
590
+ is designed to be overridden in application-specific wrappers.
591
+
535
592
  Parameters
536
593
  ----------
537
- query
594
+ query : str
538
595
  Search query string used for summarization.
539
- top_k
540
- Number of top search results to retrieve and summarize. Default ``15``.
596
+ top_k : int, optional
597
+ Number of top search results to retrieve and summarize,
598
+ by default 15.
541
599
 
542
600
  Returns
543
601
  -------
544
- Optional[str]
545
- Summary generated by the OpenAI model or ``None`` when no results
602
+ str or None
603
+ Summary generated by the OpenAI model, or None when no results
546
604
  are available or an error occurs.
547
605
 
548
606
  Raises
549
607
  ------
550
608
  RuntimeError
551
- If no summarizer is configured for this core helper.
609
+ If no summarizer is configured (default behavior).
552
610
  """
553
611
  response = self.search(query, top_k=top_k)
554
612
  if not response or not response.data:
@@ -1,15 +1,21 @@
1
- """Type definitions for vector storage."""
1
+ """Type definitions for vector storage.
2
+
3
+ This module defines data structures for tracking file operations and
4
+ statistics when working with OpenAI vector stores.
5
+ """
2
6
 
3
7
  from __future__ import annotations
4
8
 
5
9
  from dataclasses import dataclass, field
6
- from typing import List, Optional
7
10
 
8
11
 
9
12
  @dataclass
10
13
  class VectorStorageFileInfo:
11
14
  """Information about a file stored in a vector store.
12
15
 
16
+ Tracks the status and details of a single file operation within a
17
+ vector store, including upload, download, or deletion outcomes.
18
+
13
19
  Attributes
14
20
  ----------
15
21
  name : str
@@ -17,42 +23,37 @@ class VectorStorageFileInfo:
17
23
  id : str
18
24
  Unique identifier of the file in the vector store.
19
25
  status : str
20
- Outcome of the operation (for example ``"success"`` or ``"error"``).
21
- error : str, optional
22
- Error message when the operation fails. Default ``None``.
23
-
24
- Methods
25
- -------
26
- None
26
+ Outcome of the operation (e.g., "success", "failed", "error", "existing").
27
+ error : str or None, optional
28
+ Error message when the operation fails, by default None.
27
29
  """
28
30
 
29
31
  name: str
30
32
  id: str
31
33
  status: str
32
- error: Optional[str] = None
34
+ error: str | None = None
33
35
 
34
36
 
35
37
  @dataclass
36
38
  class VectorStorageFileStats:
37
39
  """Aggregate statistics about batch file operations.
38
40
 
41
+ Tracks outcomes for batch upload, download, or deletion operations
42
+ across multiple files in a vector store.
43
+
39
44
  Attributes
40
45
  ----------
41
46
  total : int
42
- Total number of files processed.
47
+ Total number of files processed, by default 0.
43
48
  success : int
44
- Number of files successfully handled.
49
+ Number of files successfully handled, by default 0.
45
50
  fail : int
46
- Number of files that failed to process.
51
+ Number of files that failed to process, by default 0.
47
52
  errors : list[VectorStorageFileInfo]
48
- Details for each failed file.
49
-
50
- Methods
51
- -------
52
- None
53
+ Details for each failed file, by default empty list.
53
54
  """
54
55
 
55
56
  total: int = 0
56
57
  success: int = 0
57
58
  fail: int = 0
58
- errors: List[VectorStorageFileInfo] = field(default_factory=list)
59
+ errors: list[VectorStorageFileInfo] = field(default_factory=list)