openai-sdk-helpers 0.0.8__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. openai_sdk_helpers/__init__.py +90 -2
  2. openai_sdk_helpers/agent/__init__.py +8 -4
  3. openai_sdk_helpers/agent/base.py +80 -45
  4. openai_sdk_helpers/agent/config.py +6 -4
  5. openai_sdk_helpers/agent/{project_manager.py → coordination.py} +29 -45
  6. openai_sdk_helpers/agent/prompt_utils.py +7 -1
  7. openai_sdk_helpers/agent/runner.py +67 -141
  8. openai_sdk_helpers/agent/search/__init__.py +33 -0
  9. openai_sdk_helpers/agent/search/base.py +297 -0
  10. openai_sdk_helpers/agent/{vector_search.py → search/vector.py} +89 -157
  11. openai_sdk_helpers/agent/{web_search.py → search/web.py} +77 -156
  12. openai_sdk_helpers/agent/summarizer.py +29 -8
  13. openai_sdk_helpers/agent/translator.py +40 -13
  14. openai_sdk_helpers/agent/validation.py +32 -8
  15. openai_sdk_helpers/async_utils.py +132 -0
  16. openai_sdk_helpers/config.py +101 -65
  17. openai_sdk_helpers/context_manager.py +241 -0
  18. openai_sdk_helpers/enums/__init__.py +9 -1
  19. openai_sdk_helpers/enums/base.py +67 -8
  20. openai_sdk_helpers/environment.py +33 -6
  21. openai_sdk_helpers/errors.py +133 -0
  22. openai_sdk_helpers/logging_config.py +105 -0
  23. openai_sdk_helpers/prompt/__init__.py +10 -71
  24. openai_sdk_helpers/prompt/base.py +222 -0
  25. openai_sdk_helpers/response/__init__.py +38 -3
  26. openai_sdk_helpers/response/base.py +363 -210
  27. openai_sdk_helpers/response/config.py +318 -0
  28. openai_sdk_helpers/response/messages.py +56 -40
  29. openai_sdk_helpers/response/runner.py +77 -33
  30. openai_sdk_helpers/response/tool_call.py +62 -27
  31. openai_sdk_helpers/response/vector_store.py +27 -14
  32. openai_sdk_helpers/retry.py +175 -0
  33. openai_sdk_helpers/streamlit_app/__init__.py +19 -2
  34. openai_sdk_helpers/streamlit_app/app.py +114 -39
  35. openai_sdk_helpers/streamlit_app/config.py +502 -0
  36. openai_sdk_helpers/streamlit_app/streamlit_web_search.py +5 -6
  37. openai_sdk_helpers/structure/__init__.py +72 -3
  38. openai_sdk_helpers/structure/agent_blueprint.py +82 -19
  39. openai_sdk_helpers/structure/base.py +208 -93
  40. openai_sdk_helpers/structure/plan/__init__.py +29 -1
  41. openai_sdk_helpers/structure/plan/enum.py +41 -5
  42. openai_sdk_helpers/structure/plan/helpers.py +172 -0
  43. openai_sdk_helpers/structure/plan/plan.py +109 -49
  44. openai_sdk_helpers/structure/plan/task.py +38 -6
  45. openai_sdk_helpers/structure/plan/types.py +15 -0
  46. openai_sdk_helpers/structure/prompt.py +21 -2
  47. openai_sdk_helpers/structure/responses.py +52 -11
  48. openai_sdk_helpers/structure/summary.py +55 -7
  49. openai_sdk_helpers/structure/validation.py +34 -6
  50. openai_sdk_helpers/structure/vector_search.py +132 -18
  51. openai_sdk_helpers/structure/web_search.py +125 -13
  52. openai_sdk_helpers/tools.py +193 -0
  53. openai_sdk_helpers/types.py +57 -0
  54. openai_sdk_helpers/utils/__init__.py +34 -1
  55. openai_sdk_helpers/utils/core.py +296 -34
  56. openai_sdk_helpers/validation.py +302 -0
  57. openai_sdk_helpers/vector_storage/__init__.py +21 -1
  58. openai_sdk_helpers/vector_storage/cleanup.py +25 -13
  59. openai_sdk_helpers/vector_storage/storage.py +123 -64
  60. openai_sdk_helpers/vector_storage/types.py +20 -19
  61. openai_sdk_helpers-0.1.0.dist-info/METADATA +550 -0
  62. openai_sdk_helpers-0.1.0.dist-info/RECORD +69 -0
  63. openai_sdk_helpers/streamlit_app/configuration.py +0 -324
  64. openai_sdk_helpers-0.0.8.dist-info/METADATA +0 -194
  65. openai_sdk_helpers-0.0.8.dist-info/RECORD +0 -55
  66. {openai_sdk_helpers-0.0.8.dist-info → openai_sdk_helpers-0.1.0.dist-info}/WHEEL +0 -0
  67. {openai_sdk_helpers-0.0.8.dist-info → openai_sdk_helpers-0.1.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,9 @@
1
- """Wrapper utilities for managing OpenAI vector stores."""
1
+ """Wrapper utilities for managing OpenAI vector stores.
2
+
3
+ This module provides the VectorStorage class for high-level management of
4
+ OpenAI vector stores, including concurrent file uploads, semantic search,
5
+ and batch operations.
6
+ """
2
7
 
3
8
  from __future__ import annotations
4
9
 
@@ -7,7 +12,7 @@ import logging
7
12
  import mimetypes
8
13
  import os
9
14
  from concurrent.futures import ThreadPoolExecutor, as_completed
10
- from typing import List, Optional, Union, cast
15
+ from typing import cast
11
16
 
12
17
  from openai import OpenAI
13
18
  from openai.pagination import SyncPage
@@ -15,6 +20,7 @@ from openai.types.vector_store import VectorStore
15
20
  from openai.types.vector_store_search_response import VectorStoreSearchResponse
16
21
  from tqdm import tqdm
17
22
 
23
+ from ..types import OpenAIClient
18
24
  from ..utils import ensure_list, log
19
25
  from .types import VectorStorageFileInfo, VectorStorageFileStats
20
26
 
@@ -43,6 +49,29 @@ ALLOWED_TEXT_MIME_TYPES = {
43
49
  class VectorStorage:
44
50
  """Manage an OpenAI vector store.
45
51
 
52
+ This class provides a high-level interface for managing OpenAI vector stores,
53
+ including file uploads, deletions, and semantic search operations. It handles
54
+ file caching, concurrent uploads, and automatic store creation.
55
+
56
+ Examples
57
+ --------
58
+ Basic usage:
59
+
60
+ >>> from openai_sdk_helpers.vector_storage import VectorStorage
61
+ >>> storage = VectorStorage(store_name="documents")
62
+ >>> storage.upload_file("research.pdf")
63
+ >>> results = storage.search("machine learning algorithms", top_k=5)
64
+
65
+ Batch file upload:
66
+
67
+ >>> patterns = ["docs/*.pdf", "papers/*.txt"]
68
+ >>> stats = storage.upload_files(patterns, overwrite=False)
69
+ >>> print(f"Uploaded {stats.uploaded} files")
70
+
71
+ Clean up:
72
+
73
+ >>> storage.delete() # Delete entire store and files
74
+
46
75
  Methods
47
76
  -------
48
77
  id()
@@ -68,19 +97,24 @@ class VectorStorage:
68
97
  def __init__(
69
98
  self,
70
99
  store_name: str,
71
- client: Optional[OpenAI] = None,
72
- model: Optional[str] = None,
100
+ client: OpenAIClient | None = None,
101
+ model: str | None = None,
73
102
  ) -> None:
74
103
  """Initialize the vector store helper.
75
104
 
105
+ Creates or connects to a named vector store using the OpenAI API.
106
+ Requires either a preconfigured client or OPENAI_API_KEY environment
107
+ variable.
108
+
76
109
  Parameters
77
110
  ----------
78
- store_name
111
+ store_name : str
79
112
  Name of the vector store to create or connect to.
80
- client
81
- Optional preconfigured ``OpenAI`` client. Default ``None``.
82
- model
83
- Embedding model identifier. Default ``None`` to read ``OPENAI_MODEL``.
113
+ client : OpenAIClient or None, optional
114
+ Preconfigured OpenAI-compatible client, by default None.
115
+ model : str or None, optional
116
+ Embedding model identifier. Reads OPENAI_MODEL env var if None,
117
+ by default None.
84
118
 
85
119
  Raises
86
120
  ------
@@ -89,6 +123,7 @@ class VectorStorage:
89
123
  RuntimeError
90
124
  If the OpenAI client cannot be initialized.
91
125
  """
126
+ self._client: OpenAIClient
92
127
  if client is None:
93
128
  api_key = os.getenv("OPENAI_API_KEY")
94
129
  if api_key is None:
@@ -105,7 +140,7 @@ class VectorStorage:
105
140
  raise ValueError("OpenAI model is required")
106
141
 
107
142
  self._vector_storage = self._get_or_create_vector_storage(store_name)
108
- self._existing_files: Optional[dict[str, str]] = {}
143
+ self._existing_files: dict[str, str] | None = {}
109
144
 
110
145
  @property
111
146
  def id(self) -> str:
@@ -121,9 +156,12 @@ class VectorStorage:
121
156
  def _get_or_create_vector_storage(self, store_name: str) -> VectorStore:
122
157
  """Retrieve an existing vector store or create one if it does not exist.
123
158
 
159
+ Searches for an existing vector store with the specified name. If not
160
+ found, creates a new one.
161
+
124
162
  Parameters
125
163
  ----------
126
- store_name
164
+ store_name : str
127
165
  Desired name of the vector store.
128
166
 
129
167
  Returns
@@ -191,27 +229,30 @@ class VectorStorage:
191
229
  self,
192
230
  file_path: str,
193
231
  purpose: str = "assistants",
194
- attributes: Optional[dict[str, str | float | bool]] = None,
232
+ attributes: dict[str, str | float | bool] | None = None,
195
233
  overwrite: bool = False,
196
234
  refresh_cache: bool = False,
197
235
  ) -> VectorStorageFileInfo:
198
236
  """Upload a single file to the vector store.
199
237
 
238
+ Handles text and binary files with automatic encoding detection.
239
+ Skips upload if file already exists unless overwrite is True.
240
+
200
241
  Parameters
201
242
  ----------
202
243
  file_path : str
203
244
  Local path to the file to upload.
204
- purpose : str, default "assistants"
205
- Purpose of the file (for example ``"assistants"``).
206
- attributes : dict or None, default None
207
- Custom attributes to associate with the file. The ``file_name``
208
- attribute is added automatically.
209
- overwrite : bool, default False
210
- When ``True``, re-upload even if a file with the same name already
211
- exists.
212
- refresh_cache : bool, default False
213
- When ``True``, refresh the local cache of existing files before
214
- checking for duplicates.
245
+ purpose : str, optional
246
+ Purpose of the file (e.g., "assistants"), by default "assistants".
247
+ attributes : dict[str, str | float | bool] or None, optional
248
+ Custom attributes to associate with the file. The file_name
249
+ attribute is added automatically, by default None.
250
+ overwrite : bool, optional
251
+ When True, re-upload even if a file with the same name exists,
252
+ by default False.
253
+ refresh_cache : bool, optional
254
+ When True, refresh the local cache of existing files before
255
+ checking for duplicates, by default False.
215
256
 
216
257
  Returns
217
258
  -------
@@ -274,25 +315,28 @@ class VectorStorage:
274
315
 
275
316
  def upload_files(
276
317
  self,
277
- file_patterns: Union[str, List[str]],
318
+ file_patterns: str | list[str],
278
319
  purpose: str = "assistants",
279
- attributes: Optional[dict[str, str | float | bool]] = None,
320
+ attributes: dict[str, str | float | bool] | None = None,
280
321
  overwrite: bool = False,
281
322
  ) -> VectorStorageFileStats:
282
- """Upload files matching glob patterns to the vector store using a thread pool.
323
+ """Upload files matching glob patterns using a thread pool.
324
+
325
+ Expands glob patterns to find matching files and uploads them
326
+ concurrently using up to 10 worker threads. Shows progress bar
327
+ during upload.
283
328
 
284
329
  Parameters
285
330
  ----------
286
- file_patterns : str or list of str
287
- Glob pattern or list of patterns (for example
288
- ``'/path/to/files/**/*.txt'``).
289
- purpose : str, default "assistants"
290
- Purpose assigned to uploaded files.
291
- attributes : dict or None, default None
292
- Custom attributes to associate with each file.
293
- overwrite : bool, default False
294
- When ``True``, re-upload files even if files with the same name
295
- already exist.
331
+ file_patterns : str or list[str]
332
+ Glob pattern or list of patterns (e.g., '/path/**/*.txt').
333
+ purpose : str, optional
334
+ Purpose assigned to uploaded files, by default "assistants".
335
+ attributes : dict[str, str | float | bool] or None, optional
336
+ Custom attributes to associate with each file, by default None.
337
+ overwrite : bool, optional
338
+ When True, re-upload files even if files with the same name
339
+ exist, by default False.
296
340
 
297
341
  Returns
298
342
  -------
@@ -348,16 +392,19 @@ class VectorStorage:
348
392
  def delete_file(self, file_id: str) -> VectorStorageFileInfo:
349
393
  """Delete a specific file from the vector store.
350
394
 
395
+ Removes the file from the vector store and updates the local cache.
396
+ The operation is irreversible.
397
+
351
398
  Parameters
352
399
  ----------
353
- file_id
400
+ file_id : str
354
401
  Identifier of the file to delete.
355
402
 
356
403
  Returns
357
404
  -------
358
405
  VectorStorageFileInfo
359
406
  Information about the deletion operation with status
360
- ``"success"`` or ``"failed"``.
407
+ "success" or "failed".
361
408
  """
362
409
  try:
363
410
  self._client.vector_stores.files.delete(
@@ -377,12 +424,16 @@ class VectorStorage:
377
424
  name="", id=file_id, status="failed", error=str(exc)
378
425
  )
379
426
 
380
- def delete_files(self, file_ids: List[str]) -> VectorStorageFileStats:
381
- """Delete multiple files from the vector store using a thread pool.
427
+ def delete_files(self, file_ids: list[str]) -> VectorStorageFileStats:
428
+ """Delete multiple files using a thread pool.
429
+
430
+ Performs concurrent deletions using up to 10 worker threads with
431
+ progress tracking. Updates the local cache for each successful
432
+ deletion.
382
433
 
383
434
  Parameters
384
435
  ----------
385
- file_ids
436
+ file_ids : list[str]
386
437
  List of file IDs to delete.
387
438
 
388
439
  Returns
@@ -412,13 +463,11 @@ class VectorStorage:
412
463
  def delete(self) -> None:
413
464
  """Delete the entire vector store and all associated files.
414
465
 
415
- This operation is irreversible. It first attempts to delete each file
416
- individually from the store (and updates the local cache) before
417
- deleting the store itself.
466
+ Removes each file individually before deleting the store itself.
467
+ The local cache is cleared after deletion.
418
468
 
419
- Returns
420
- -------
421
- None
469
+ Warning: This operation is irreversible and will permanently delete
470
+ the vector store and all its files.
422
471
  """
423
472
  try:
424
473
  existing_files = list(self.existing_files.items())
@@ -439,11 +488,14 @@ class VectorStorage:
439
488
  def download_files(self, output_dir: str) -> VectorStorageFileStats:
440
489
  """Download every file in the vector store to a local directory.
441
490
 
491
+ Creates the output directory if needed. Uses file names from
492
+ attributes or falls back to file IDs.
493
+
442
494
  Parameters
443
495
  ----------
444
- output_dir
445
- Destination directory where the files will be written. The
446
- directory is created when it does not already exist.
496
+ output_dir : str
497
+ Destination directory where files will be written. Created if
498
+ it does not exist.
447
499
 
448
500
  Returns
449
501
  -------
@@ -501,20 +553,23 @@ class VectorStorage:
501
553
 
502
554
  def search(
503
555
  self, query: str, top_k: int = 5
504
- ) -> Optional[SyncPage[VectorStoreSearchResponse]]:
505
- """Perform a search within the vector store.
556
+ ) -> SyncPage[VectorStoreSearchResponse] | None:
557
+ """Perform a semantic search within the vector store.
558
+
559
+ Uses the configured embedding model to find the most relevant
560
+ documents matching the query.
506
561
 
507
562
  Parameters
508
563
  ----------
509
- query
564
+ query : str
510
565
  Search query string.
511
- top_k
512
- Maximum number of results to return. Default ``5``.
566
+ top_k : int, optional
567
+ Maximum number of results to return, by default 5.
513
568
 
514
569
  Returns
515
570
  -------
516
- Optional[SyncPage[VectorStoreSearchResponse]]
517
- Page of search results from the OpenAI API, or ``None`` if an
571
+ SyncPage[VectorStoreSearchResponse] or None
572
+ Page of search results from the OpenAI API, or None if an
518
573
  error occurs.
519
574
  """
520
575
  try:
@@ -528,26 +583,30 @@ class VectorStorage:
528
583
  log(f"Error searching vector store: {str(exc)}", level=logging.ERROR)
529
584
  return None
530
585
 
531
- def summarize(self, query: str, top_k: int = 15) -> Optional[str]:
586
+ def summarize(self, query: str, top_k: int = 15) -> str | None:
532
587
  """Perform a semantic search and summarize results by topic.
533
588
 
589
+ Retrieves top search results and generates a summary. This method
590
+ is designed to be overridden in application-specific wrappers.
591
+
534
592
  Parameters
535
593
  ----------
536
- query
594
+ query : str
537
595
  Search query string used for summarization.
538
- top_k
539
- Number of top search results to retrieve and summarize. Default ``15``.
596
+ top_k : int, optional
597
+ Number of top search results to retrieve and summarize,
598
+ by default 15.
540
599
 
541
600
  Returns
542
601
  -------
543
- Optional[str]
544
- Summary generated by the OpenAI model or ``None`` when no results
602
+ str or None
603
+ Summary generated by the OpenAI model, or None when no results
545
604
  are available or an error occurs.
546
605
 
547
606
  Raises
548
607
  ------
549
608
  RuntimeError
550
- If no summarizer is configured for this core helper.
609
+ If no summarizer is configured (default behavior).
551
610
  """
552
611
  response = self.search(query, top_k=top_k)
553
612
  if not response or not response.data:
@@ -1,15 +1,21 @@
1
- """Type definitions for vector storage."""
1
+ """Type definitions for vector storage.
2
+
3
+ This module defines data structures for tracking file operations and
4
+ statistics when working with OpenAI vector stores.
5
+ """
2
6
 
3
7
  from __future__ import annotations
4
8
 
5
9
  from dataclasses import dataclass, field
6
- from typing import List, Optional
7
10
 
8
11
 
9
12
  @dataclass
10
13
  class VectorStorageFileInfo:
11
14
  """Information about a file stored in a vector store.
12
15
 
16
+ Tracks the status and details of a single file operation within a
17
+ vector store, including upload, download, or deletion outcomes.
18
+
13
19
  Attributes
14
20
  ----------
15
21
  name : str
@@ -17,42 +23,37 @@ class VectorStorageFileInfo:
17
23
  id : str
18
24
  Unique identifier of the file in the vector store.
19
25
  status : str
20
- Outcome of the operation (for example ``"success"`` or ``"error"``).
21
- error : str, optional
22
- Error message when the operation fails. Default ``None``.
23
-
24
- Methods
25
- -------
26
- None
26
+ Outcome of the operation (e.g., "success", "error", "existing").
27
+ error : str or None, optional
28
+ Error message when the operation fails, by default None.
27
29
  """
28
30
 
29
31
  name: str
30
32
  id: str
31
33
  status: str
32
- error: Optional[str] = None
34
+ error: str | None = None
33
35
 
34
36
 
35
37
  @dataclass
36
38
  class VectorStorageFileStats:
37
39
  """Aggregate statistics about batch file operations.
38
40
 
41
+ Tracks outcomes for batch upload, download, or deletion operations
42
+ across multiple files in a vector store.
43
+
39
44
  Attributes
40
45
  ----------
41
46
  total : int
42
- Total number of files processed.
47
+ Total number of files processed, by default 0.
43
48
  success : int
44
- Number of files successfully handled.
49
+ Number of files successfully handled, by default 0.
45
50
  fail : int
46
- Number of files that failed to process.
51
+ Number of files that failed to process, by default 0.
47
52
  errors : list[VectorStorageFileInfo]
48
- Details for each failed file.
49
-
50
- Methods
51
- -------
52
- None
53
+ Details for each failed file, by default empty list.
53
54
  """
54
55
 
55
56
  total: int = 0
56
57
  success: int = 0
57
58
  fail: int = 0
58
- errors: List[VectorStorageFileInfo] = field(default_factory=list)
59
+ errors: list[VectorStorageFileInfo] = field(default_factory=list)