openaivec 0.13.6__tar.gz → 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {openaivec-0.13.6 → openaivec-0.14.0}/.github/copilot-instructions.md +56 -21
  2. {openaivec-0.13.6 → openaivec-0.14.0}/PKG-INFO +3 -3
  3. {openaivec-0.13.6 → openaivec-0.14.0}/README.md +2 -2
  4. openaivec-0.14.0/docs/api/main.md +118 -0
  5. {openaivec-0.13.6 → openaivec-0.14.0}/mkdocs.yml +1 -5
  6. openaivec-0.14.0/src/openaivec/__init__.py +13 -0
  7. openaivec-0.13.6/src/openaivec/di.py → openaivec-0.14.0/src/openaivec/_di.py +2 -0
  8. openaivec-0.13.6/src/openaivec/embeddings.py → openaivec-0.14.0/src/openaivec/_embeddings.py +3 -3
  9. openaivec-0.13.6/src/openaivec/log.py → openaivec-0.14.0/src/openaivec/_log.py +1 -1
  10. openaivec-0.13.6/src/openaivec/model.py → openaivec-0.14.0/src/openaivec/_model.py +4 -0
  11. openaivec-0.13.6/src/openaivec/optimize.py → openaivec-0.14.0/src/openaivec/_optimize.py +3 -1
  12. openaivec-0.13.6/src/openaivec/prompt.py → openaivec-0.14.0/src/openaivec/_prompt.py +2 -2
  13. openaivec-0.13.6/src/openaivec/provider.py → openaivec-0.14.0/src/openaivec/_provider.py +5 -3
  14. openaivec-0.13.6/src/openaivec/proxy.py → openaivec-0.14.0/src/openaivec/_proxy.py +3 -1
  15. openaivec-0.13.6/src/openaivec/responses.py → openaivec-0.14.0/src/openaivec/_responses.py +4 -4
  16. openaivec-0.13.6/src/openaivec/serialize.py → openaivec-0.14.0/src/openaivec/_serialize.py +1 -1
  17. openaivec-0.13.6/src/openaivec/util.py → openaivec-0.14.0/src/openaivec/_util.py +2 -0
  18. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/pandas_ext.py +25 -18
  19. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/spark.py +13 -4
  20. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/__init__.py +1 -1
  21. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/customer_sentiment.py +2 -2
  22. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/inquiry_classification.py +2 -2
  23. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/inquiry_summary.py +2 -2
  24. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/intent_analysis.py +2 -2
  25. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/response_suggestion.py +2 -2
  26. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/urgency_analysis.py +2 -2
  27. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/dependency_parsing.py +2 -2
  28. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/keyword_extraction.py +2 -2
  29. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/morphological_analysis.py +2 -2
  30. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/named_entity_recognition.py +2 -2
  31. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/sentiment_analysis.py +2 -2
  32. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/translation.py +2 -2
  33. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/table/fillna.py +3 -3
  34. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_di.py +1 -1
  35. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_embeddings.py +1 -1
  36. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_optimize.py +3 -3
  37. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_pandas_ext.py +6 -6
  38. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_prompt.py +1 -1
  39. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_provider.py +3 -3
  40. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_proxy.py +22 -22
  41. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_proxy_suggester.py +1 -1
  42. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_responses.py +1 -1
  43. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_serialize.py +1 -1
  44. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_spark.py +1 -1
  45. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_task.py +1 -1
  46. {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_util.py +1 -1
  47. openaivec-0.13.6/docs/api/di.md +0 -15
  48. openaivec-0.13.6/docs/api/embeddings.md +0 -15
  49. openaivec-0.13.6/docs/api/prompt.md +0 -15
  50. openaivec-0.13.6/docs/api/proxy.md +0 -102
  51. openaivec-0.13.6/docs/api/responses.md +0 -15
  52. openaivec-0.13.6/docs/api/util.md +0 -15
  53. openaivec-0.13.6/src/openaivec/__init__.py +0 -9
  54. {openaivec-0.13.6 → openaivec-0.14.0}/.env.example +0 -0
  55. {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-mkdocs.yml +0 -0
  56. {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-package.yml +0 -0
  57. {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-test.yml +0 -0
  58. {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-update.yml +0 -0
  59. {openaivec-0.13.6 → openaivec-0.14.0}/.gitignore +0 -0
  60. {openaivec-0.13.6 → openaivec-0.14.0}/CODE_OF_CONDUCT.md +0 -0
  61. {openaivec-0.13.6 → openaivec-0.14.0}/LICENSE +0 -0
  62. {openaivec-0.13.6 → openaivec-0.14.0}/SECURITY.md +0 -0
  63. {openaivec-0.13.6 → openaivec-0.14.0}/SUPPORT.md +0 -0
  64. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/pandas_ext.md +0 -0
  65. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/spark.md +0 -0
  66. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/task.md +0 -0
  67. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  68. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  69. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  70. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  71. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  72. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  73. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  74. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  75. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  76. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  77. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  78. {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/translation.md +0 -0
  79. {openaivec-0.13.6 → openaivec-0.14.0}/docs/index.md +0 -0
  80. {openaivec-0.13.6 → openaivec-0.14.0}/docs/robots.txt +0 -0
  81. {openaivec-0.13.6 → openaivec-0.14.0}/pyproject.toml +0 -0
  82. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/__init__.py +0 -0
  83. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/__init__.py +0 -0
  84. {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/table/__init__.py +0 -0
  85. {openaivec-0.13.6 → openaivec-0.14.0}/tests/__init__.py +0 -0
  86. {openaivec-0.13.6 → openaivec-0.14.0}/uv.lock +0 -0
@@ -13,37 +13,40 @@ This repository-wide guide tells GitHub Copilot how to propose code that fits ou
13
13
 
14
14
  ## Architecture and roles
15
15
 
16
- - `src/openaivec/proxy.py`
16
+ - `src/openaivec/_proxy.py` (internal)
17
17
  - Core batching, deduplication, order preservation, and caching
18
18
  - `BatchingMapProxy[S, T]` (sync) / `AsyncBatchingMapProxy[S, T]` (async)
19
19
  - The map_func contract is strict: return a list of the same length and order as the inputs
20
20
  - Progress bars only in notebook environments via `tqdm.auto`, gated by `show_progress=True`
21
- - `src/openaivec/responses.py`
21
+ - `src/openaivec/_responses.py` (internal)
22
22
  - Batched wrapper over OpenAI Responses JSON-mode API
23
23
  - `BatchResponses` / `AsyncBatchResponses` use the proxy internally
24
24
  - Retries via `backoff`/`backoff_async` for transient errors (RateLimit, 5xx)
25
25
  - Reasoning models (o1/o3 family) must use `temperature=None`; helpful guidance on errors
26
- - `src/openaivec/embeddings.py`
26
+ - `src/openaivec/_embeddings.py` (internal)
27
27
  - Batched embeddings (sync/async)
28
- - `src/openaivec/pandas_ext.py`
28
+ - `src/openaivec/pandas_ext.py` (public)
29
29
  - `Series.ai` / `Series.aio` entry points for responses/embeddings
30
- - Uses DI container (`provider.CONTAINER`) to get client and model names
30
+ - Uses DI container (`_provider.CONTAINER`) to get client and model names
31
31
  - Supports batch size, progress, and cache sharing (`*_with_cache`)
32
- - `src/openaivec/spark.py`
32
+ - `src/openaivec/spark.py` (public)
33
33
  - UDF builders: `responses_udf` / `task_udf` / `embeddings_udf` / `count_tokens_udf` / `split_to_chunks_udf`
34
34
  - Per-partition duplicate caching to reduce API calls
35
35
  - Pydantic → Spark StructType schema conversion
36
- - `src/openaivec/provider.py`
36
+ - `src/openaivec/_provider.py` (internal)
37
37
  - DI container and automatic OpenAI/Azure OpenAI client provisioning
38
- - Warns if Azure base URL isnt v1 format
39
- - `src/openaivec/util.py`
38
+ - Warns if Azure base URL isn't v1 format
39
+ - `src/openaivec/_util.py` (internal)
40
40
  - `backoff` / `backoff_async` and `TextChunker`
41
- - Additional modules from CLAUDE.md
42
- - `src/openaivec/di.py`: lightweight DI container
43
- - `src/openaivec/log.py`: logging/observe helpers
44
- - `src/openaivec/prompt.py`: few-shot prompt building
45
- - `src/openaivec/serialize.py`: Pydantic schema (de)serialization
46
- - `src/openaivec/task/`: pre-built, structured task library
41
+ - Additional internal modules
42
+ - `src/openaivec/_di.py`: lightweight DI container
43
+ - `src/openaivec/_log.py`: logging/observe helpers
44
+ - `src/openaivec/_prompt.py`: few-shot prompt building
45
+ - `src/openaivec/_serialize.py`: Pydantic schema (de)serialization
46
+ - `src/openaivec/_model.py`: task configuration models
47
+ - `src/openaivec/_optimize.py`: performance optimization
48
+ - `src/openaivec/task/` (public)
49
+ - Pre-built, structured task library
47
50
 
48
51
  ## Dev commands (uv)
49
52
 
@@ -76,17 +79,17 @@ uv run mkdocs serve
76
79
 
77
80
  ## API contracts and critical rules
78
81
 
79
- - Proxy (BatchingMapProxy / AsyncBatchingMapProxy)
82
+ - Proxy (`_proxy.py` - BatchingMapProxy / AsyncBatchingMapProxy)
80
83
  - map_func must return a list with the same length and order as inputs; on mismatch, release events and raise ValueError
81
84
  - Inputs are de-duplicated while preserving first-occurrence order; outputs are restored to the original order
82
85
  - Progress is only shown in notebooks when `show_progress=True`
83
86
  - Async version enforces `max_concurrency` via `asyncio.Semaphore`
84
- - Responses
87
+ - Responses (`_responses.py`)
85
88
  - Use OpenAI Responses JSON mode (`responses.parse`)
86
89
  - For reasoning models (o1/o3 families), you MUST set `temperature=None`; helpful error messaging is built-in
87
90
  - Strongly prefer structured outputs with Pydantic models
88
91
  - Retries with exponential backoff for RateLimit/5xx
89
- - Embeddings
92
+ - Embeddings (`_embeddings.py`)
90
93
  - Return NumPy float32 arrays
91
94
  - pandas extensions
92
95
  - `.ai.responses` / `.ai.embeddings` strictly preserve Series index and length
@@ -97,9 +100,9 @@ uv run mkdocs serve
97
100
  - Convert Pydantic models to Spark schemas; treat Enum/Literal as strings
98
101
  - Reasoning models require `temperature=None`
99
102
  - Provide token counting and text chunking helpers
100
- - Provider/DI and Azure
103
+ - Provider/DI and Azure (`_provider.py` / `_di.py`)
101
104
  - Auto-detect OpenAI vs Azure OpenAI from env vars
102
- - Azure requires v1 base URL (warn otherwise) and uses deployment name as the model
105
+ - Azure requires v1 base URL (warn otherwise) and uses deployment name as the "model"
103
106
 
104
107
  ## Preferred patterns (Do) and Avoid (Don’t)
105
108
 
@@ -138,6 +141,38 @@ Don’t
138
141
  - Use `asyncio.run` in async tests (mirrors existing tests)
139
142
  - Optional integration tests can run with valid API keys; keep unit tests independent of network
140
143
 
144
+ ## Package Visibility Guidelines (`__all__`)
145
+
146
+ ### Public API Modules
147
+ These modules are part of the public API and have appropriate `__all__` declarations:
148
+
149
+ - `pandas_ext.py` - Pandas DataFrame/Series extensions with `.ai/.aio` accessors
150
+ - `spark.py` - Apache Spark UDF builders for distributed processing
151
+ - `task/*` - All task modules (NLP, customer support, table operations)
152
+
153
+ ### Internal Modules (underscore-prefixed)
154
+ These modules are for internal use only and have `__all__ = []`:
155
+
156
+ - `_embeddings.py` - Batch embedding processing (internal implementation)
157
+ - `_model.py` - Task configuration models (internal types)
158
+ - `_prompt.py` - Few-shot prompt building (internal implementation)
159
+ - `_responses.py` - Batch response processing (internal implementation)
160
+ - `_util.py`, `_serialize.py`, `_log.py`, `_provider.py`, `_proxy.py`, `_di.py`, `_optimize.py` - Internal utilities
161
+
162
+ ### Main Package API
163
+ Users access core functionality through `__init__.py` exports:
164
+ - `BatchResponses`, `AsyncBatchResponses`
165
+ - `BatchEmbeddings`, `AsyncBatchEmbeddings`
166
+ - `PreparedTask`, `FewShotPromptBuilder`
167
+
168
+ ### `__all__` Best Practices
169
+
170
+ 1. **Public modules**: Include all classes, functions, and constants intended for external use
171
+ 2. **Internal modules**: Use `__all__ = []` to explicitly mark as internal-only
172
+ 3. **Task modules**: Each task module should export its main classes/functions
173
+ 4. **Package `__init__.py`**: Re-export public API from all public modules
174
+ 5. **Consistency**: Maintain alphabetical ordering within `__all__` lists
175
+
141
176
  ## Documentation (MkDocs)
142
177
 
143
178
  - For new developer-facing APIs, update `docs/api/` and consider a short example under `docs/examples/`
@@ -176,7 +211,7 @@ Don’t
176
211
  - pandas `.ai` with shared cache
177
212
 
178
213
  ```python
179
- from openaivec.proxy import BatchingMapProxy
214
+ from openaivec._proxy import BatchingMapProxy
180
215
 
181
216
  shared = BatchingMapProxy[str, str](batch_size=64)
182
217
  df["text"].ai.responses_with_cache("instructions", cache=shared)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.13.6
3
+ Version: 0.14.0
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -514,7 +514,7 @@ return rendered prompt with XML format.
514
514
  Here is an example:
515
515
 
516
516
  ```python
517
- from openaivec.prompt import FewShotPromptBuilder
517
+ from openaivec import FewShotPromptBuilder
518
518
 
519
519
  prompt: str = (
520
520
  FewShotPromptBuilder()
@@ -577,7 +577,7 @@ Here is an example:
577
577
 
578
578
  ```python
579
579
  from openai import OpenAI
580
- from openaivec.prompt import FewShotPromptBuilder
580
+ from openaivec import FewShotPromptBuilder
581
581
 
582
582
  client = OpenAI(...)
583
583
  model_name = "<your-model-name>"
@@ -488,7 +488,7 @@ return rendered prompt with XML format.
488
488
  Here is an example:
489
489
 
490
490
  ```python
491
- from openaivec.prompt import FewShotPromptBuilder
491
+ from openaivec import FewShotPromptBuilder
492
492
 
493
493
  prompt: str = (
494
494
  FewShotPromptBuilder()
@@ -551,7 +551,7 @@ Here is an example:
551
551
 
552
552
  ```python
553
553
  from openai import OpenAI
554
- from openaivec.prompt import FewShotPromptBuilder
554
+ from openaivec import FewShotPromptBuilder
555
555
 
556
556
  client = OpenAI(...)
557
557
  model_name = "<your-model-name>"
@@ -0,0 +1,118 @@
1
+ # Main Package API
2
+
3
+ The main `openaivec` package provides the core classes for AI-powered data processing.
4
+
5
+ ## Core Classes
6
+
7
+ All core functionality is accessible through the main package imports:
8
+
9
+ ::: openaivec.BatchResponses
10
+ options:
11
+ members:
12
+ - of
13
+ - of_task
14
+ - parse
15
+
16
+ ::: openaivec.AsyncBatchResponses
17
+ options:
18
+ members:
19
+ - of
20
+ - of_task
21
+ - parse
22
+
23
+ ::: openaivec.BatchEmbeddings
24
+ options:
25
+ members:
26
+ - of
27
+ - create
28
+
29
+ ::: openaivec.AsyncBatchEmbeddings
30
+ options:
31
+ members:
32
+ - of
33
+ - create
34
+
35
+ ## Task Configuration
36
+
37
+ ::: openaivec.PreparedTask
38
+
39
+ ## Prompt Building
40
+
41
+ ::: openaivec.FewShotPromptBuilder
42
+ options:
43
+ members:
44
+ - purpose
45
+ - caution
46
+ - example
47
+ - improve
48
+ - build
49
+ - build_json
50
+ - get_object
51
+
52
+ ## Usage Examples
53
+
54
+ ### Basic Batch Processing
55
+
56
+ ```python
57
+ from openaivec import BatchResponses
58
+ from openai import OpenAI
59
+
60
+ # Create batch client
61
+ client = BatchResponses.of(
62
+ client=OpenAI(),
63
+ model_name="gpt-4.1-mini"
64
+ )
65
+
66
+ # Process multiple inputs
67
+ results = client.parse([
68
+ "Translate 'hello' to French",
69
+ "What is 2+2?",
70
+ "Name three colors"
71
+ ])
72
+ ```
73
+
74
+ ### Structured Outputs with Tasks
75
+
76
+ ```python
77
+ from openaivec import BatchResponses, PreparedTask
78
+ from openai import OpenAI
79
+ from pydantic import BaseModel
80
+
81
+ class Sentiment(BaseModel):
82
+ sentiment: str
83
+ confidence: float
84
+
85
+ task = PreparedTask(
86
+ instructions="Analyze sentiment",
87
+ response_format=Sentiment,
88
+ temperature=0.0
89
+ )
90
+
91
+ client = BatchResponses.of_task(
92
+ client=OpenAI(),
93
+ model_name="gpt-4.1-mini",
94
+ task=task
95
+ )
96
+
97
+ results = client.parse([
98
+ "I love this product!",
99
+ "This is terrible quality"
100
+ ])
101
+ ```
102
+
103
+ ### Advanced Prompt Building
104
+
105
+ ```python
106
+ from openaivec import FewShotPromptBuilder
107
+
108
+ prompt = (
109
+ FewShotPromptBuilder()
110
+ .purpose("Classify animals by their habitat")
111
+ .caution("Consider both land and water animals")
112
+ .example("dolphin", "aquatic")
113
+ .example("eagle", "aerial")
114
+ .example("bear", "terrestrial")
115
+ .improve() # AI-powered improvement
116
+ .build()
117
+ )
118
+ ```
@@ -63,13 +63,9 @@ nav:
63
63
  - FAQ Generation: examples/generate_faq.ipynb
64
64
  - Token Count and Processing Time: examples/batch_size.ipynb
65
65
  - API Reference:
66
- - di: api/di.md
66
+ - Main Package: api/main.md
67
67
  - pandas_ext: api/pandas_ext.md
68
68
  - spark: api/spark.md
69
- - prompt: api/prompt.md
70
- - util: api/util.md
71
- - responses: api/responses.md
72
- - embeddings: api/embeddings.md
73
69
  - task: api/task.md
74
70
  - Task Domains:
75
71
  - Natural Language Processing:
@@ -0,0 +1,13 @@
1
+ from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
2
+ from ._model import PreparedTask
3
+ from ._prompt import FewShotPromptBuilder
4
+ from ._responses import AsyncBatchResponses, BatchResponses
5
+
6
+ __all__ = [
7
+ "AsyncBatchEmbeddings",
8
+ "AsyncBatchResponses",
9
+ "BatchEmbeddings",
10
+ "BatchResponses",
11
+ "FewShotPromptBuilder",
12
+ "PreparedTask",
13
+ ]
@@ -2,6 +2,8 @@ from dataclasses import dataclass, field
2
2
  from threading import RLock
3
3
  from typing import Any, Callable, Dict, Set, Type, TypeVar
4
4
 
5
+ __all__ = []
6
+
5
7
  """Simple dependency injection container with singleton lifecycle management.
6
8
 
7
9
  This module provides a lightweight dependency injection container that manages
@@ -6,9 +6,9 @@ import numpy as np
6
6
  from numpy.typing import NDArray
7
7
  from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
8
8
 
9
- from openaivec.log import observe
10
- from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
11
- from openaivec.util import backoff, backoff_async
9
+ from openaivec._log import observe
10
+ from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
11
+ from openaivec._util import backoff, backoff_async
12
12
 
13
13
  __all__ = [
14
14
  "BatchEmbeddings",
@@ -5,7 +5,7 @@ import uuid
5
5
  from logging import Logger
6
6
  from typing import Callable
7
7
 
8
- __all__ = ["observe"]
8
+ __all__ = []
9
9
 
10
10
 
11
11
  def observe(logger: Logger):
@@ -1,6 +1,10 @@
1
1
  from dataclasses import dataclass
2
2
  from typing import Generic, Type, TypeVar
3
3
 
4
+ __all__ = [
5
+ "PreparedTask",
6
+ ]
7
+
4
8
  ResponseFormat = TypeVar("ResponseFormat")
5
9
 
6
10
 
@@ -5,6 +5,8 @@ from dataclasses import dataclass, field
5
5
  from datetime import datetime, timezone
6
6
  from typing import List
7
7
 
8
+ __all__ = []
9
+
8
10
 
9
11
  @dataclass(frozen=True)
10
12
  class PerformanceMetric:
@@ -20,7 +22,7 @@ class BatchSizeSuggester:
20
22
  min_batch_size: int = 10
21
23
  min_duration: float = 30.0
22
24
  max_duration: float = 60.0
23
- step_ratio: float = 0.1
25
+ step_ratio: float = 0.2
24
26
  sample_size: int = 4
25
27
  _history: List[PerformanceMetric] = field(default_factory=list)
26
28
  _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
@@ -51,8 +51,8 @@ from openai import OpenAI
51
51
  from openai.types.responses import ParsedResponse
52
52
  from pydantic import BaseModel
53
53
 
54
- from openaivec.model import ResponsesModelName
55
- from openaivec.provider import CONTAINER
54
+ from openaivec._model import ResponsesModelName
55
+ from openaivec._provider import CONTAINER
56
56
 
57
57
  __all__ = [
58
58
  "FewShotPrompt",
@@ -4,8 +4,8 @@ import warnings
4
4
  import tiktoken
5
5
  from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
6
6
 
7
- from openaivec import di
8
- from openaivec.model import (
7
+ from openaivec import _di as di
8
+ from openaivec._model import (
9
9
  AzureOpenAIAPIKey,
10
10
  AzureOpenAIAPIVersion,
11
11
  AzureOpenAIBaseURL,
@@ -13,7 +13,9 @@ from openaivec.model import (
13
13
  OpenAIAPIKey,
14
14
  ResponsesModelName,
15
15
  )
16
- from openaivec.util import TextChunker
16
+ from openaivec._util import TextChunker
17
+
18
+ __all__ = []
17
19
 
18
20
  CONTAINER = di.Container()
19
21
 
@@ -4,7 +4,9 @@ from collections.abc import Hashable
4
4
  from dataclasses import dataclass, field
5
5
  from typing import Any, Awaitable, Callable, Dict, Generic, List, TypeVar
6
6
 
7
- from openaivec.optimize import BatchSizeSuggester
7
+ from openaivec._optimize import BatchSizeSuggester
8
+
9
+ __all__ = []
8
10
 
9
11
  S = TypeVar("S", bound=Hashable)
10
12
  T = TypeVar("T")
@@ -7,10 +7,10 @@ from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, Ra
7
7
  from openai.types.responses import ParsedResponse
8
8
  from pydantic import BaseModel
9
9
 
10
- from openaivec.log import observe
11
- from openaivec.model import PreparedTask, ResponseFormat
12
- from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
13
- from openaivec.util import backoff, backoff_async
10
+ from openaivec._log import observe
11
+ from openaivec._model import PreparedTask, ResponseFormat
12
+ from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
13
+ from openaivec._util import backoff, backoff_async
14
14
 
15
15
  __all__ = [
16
16
  "BatchResponses",
@@ -29,7 +29,7 @@ from typing import Any, Dict, List, Literal, Type
29
29
 
30
30
  from pydantic import BaseModel, Field, create_model
31
31
 
32
- __all__ = ["deserialize_base_model", "serialize_base_model"]
32
+ __all__ = []
33
33
 
34
34
 
35
35
  def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
@@ -8,6 +8,8 @@ from typing import Awaitable, Callable, List, Type, TypeVar
8
8
  import numpy as np
9
9
  import tiktoken
10
10
 
11
+ __all__ = []
12
+
11
13
  T = TypeVar("T")
12
14
  U = TypeVar("U")
13
15
  V = TypeVar("V")
@@ -48,13 +48,20 @@ import numpy as np
48
48
  import pandas as pd
49
49
  import tiktoken
50
50
  from openai import AsyncOpenAI, OpenAI
51
+
52
+ __all__ = [
53
+ "embeddings_model",
54
+ "responses_model",
55
+ "use",
56
+ "use_async",
57
+ ]
51
58
  from pydantic import BaseModel
52
59
 
53
- from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
54
- from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
55
- from openaivec.provider import CONTAINER, _check_azure_v1_api_url
56
- from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
57
- from openaivec.responses import AsyncBatchResponses, BatchResponses
60
+ from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
61
+ from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
62
+ from openaivec._provider import CONTAINER, _check_azure_v1_api_url
63
+ from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
64
+ from openaivec._responses import AsyncBatchResponses, BatchResponses
58
65
  from openaivec.task.table import FillNaResponse, fillna
59
66
 
60
67
  __all__ = [
@@ -192,7 +199,7 @@ class OpenAIVecSeriesAccessor:
192
199
 
193
200
  Example:
194
201
  ```python
195
- from openaivec.proxy import BatchingMapProxy
202
+ from openaivec._proxy import BatchingMapProxy
196
203
  import numpy as np
197
204
 
198
205
  # Create a shared cache with custom batch size
@@ -290,8 +297,8 @@ class OpenAIVecSeriesAccessor:
290
297
 
291
298
  Example:
292
299
  ```python
293
- from openaivec.model import PreparedTask
294
- from openaivec.proxy import BatchingMapProxy
300
+ from openaivec._model import PreparedTask
301
+ from openaivec._proxy import BatchingMapProxy
295
302
 
296
303
  # Create a shared cache with custom batch size
297
304
  shared_cache = BatchingMapProxy(batch_size=64)
@@ -323,7 +330,7 @@ class OpenAIVecSeriesAccessor:
323
330
 
324
331
  Example:
325
332
  ```python
326
- from openaivec.model import PreparedTask
333
+ from openaivec._model import PreparedTask
327
334
 
328
335
  # Assume you have a prepared task for sentiment analysis
329
336
  sentiment_task = PreparedTask(...)
@@ -510,7 +517,7 @@ class OpenAIVecDataFrameAccessor:
510
517
 
511
518
  Example:
512
519
  ```python
513
- from openaivec.proxy import BatchingMapProxy
520
+ from openaivec._proxy import BatchingMapProxy
514
521
 
515
522
  # Create a shared cache with custom batch size
516
523
  shared_cache = BatchingMapProxy(batch_size=64)
@@ -607,7 +614,7 @@ class OpenAIVecDataFrameAccessor:
607
614
 
608
615
  Example:
609
616
  ```python
610
- from openaivec.model import PreparedTask
617
+ from openaivec._model import PreparedTask
611
618
 
612
619
  # Assume you have a prepared task for data analysis
613
620
  analysis_task = PreparedTask(...)
@@ -770,7 +777,7 @@ class AsyncOpenAIVecSeriesAccessor:
770
777
 
771
778
  Example:
772
779
  ```python
773
- from openaivec.proxy import AsyncBatchingMapProxy
780
+ from openaivec._proxy import AsyncBatchingMapProxy
774
781
 
775
782
  # Create a shared cache with custom batch size and concurrency
776
783
  shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
@@ -822,7 +829,7 @@ class AsyncOpenAIVecSeriesAccessor:
822
829
 
823
830
  Example:
824
831
  ```python
825
- from openaivec.proxy import AsyncBatchingMapProxy
832
+ from openaivec._proxy import AsyncBatchingMapProxy
826
833
  import numpy as np
827
834
 
828
835
  # Create a shared cache with custom batch size and concurrency
@@ -878,8 +885,8 @@ class AsyncOpenAIVecSeriesAccessor:
878
885
 
879
886
  Example:
880
887
  ```python
881
- from openaivec.model import PreparedTask
882
- from openaivec.proxy import AsyncBatchingMapProxy
888
+ from openaivec._model import PreparedTask
889
+ from openaivec._proxy import AsyncBatchingMapProxy
883
890
 
884
891
  # Create a shared cache with custom batch size and concurrency
885
892
  shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
@@ -1027,7 +1034,7 @@ class AsyncOpenAIVecSeriesAccessor:
1027
1034
 
1028
1035
  Example:
1029
1036
  ```python
1030
- from openaivec.model import PreparedTask
1037
+ from openaivec._model import PreparedTask
1031
1038
 
1032
1039
  # Assume you have a prepared task for sentiment analysis
1033
1040
  sentiment_task = PreparedTask(...)
@@ -1110,7 +1117,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1110
1117
 
1111
1118
  Example:
1112
1119
  ```python
1113
- from openaivec.proxy import AsyncBatchingMapProxy
1120
+ from openaivec._proxy import AsyncBatchingMapProxy
1114
1121
 
1115
1122
  # Create a shared cache with custom batch size and concurrency
1116
1123
  shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
@@ -1224,7 +1231,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1224
1231
 
1225
1232
  Example:
1226
1233
  ```python
1227
- from openaivec.model import PreparedTask
1234
+ from openaivec._model import PreparedTask
1228
1235
 
1229
1236
  # Assume you have a prepared task for data analysis
1230
1237
  analysis_task = PreparedTask(...)
@@ -12,6 +12,15 @@ improved performance in I/O-bound operations.
12
12
  automatically cache duplicate inputs within each partition, significantly reducing
13
13
  API calls and costs when processing datasets with overlapping content.
14
14
 
15
+ __all__ = [
16
+ "count_tokens_udf",
17
+ "embeddings_udf",
18
+ "responses_udf",
19
+ "similarity_udf",
20
+ "split_to_chunks_udf",
21
+ "task_udf",
22
+ ]
23
+
15
24
  ## Setup
16
25
 
17
26
  First, obtain a Spark session and configure authentication:
@@ -127,10 +136,10 @@ from pyspark.sql.udf import UserDefinedFunction
127
136
  from typing_extensions import Literal
128
137
 
129
138
  from openaivec import pandas_ext
130
- from openaivec.model import PreparedTask, ResponseFormat
131
- from openaivec.proxy import AsyncBatchingMapProxy
132
- from openaivec.serialize import deserialize_base_model, serialize_base_model
133
- from openaivec.util import TextChunker
139
+ from openaivec._model import PreparedTask, ResponseFormat
140
+ from openaivec._proxy import AsyncBatchingMapProxy
141
+ from openaivec._serialize import deserialize_base_model, serialize_base_model
142
+ from openaivec._util import TextChunker
134
143
 
135
144
  __all__ = [
136
145
  "responses_udf",
@@ -32,7 +32,7 @@ Specialized tasks for customer service operations:
32
32
  ### Quick Start with Default Tasks
33
33
  ```python
34
34
  from openai import OpenAI
35
- from openaivec.responses import BatchResponses
35
+ from openaivec._responses import BatchResponses
36
36
  from openaivec.task import nlp, customer_support
37
37
 
38
38
  client = OpenAI()
@@ -9,7 +9,7 @@ Example:
9
9
 
10
10
  ```python
11
11
  from openai import OpenAI
12
- from openaivec.responses import BatchResponses
12
+ from openaivec._responses import BatchResponses
13
13
  from openaivec.task import customer_support
14
14
 
15
15
  client = OpenAI()
@@ -65,7 +65,7 @@ from typing import List, Literal
65
65
 
66
66
  from pydantic import BaseModel, Field
67
67
 
68
- from openaivec.model import PreparedTask
68
+ from openaivec._model import PreparedTask
69
69
 
70
70
  __all__ = ["customer_sentiment"]
71
71