openaivec 0.13.6__tar.gz → 0.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.13.6 → openaivec-0.14.0}/.github/copilot-instructions.md +56 -21
- {openaivec-0.13.6 → openaivec-0.14.0}/PKG-INFO +3 -3
- {openaivec-0.13.6 → openaivec-0.14.0}/README.md +2 -2
- openaivec-0.14.0/docs/api/main.md +118 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/mkdocs.yml +1 -5
- openaivec-0.14.0/src/openaivec/__init__.py +13 -0
- openaivec-0.13.6/src/openaivec/di.py → openaivec-0.14.0/src/openaivec/_di.py +2 -0
- openaivec-0.13.6/src/openaivec/embeddings.py → openaivec-0.14.0/src/openaivec/_embeddings.py +3 -3
- openaivec-0.13.6/src/openaivec/log.py → openaivec-0.14.0/src/openaivec/_log.py +1 -1
- openaivec-0.13.6/src/openaivec/model.py → openaivec-0.14.0/src/openaivec/_model.py +4 -0
- openaivec-0.13.6/src/openaivec/optimize.py → openaivec-0.14.0/src/openaivec/_optimize.py +3 -1
- openaivec-0.13.6/src/openaivec/prompt.py → openaivec-0.14.0/src/openaivec/_prompt.py +2 -2
- openaivec-0.13.6/src/openaivec/provider.py → openaivec-0.14.0/src/openaivec/_provider.py +5 -3
- openaivec-0.13.6/src/openaivec/proxy.py → openaivec-0.14.0/src/openaivec/_proxy.py +3 -1
- openaivec-0.13.6/src/openaivec/responses.py → openaivec-0.14.0/src/openaivec/_responses.py +4 -4
- openaivec-0.13.6/src/openaivec/serialize.py → openaivec-0.14.0/src/openaivec/_serialize.py +1 -1
- openaivec-0.13.6/src/openaivec/util.py → openaivec-0.14.0/src/openaivec/_util.py +2 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/pandas_ext.py +25 -18
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/spark.py +13 -4
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/__init__.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/customer_sentiment.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/inquiry_classification.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/inquiry_summary.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/intent_analysis.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/response_suggestion.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/urgency_analysis.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/dependency_parsing.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/keyword_extraction.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/morphological_analysis.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/named_entity_recognition.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/sentiment_analysis.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/translation.py +2 -2
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/table/fillna.py +3 -3
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_di.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_embeddings.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_optimize.py +3 -3
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_pandas_ext.py +6 -6
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_prompt.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_provider.py +3 -3
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_proxy.py +22 -22
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_proxy_suggester.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_responses.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_serialize.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_spark.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_task.py +1 -1
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/test_util.py +1 -1
- openaivec-0.13.6/docs/api/di.md +0 -15
- openaivec-0.13.6/docs/api/embeddings.md +0 -15
- openaivec-0.13.6/docs/api/prompt.md +0 -15
- openaivec-0.13.6/docs/api/proxy.md +0 -102
- openaivec-0.13.6/docs/api/responses.md +0 -15
- openaivec-0.13.6/docs/api/util.md +0 -15
- openaivec-0.13.6/src/openaivec/__init__.py +0 -9
- {openaivec-0.13.6 → openaivec-0.14.0}/.env.example +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/.gitignore +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/LICENSE +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/SECURITY.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/SUPPORT.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/spark.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/task.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/index.md +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/docs/robots.txt +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/pyproject.toml +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/tests/__init__.py +0 -0
- {openaivec-0.13.6 → openaivec-0.14.0}/uv.lock +0 -0
|
@@ -13,37 +13,40 @@ This repository-wide guide tells GitHub Copilot how to propose code that fits ou
|
|
|
13
13
|
|
|
14
14
|
## Architecture and roles
|
|
15
15
|
|
|
16
|
-
- `src/openaivec/proxy.py`
|
|
16
|
+
- `src/openaivec/_proxy.py` (internal)
|
|
17
17
|
- Core batching, deduplication, order preservation, and caching
|
|
18
18
|
- `BatchingMapProxy[S, T]` (sync) / `AsyncBatchingMapProxy[S, T]` (async)
|
|
19
19
|
- The map_func contract is strict: return a list of the same length and order as the inputs
|
|
20
20
|
- Progress bars only in notebook environments via `tqdm.auto`, gated by `show_progress=True`
|
|
21
|
-
- `src/openaivec/responses.py`
|
|
21
|
+
- `src/openaivec/_responses.py` (internal)
|
|
22
22
|
- Batched wrapper over OpenAI Responses JSON-mode API
|
|
23
23
|
- `BatchResponses` / `AsyncBatchResponses` use the proxy internally
|
|
24
24
|
- Retries via `backoff`/`backoff_async` for transient errors (RateLimit, 5xx)
|
|
25
25
|
- Reasoning models (o1/o3 family) must use `temperature=None`; helpful guidance on errors
|
|
26
|
-
- `src/openaivec/embeddings.py`
|
|
26
|
+
- `src/openaivec/_embeddings.py` (internal)
|
|
27
27
|
- Batched embeddings (sync/async)
|
|
28
|
-
- `src/openaivec/pandas_ext.py`
|
|
28
|
+
- `src/openaivec/pandas_ext.py` (public)
|
|
29
29
|
- `Series.ai` / `Series.aio` entry points for responses/embeddings
|
|
30
|
-
- Uses DI container (`provider.CONTAINER`) to get client and model names
|
|
30
|
+
- Uses DI container (`_provider.CONTAINER`) to get client and model names
|
|
31
31
|
- Supports batch size, progress, and cache sharing (`*_with_cache`)
|
|
32
|
-
- `src/openaivec/spark.py`
|
|
32
|
+
- `src/openaivec/spark.py` (public)
|
|
33
33
|
- UDF builders: `responses_udf` / `task_udf` / `embeddings_udf` / `count_tokens_udf` / `split_to_chunks_udf`
|
|
34
34
|
- Per-partition duplicate caching to reduce API calls
|
|
35
35
|
- Pydantic → Spark StructType schema conversion
|
|
36
|
-
- `src/openaivec/provider.py`
|
|
36
|
+
- `src/openaivec/_provider.py` (internal)
|
|
37
37
|
- DI container and automatic OpenAI/Azure OpenAI client provisioning
|
|
38
|
-
- Warns if Azure base URL isn’t v1 format
|
|
39
|
-
- `src/openaivec/util.py`
|
|
38
|
+
- Warns if Azure base URL isn't v1 format
|
|
39
|
+
- `src/openaivec/_util.py` (internal)
|
|
40
40
|
- `backoff` / `backoff_async` and `TextChunker`
|
|
41
|
-
- Additional modules
|
|
42
|
-
- `src/openaivec/
|
|
43
|
-
- `src/openaivec/
|
|
44
|
-
- `src/openaivec/
|
|
45
|
-
- `src/openaivec/
|
|
46
|
-
- `src/openaivec/task
|
|
41
|
+
- Additional internal modules
|
|
42
|
+
- `src/openaivec/_di.py`: lightweight DI container
|
|
43
|
+
- `src/openaivec/_log.py`: logging/observe helpers
|
|
44
|
+
- `src/openaivec/_prompt.py`: few-shot prompt building
|
|
45
|
+
- `src/openaivec/_serialize.py`: Pydantic schema (de)serialization
|
|
46
|
+
- `src/openaivec/_model.py`: task configuration models
|
|
47
|
+
- `src/openaivec/_optimize.py`: performance optimization
|
|
48
|
+
- `src/openaivec/task/` (public)
|
|
49
|
+
- Pre-built, structured task library
|
|
47
50
|
|
|
48
51
|
## Dev commands (uv)
|
|
49
52
|
|
|
@@ -76,17 +79,17 @@ uv run mkdocs serve
|
|
|
76
79
|
|
|
77
80
|
## API contracts and critical rules
|
|
78
81
|
|
|
79
|
-
- Proxy (BatchingMapProxy / AsyncBatchingMapProxy)
|
|
82
|
+
- Proxy (`_proxy.py` - BatchingMapProxy / AsyncBatchingMapProxy)
|
|
80
83
|
- map_func must return a list with the same length and order as inputs; on mismatch, release events and raise ValueError
|
|
81
84
|
- Inputs are de-duplicated while preserving first-occurrence order; outputs are restored to the original order
|
|
82
85
|
- Progress is only shown in notebooks when `show_progress=True`
|
|
83
86
|
- Async version enforces `max_concurrency` via `asyncio.Semaphore`
|
|
84
|
-
- Responses
|
|
87
|
+
- Responses (`_responses.py`)
|
|
85
88
|
- Use OpenAI Responses JSON mode (`responses.parse`)
|
|
86
89
|
- For reasoning models (o1/o3 families), you MUST set `temperature=None`; helpful error messaging is built-in
|
|
87
90
|
- Strongly prefer structured outputs with Pydantic models
|
|
88
91
|
- Retries with exponential backoff for RateLimit/5xx
|
|
89
|
-
- Embeddings
|
|
92
|
+
- Embeddings (`_embeddings.py`)
|
|
90
93
|
- Return NumPy float32 arrays
|
|
91
94
|
- pandas extensions
|
|
92
95
|
- `.ai.responses` / `.ai.embeddings` strictly preserve Series index and length
|
|
@@ -97,9 +100,9 @@ uv run mkdocs serve
|
|
|
97
100
|
- Convert Pydantic models to Spark schemas; treat Enum/Literal as strings
|
|
98
101
|
- Reasoning models require `temperature=None`
|
|
99
102
|
- Provide token counting and text chunking helpers
|
|
100
|
-
- Provider/DI and Azure
|
|
103
|
+
- Provider/DI and Azure (`_provider.py` / `_di.py`)
|
|
101
104
|
- Auto-detect OpenAI vs Azure OpenAI from env vars
|
|
102
|
-
- Azure requires v1 base URL (warn otherwise) and uses deployment name as the “model”
|
|
105
|
+
- Azure requires v1 base URL (warn otherwise) and uses deployment name as the "model"
|
|
103
106
|
|
|
104
107
|
## Preferred patterns (Do) and Avoid (Don’t)
|
|
105
108
|
|
|
@@ -138,6 +141,38 @@ Don’t
|
|
|
138
141
|
- Use `asyncio.run` in async tests (mirrors existing tests)
|
|
139
142
|
- Optional integration tests can run with valid API keys; keep unit tests independent of network
|
|
140
143
|
|
|
144
|
+
## Package Visibility Guidelines (`__all__`)
|
|
145
|
+
|
|
146
|
+
### Public API Modules
|
|
147
|
+
These modules are part of the public API and have appropriate `__all__` declarations:
|
|
148
|
+
|
|
149
|
+
- `pandas_ext.py` - Pandas DataFrame/Series extensions with `.ai/.aio` accessors
|
|
150
|
+
- `spark.py` - Apache Spark UDF builders for distributed processing
|
|
151
|
+
- `task/*` - All task modules (NLP, customer support, table operations)
|
|
152
|
+
|
|
153
|
+
### Internal Modules (underscore-prefixed)
|
|
154
|
+
These modules are for internal use only and have `__all__ = []`:
|
|
155
|
+
|
|
156
|
+
- `_embeddings.py` - Batch embedding processing (internal implementation)
|
|
157
|
+
- `_model.py` - Task configuration models (internal types)
|
|
158
|
+
- `_prompt.py` - Few-shot prompt building (internal implementation)
|
|
159
|
+
- `_responses.py` - Batch response processing (internal implementation)
|
|
160
|
+
- `_util.py`, `_serialize.py`, `_log.py`, `_provider.py`, `_proxy.py`, `_di.py`, `_optimize.py` - Internal utilities
|
|
161
|
+
|
|
162
|
+
### Main Package API
|
|
163
|
+
Users access core functionality through `__init__.py` exports:
|
|
164
|
+
- `BatchResponses`, `AsyncBatchResponses`
|
|
165
|
+
- `BatchEmbeddings`, `AsyncBatchEmbeddings`
|
|
166
|
+
- `PreparedTask`, `FewShotPromptBuilder`
|
|
167
|
+
|
|
168
|
+
### `__all__` Best Practices
|
|
169
|
+
|
|
170
|
+
1. **Public modules**: Include all classes, functions, and constants intended for external use
|
|
171
|
+
2. **Internal modules**: Use `__all__ = []` to explicitly mark as internal-only
|
|
172
|
+
3. **Task modules**: Each task module should export its main classes/functions
|
|
173
|
+
4. **Package `__init__.py`**: Re-export public API from all public modules
|
|
174
|
+
5. **Consistency**: Maintain alphabetical ordering within `__all__` lists
|
|
175
|
+
|
|
141
176
|
## Documentation (MkDocs)
|
|
142
177
|
|
|
143
178
|
- For new developer-facing APIs, update `docs/api/` and consider a short example under `docs/examples/`
|
|
@@ -176,7 +211,7 @@ Don’t
|
|
|
176
211
|
- pandas `.ai` with shared cache
|
|
177
212
|
|
|
178
213
|
```python
|
|
179
|
-
from openaivec.proxy import BatchingMapProxy
|
|
214
|
+
from openaivec._proxy import BatchingMapProxy
|
|
180
215
|
|
|
181
216
|
shared = BatchingMapProxy[str, str](batch_size=64)
|
|
182
217
|
df["text"].ai.responses_with_cache("instructions", cache=shared)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openaivec
|
|
3
|
-
Version: 0.13.6
|
|
3
|
+
Version: 0.14.0
|
|
4
4
|
Summary: Generative mutation for tabular calculation
|
|
5
5
|
Project-URL: Homepage, https://microsoft.github.io/openaivec/
|
|
6
6
|
Project-URL: Repository, https://github.com/microsoft/openaivec
|
|
@@ -514,7 +514,7 @@ return rendered prompt with XML format.
|
|
|
514
514
|
Here is an example:
|
|
515
515
|
|
|
516
516
|
```python
|
|
517
|
-
from openaivec.prompt import FewShotPromptBuilder
|
|
517
|
+
from openaivec import FewShotPromptBuilder
|
|
518
518
|
|
|
519
519
|
prompt: str = (
|
|
520
520
|
FewShotPromptBuilder()
|
|
@@ -577,7 +577,7 @@ Here is an example:
|
|
|
577
577
|
|
|
578
578
|
```python
|
|
579
579
|
from openai import OpenAI
|
|
580
|
-
from openaivec.prompt import FewShotPromptBuilder
|
|
580
|
+
from openaivec import FewShotPromptBuilder
|
|
581
581
|
|
|
582
582
|
client = OpenAI(...)
|
|
583
583
|
model_name = "<your-model-name>"
|
|
@@ -488,7 +488,7 @@ return rendered prompt with XML format.
|
|
|
488
488
|
Here is an example:
|
|
489
489
|
|
|
490
490
|
```python
|
|
491
|
-
from openaivec.prompt import FewShotPromptBuilder
|
|
491
|
+
from openaivec import FewShotPromptBuilder
|
|
492
492
|
|
|
493
493
|
prompt: str = (
|
|
494
494
|
FewShotPromptBuilder()
|
|
@@ -551,7 +551,7 @@ Here is an example:
|
|
|
551
551
|
|
|
552
552
|
```python
|
|
553
553
|
from openai import OpenAI
|
|
554
|
-
from openaivec.prompt import FewShotPromptBuilder
|
|
554
|
+
from openaivec import FewShotPromptBuilder
|
|
555
555
|
|
|
556
556
|
client = OpenAI(...)
|
|
557
557
|
model_name = "<your-model-name>"
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Main Package API
|
|
2
|
+
|
|
3
|
+
The main `openaivec` package provides the core classes for AI-powered data processing.
|
|
4
|
+
|
|
5
|
+
## Core Classes
|
|
6
|
+
|
|
7
|
+
All core functionality is accessible through the main package imports:
|
|
8
|
+
|
|
9
|
+
::: openaivec.BatchResponses
|
|
10
|
+
options:
|
|
11
|
+
members:
|
|
12
|
+
- of
|
|
13
|
+
- of_task
|
|
14
|
+
- parse
|
|
15
|
+
|
|
16
|
+
::: openaivec.AsyncBatchResponses
|
|
17
|
+
options:
|
|
18
|
+
members:
|
|
19
|
+
- of
|
|
20
|
+
- of_task
|
|
21
|
+
- parse
|
|
22
|
+
|
|
23
|
+
::: openaivec.BatchEmbeddings
|
|
24
|
+
options:
|
|
25
|
+
members:
|
|
26
|
+
- of
|
|
27
|
+
- create
|
|
28
|
+
|
|
29
|
+
::: openaivec.AsyncBatchEmbeddings
|
|
30
|
+
options:
|
|
31
|
+
members:
|
|
32
|
+
- of
|
|
33
|
+
- create
|
|
34
|
+
|
|
35
|
+
## Task Configuration
|
|
36
|
+
|
|
37
|
+
::: openaivec.PreparedTask
|
|
38
|
+
|
|
39
|
+
## Prompt Building
|
|
40
|
+
|
|
41
|
+
::: openaivec.FewShotPromptBuilder
|
|
42
|
+
options:
|
|
43
|
+
members:
|
|
44
|
+
- purpose
|
|
45
|
+
- caution
|
|
46
|
+
- example
|
|
47
|
+
- improve
|
|
48
|
+
- build
|
|
49
|
+
- build_json
|
|
50
|
+
- get_object
|
|
51
|
+
|
|
52
|
+
## Usage Examples
|
|
53
|
+
|
|
54
|
+
### Basic Batch Processing
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from openaivec import BatchResponses
|
|
58
|
+
from openai import OpenAI
|
|
59
|
+
|
|
60
|
+
# Create batch client
|
|
61
|
+
client = BatchResponses.of(
|
|
62
|
+
client=OpenAI(),
|
|
63
|
+
model_name="gpt-4.1-mini"
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# Process multiple inputs
|
|
67
|
+
results = client.parse([
|
|
68
|
+
"Translate 'hello' to French",
|
|
69
|
+
"What is 2+2?",
|
|
70
|
+
"Name three colors"
|
|
71
|
+
])
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Structured Outputs with Tasks
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from openaivec import BatchResponses, PreparedTask
|
|
78
|
+
from openai import OpenAI
|
|
79
|
+
from pydantic import BaseModel
|
|
80
|
+
|
|
81
|
+
class Sentiment(BaseModel):
|
|
82
|
+
sentiment: str
|
|
83
|
+
confidence: float
|
|
84
|
+
|
|
85
|
+
task = PreparedTask(
|
|
86
|
+
instructions="Analyze sentiment",
|
|
87
|
+
response_format=Sentiment,
|
|
88
|
+
temperature=0.0
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
client = BatchResponses.of_task(
|
|
92
|
+
client=OpenAI(),
|
|
93
|
+
model_name="gpt-4.1-mini",
|
|
94
|
+
task=task
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
results = client.parse([
|
|
98
|
+
"I love this product!",
|
|
99
|
+
"This is terrible quality"
|
|
100
|
+
])
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Advanced Prompt Building
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from openaivec import FewShotPromptBuilder
|
|
107
|
+
|
|
108
|
+
prompt = (
|
|
109
|
+
FewShotPromptBuilder()
|
|
110
|
+
.purpose("Classify animals by their habitat")
|
|
111
|
+
.caution("Consider both land and water animals")
|
|
112
|
+
.example("dolphin", "aquatic")
|
|
113
|
+
.example("eagle", "aerial")
|
|
114
|
+
.example("bear", "terrestrial")
|
|
115
|
+
.improve() # AI-powered improvement
|
|
116
|
+
.build()
|
|
117
|
+
)
|
|
118
|
+
```
|
|
@@ -63,13 +63,9 @@ nav:
|
|
|
63
63
|
- FAQ Generation: examples/generate_faq.ipynb
|
|
64
64
|
- Token Count and Processing Time: examples/batch_size.ipynb
|
|
65
65
|
- API Reference:
|
|
66
|
-
-
|
|
66
|
+
- Main Package: api/main.md
|
|
67
67
|
- pandas_ext: api/pandas_ext.md
|
|
68
68
|
- spark: api/spark.md
|
|
69
|
-
- prompt: api/prompt.md
|
|
70
|
-
- util: api/util.md
|
|
71
|
-
- responses: api/responses.md
|
|
72
|
-
- embeddings: api/embeddings.md
|
|
73
69
|
- task: api/task.md
|
|
74
70
|
- Task Domains:
|
|
75
71
|
- Natural Language Processing:
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
|
|
2
|
+
from ._model import PreparedTask
|
|
3
|
+
from ._prompt import FewShotPromptBuilder
|
|
4
|
+
from ._responses import AsyncBatchResponses, BatchResponses
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"AsyncBatchEmbeddings",
|
|
8
|
+
"AsyncBatchResponses",
|
|
9
|
+
"BatchEmbeddings",
|
|
10
|
+
"BatchResponses",
|
|
11
|
+
"FewShotPromptBuilder",
|
|
12
|
+
"PreparedTask",
|
|
13
|
+
]
|
|
@@ -2,6 +2,8 @@ from dataclasses import dataclass, field
|
|
|
2
2
|
from threading import RLock
|
|
3
3
|
from typing import Any, Callable, Dict, Set, Type, TypeVar
|
|
4
4
|
|
|
5
|
+
__all__ = []
|
|
6
|
+
|
|
5
7
|
"""Simple dependency injection container with singleton lifecycle management.
|
|
6
8
|
|
|
7
9
|
This module provides a lightweight dependency injection container that manages
|
openaivec-0.13.6/src/openaivec/embeddings.py → openaivec-0.14.0/src/openaivec/_embeddings.py
RENAMED
|
@@ -6,9 +6,9 @@ import numpy as np
|
|
|
6
6
|
from numpy.typing import NDArray
|
|
7
7
|
from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
|
|
8
8
|
|
|
9
|
-
from openaivec.
|
|
10
|
-
from openaivec.
|
|
11
|
-
from openaivec.
|
|
9
|
+
from openaivec._log import observe
|
|
10
|
+
from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
11
|
+
from openaivec._util import backoff, backoff_async
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
14
14
|
"BatchEmbeddings",
|
|
@@ -5,6 +5,8 @@ from dataclasses import dataclass, field
|
|
|
5
5
|
from datetime import datetime, timezone
|
|
6
6
|
from typing import List
|
|
7
7
|
|
|
8
|
+
__all__ = []
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
@dataclass(frozen=True)
|
|
10
12
|
class PerformanceMetric:
|
|
@@ -20,7 +22,7 @@ class BatchSizeSuggester:
|
|
|
20
22
|
min_batch_size: int = 10
|
|
21
23
|
min_duration: float = 30.0
|
|
22
24
|
max_duration: float = 60.0
|
|
23
|
-
step_ratio: float = 0.
|
|
25
|
+
step_ratio: float = 0.2
|
|
24
26
|
sample_size: int = 4
|
|
25
27
|
_history: List[PerformanceMetric] = field(default_factory=list)
|
|
26
28
|
_lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
|
|
@@ -51,8 +51,8 @@ from openai import OpenAI
|
|
|
51
51
|
from openai.types.responses import ParsedResponse
|
|
52
52
|
from pydantic import BaseModel
|
|
53
53
|
|
|
54
|
-
from openaivec.
|
|
55
|
-
from openaivec.
|
|
54
|
+
from openaivec._model import ResponsesModelName
|
|
55
|
+
from openaivec._provider import CONTAINER
|
|
56
56
|
|
|
57
57
|
__all__ = [
|
|
58
58
|
"FewShotPrompt",
|
|
@@ -4,8 +4,8 @@ import warnings
|
|
|
4
4
|
import tiktoken
|
|
5
5
|
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
|
|
6
6
|
|
|
7
|
-
from openaivec import di
|
|
8
|
-
from openaivec.model import (
|
|
7
|
+
from openaivec import _di as di
|
|
8
|
+
from openaivec._model import (
|
|
9
9
|
AzureOpenAIAPIKey,
|
|
10
10
|
AzureOpenAIAPIVersion,
|
|
11
11
|
AzureOpenAIBaseURL,
|
|
@@ -13,7 +13,9 @@ from openaivec.model import (
|
|
|
13
13
|
OpenAIAPIKey,
|
|
14
14
|
ResponsesModelName,
|
|
15
15
|
)
|
|
16
|
-
from openaivec.util import TextChunker
|
|
16
|
+
from openaivec._util import TextChunker
|
|
17
|
+
|
|
18
|
+
__all__ = []
|
|
17
19
|
|
|
18
20
|
CONTAINER = di.Container()
|
|
19
21
|
|
|
@@ -4,7 +4,9 @@ from collections.abc import Hashable
|
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from typing import Any, Awaitable, Callable, Dict, Generic, List, TypeVar
|
|
6
6
|
|
|
7
|
-
from openaivec.optimize import BatchSizeSuggester
|
|
7
|
+
from openaivec._optimize import BatchSizeSuggester
|
|
8
|
+
|
|
9
|
+
__all__ = []
|
|
8
10
|
|
|
9
11
|
S = TypeVar("S", bound=Hashable)
|
|
10
12
|
T = TypeVar("T")
|
|
@@ -7,10 +7,10 @@ from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, Ra
|
|
|
7
7
|
from openai.types.responses import ParsedResponse
|
|
8
8
|
from pydantic import BaseModel
|
|
9
9
|
|
|
10
|
-
from openaivec.
|
|
11
|
-
from openaivec.
|
|
12
|
-
from openaivec.
|
|
13
|
-
from openaivec.
|
|
10
|
+
from openaivec._log import observe
|
|
11
|
+
from openaivec._model import PreparedTask, ResponseFormat
|
|
12
|
+
from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
13
|
+
from openaivec._util import backoff, backoff_async
|
|
14
14
|
|
|
15
15
|
__all__ = [
|
|
16
16
|
"BatchResponses",
|
|
@@ -29,7 +29,7 @@ from typing import Any, Dict, List, Literal, Type
|
|
|
29
29
|
|
|
30
30
|
from pydantic import BaseModel, Field, create_model
|
|
31
31
|
|
|
32
|
-
__all__ = [
|
|
32
|
+
__all__ = []
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def serialize_base_model(obj: Type[BaseModel]) -> Dict[str, Any]:
|
|
@@ -48,13 +48,20 @@ import numpy as np
|
|
|
48
48
|
import pandas as pd
|
|
49
49
|
import tiktoken
|
|
50
50
|
from openai import AsyncOpenAI, OpenAI
|
|
51
|
+
|
|
52
|
+
__all__ = [
|
|
53
|
+
"embeddings_model",
|
|
54
|
+
"responses_model",
|
|
55
|
+
"use",
|
|
56
|
+
"use_async",
|
|
57
|
+
]
|
|
51
58
|
from pydantic import BaseModel
|
|
52
59
|
|
|
53
|
-
from openaivec.
|
|
54
|
-
from openaivec.
|
|
55
|
-
from openaivec.
|
|
56
|
-
from openaivec.
|
|
57
|
-
from openaivec.
|
|
60
|
+
from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
|
|
61
|
+
from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
|
|
62
|
+
from openaivec._provider import CONTAINER, _check_azure_v1_api_url
|
|
63
|
+
from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
64
|
+
from openaivec._responses import AsyncBatchResponses, BatchResponses
|
|
58
65
|
from openaivec.task.table import FillNaResponse, fillna
|
|
59
66
|
|
|
60
67
|
__all__ = [
|
|
@@ -192,7 +199,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
192
199
|
|
|
193
200
|
Example:
|
|
194
201
|
```python
|
|
195
|
-
from openaivec.
|
|
202
|
+
from openaivec._proxy import BatchingMapProxy
|
|
196
203
|
import numpy as np
|
|
197
204
|
|
|
198
205
|
# Create a shared cache with custom batch size
|
|
@@ -290,8 +297,8 @@ class OpenAIVecSeriesAccessor:
|
|
|
290
297
|
|
|
291
298
|
Example:
|
|
292
299
|
```python
|
|
293
|
-
from openaivec.
|
|
294
|
-
from openaivec.
|
|
300
|
+
from openaivec._model import PreparedTask
|
|
301
|
+
from openaivec._proxy import BatchingMapProxy
|
|
295
302
|
|
|
296
303
|
# Create a shared cache with custom batch size
|
|
297
304
|
shared_cache = BatchingMapProxy(batch_size=64)
|
|
@@ -323,7 +330,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
323
330
|
|
|
324
331
|
Example:
|
|
325
332
|
```python
|
|
326
|
-
from openaivec.
|
|
333
|
+
from openaivec._model import PreparedTask
|
|
327
334
|
|
|
328
335
|
# Assume you have a prepared task for sentiment analysis
|
|
329
336
|
sentiment_task = PreparedTask(...)
|
|
@@ -510,7 +517,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
510
517
|
|
|
511
518
|
Example:
|
|
512
519
|
```python
|
|
513
|
-
from openaivec.
|
|
520
|
+
from openaivec._proxy import BatchingMapProxy
|
|
514
521
|
|
|
515
522
|
# Create a shared cache with custom batch size
|
|
516
523
|
shared_cache = BatchingMapProxy(batch_size=64)
|
|
@@ -607,7 +614,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
607
614
|
|
|
608
615
|
Example:
|
|
609
616
|
```python
|
|
610
|
-
from openaivec.
|
|
617
|
+
from openaivec._model import PreparedTask
|
|
611
618
|
|
|
612
619
|
# Assume you have a prepared task for data analysis
|
|
613
620
|
analysis_task = PreparedTask(...)
|
|
@@ -770,7 +777,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
770
777
|
|
|
771
778
|
Example:
|
|
772
779
|
```python
|
|
773
|
-
from openaivec.
|
|
780
|
+
from openaivec._proxy import AsyncBatchingMapProxy
|
|
774
781
|
|
|
775
782
|
# Create a shared cache with custom batch size and concurrency
|
|
776
783
|
shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
|
|
@@ -822,7 +829,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
822
829
|
|
|
823
830
|
Example:
|
|
824
831
|
```python
|
|
825
|
-
from openaivec.
|
|
832
|
+
from openaivec._proxy import AsyncBatchingMapProxy
|
|
826
833
|
import numpy as np
|
|
827
834
|
|
|
828
835
|
# Create a shared cache with custom batch size and concurrency
|
|
@@ -878,8 +885,8 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
878
885
|
|
|
879
886
|
Example:
|
|
880
887
|
```python
|
|
881
|
-
from openaivec.
|
|
882
|
-
from openaivec.
|
|
888
|
+
from openaivec._model import PreparedTask
|
|
889
|
+
from openaivec._proxy import AsyncBatchingMapProxy
|
|
883
890
|
|
|
884
891
|
# Create a shared cache with custom batch size and concurrency
|
|
885
892
|
shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
|
|
@@ -1027,7 +1034,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1027
1034
|
|
|
1028
1035
|
Example:
|
|
1029
1036
|
```python
|
|
1030
|
-
from openaivec.
|
|
1037
|
+
from openaivec._model import PreparedTask
|
|
1031
1038
|
|
|
1032
1039
|
# Assume you have a prepared task for sentiment analysis
|
|
1033
1040
|
sentiment_task = PreparedTask(...)
|
|
@@ -1110,7 +1117,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1110
1117
|
|
|
1111
1118
|
Example:
|
|
1112
1119
|
```python
|
|
1113
|
-
from openaivec.
|
|
1120
|
+
from openaivec._proxy import AsyncBatchingMapProxy
|
|
1114
1121
|
|
|
1115
1122
|
# Create a shared cache with custom batch size and concurrency
|
|
1116
1123
|
shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
|
|
@@ -1224,7 +1231,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1224
1231
|
|
|
1225
1232
|
Example:
|
|
1226
1233
|
```python
|
|
1227
|
-
from openaivec.
|
|
1234
|
+
from openaivec._model import PreparedTask
|
|
1228
1235
|
|
|
1229
1236
|
# Assume you have a prepared task for data analysis
|
|
1230
1237
|
analysis_task = PreparedTask(...)
|
|
@@ -12,6 +12,15 @@ improved performance in I/O-bound operations.
|
|
|
12
12
|
automatically cache duplicate inputs within each partition, significantly reducing
|
|
13
13
|
API calls and costs when processing datasets with overlapping content.
|
|
14
14
|
|
|
15
|
+
__all__ = [
|
|
16
|
+
"count_tokens_udf",
|
|
17
|
+
"embeddings_udf",
|
|
18
|
+
"responses_udf",
|
|
19
|
+
"similarity_udf",
|
|
20
|
+
"split_to_chunks_udf",
|
|
21
|
+
"task_udf",
|
|
22
|
+
]
|
|
23
|
+
|
|
15
24
|
## Setup
|
|
16
25
|
|
|
17
26
|
First, obtain a Spark session and configure authentication:
|
|
@@ -127,10 +136,10 @@ from pyspark.sql.udf import UserDefinedFunction
|
|
|
127
136
|
from typing_extensions import Literal
|
|
128
137
|
|
|
129
138
|
from openaivec import pandas_ext
|
|
130
|
-
from openaivec.
|
|
131
|
-
from openaivec.
|
|
132
|
-
from openaivec.
|
|
133
|
-
from openaivec.
|
|
139
|
+
from openaivec._model import PreparedTask, ResponseFormat
|
|
140
|
+
from openaivec._proxy import AsyncBatchingMapProxy
|
|
141
|
+
from openaivec._serialize import deserialize_base_model, serialize_base_model
|
|
142
|
+
from openaivec._util import TextChunker
|
|
134
143
|
|
|
135
144
|
__all__ = [
|
|
136
145
|
"responses_udf",
|
|
@@ -32,7 +32,7 @@ Specialized tasks for customer service operations:
|
|
|
32
32
|
### Quick Start with Default Tasks
|
|
33
33
|
```python
|
|
34
34
|
from openai import OpenAI
|
|
35
|
-
from openaivec.
|
|
35
|
+
from openaivec._responses import BatchResponses
|
|
36
36
|
from openaivec.task import nlp, customer_support
|
|
37
37
|
|
|
38
38
|
client = OpenAI()
|
{openaivec-0.13.6 → openaivec-0.14.0}/src/openaivec/task/customer_support/customer_sentiment.py
RENAMED
|
@@ -9,7 +9,7 @@ Example:
|
|
|
9
9
|
|
|
10
10
|
```python
|
|
11
11
|
from openai import OpenAI
|
|
12
|
-
from openaivec.
|
|
12
|
+
from openaivec._responses import BatchResponses
|
|
13
13
|
from openaivec.task import customer_support
|
|
14
14
|
|
|
15
15
|
client = OpenAI()
|
|
@@ -65,7 +65,7 @@ from typing import List, Literal
|
|
|
65
65
|
|
|
66
66
|
from pydantic import BaseModel, Field
|
|
67
67
|
|
|
68
|
-
from openaivec.
|
|
68
|
+
from openaivec._model import PreparedTask
|
|
69
69
|
|
|
70
70
|
__all__ = ["customer_sentiment"]
|
|
71
71
|
|