openaivec 0.13.7__tar.gz → 0.14.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.13.7 → openaivec-0.14.1}/.github/copilot-instructions.md +41 -32
- {openaivec-0.13.7 → openaivec-0.14.1}/PKG-INFO +5 -5
- {openaivec-0.13.7 → openaivec-0.14.1}/README.md +4 -4
- openaivec-0.14.1/docs/api/main.md +19 -0
- openaivec-0.14.1/docs/api/pandas_ext.md +3 -0
- openaivec-0.14.1/docs/api/spark.md +3 -0
- openaivec-0.14.1/docs/api/task.md +3 -0
- openaivec-0.14.1/docs/api/tasks/customer_support/customer_sentiment.md +3 -0
- openaivec-0.14.1/docs/api/tasks/customer_support/inquiry_classification.md +3 -0
- openaivec-0.14.1/docs/api/tasks/customer_support/inquiry_summary.md +3 -0
- openaivec-0.14.1/docs/api/tasks/customer_support/intent_analysis.md +3 -0
- openaivec-0.14.1/docs/api/tasks/customer_support/response_suggestion.md +3 -0
- openaivec-0.14.1/docs/api/tasks/customer_support/urgency_analysis.md +3 -0
- openaivec-0.14.1/docs/api/tasks/nlp/dependency_parsing.md +3 -0
- openaivec-0.14.1/docs/api/tasks/nlp/keyword_extraction.md +3 -0
- openaivec-0.14.1/docs/api/tasks/nlp/morphological_analysis.md +3 -0
- openaivec-0.14.1/docs/api/tasks/nlp/named_entity_recognition.md +3 -0
- openaivec-0.14.1/docs/api/tasks/nlp/sentiment_analysis.md +3 -0
- openaivec-0.14.1/docs/api/tasks/nlp/translation.md +3 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/docs/index.md +2 -4
- {openaivec-0.13.7 → openaivec-0.14.1}/mkdocs.yml +20 -7
- openaivec-0.14.1/src/openaivec/__init__.py +13 -0
- openaivec-0.13.7/src/openaivec/embeddings.py → openaivec-0.14.1/src/openaivec/_embeddings.py +3 -3
- openaivec-0.13.7/src/openaivec/prompt.py → openaivec-0.14.1/src/openaivec/_prompt.py +2 -2
- openaivec-0.13.7/src/openaivec/provider.py → openaivec-0.14.1/src/openaivec/_provider.py +3 -3
- openaivec-0.13.7/src/openaivec/proxy.py → openaivec-0.14.1/src/openaivec/_proxy.py +1 -1
- openaivec-0.13.7/src/openaivec/responses.py → openaivec-0.14.1/src/openaivec/_responses.py +4 -4
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/pandas_ext.py +18 -18
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/spark.py +4 -4
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/__init__.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/customer_sentiment.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/inquiry_classification.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/inquiry_summary.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/intent_analysis.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/response_suggestion.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/urgency_analysis.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/dependency_parsing.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/keyword_extraction.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/morphological_analysis.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/named_entity_recognition.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/sentiment_analysis.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/translation.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/table/fillna.py +3 -3
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_di.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_embeddings.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_optimize.py +2 -2
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_pandas_ext.py +6 -6
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_prompt.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_provider.py +3 -3
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_proxy.py +22 -22
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_proxy_suggester.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_responses.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_serialize.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_spark.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_task.py +1 -1
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/test_util.py +1 -1
- openaivec-0.13.7/docs/api/di.md +0 -15
- openaivec-0.13.7/docs/api/embeddings.md +0 -15
- openaivec-0.13.7/docs/api/pandas_ext.md +0 -15
- openaivec-0.13.7/docs/api/prompt.md +0 -15
- openaivec-0.13.7/docs/api/proxy.md +0 -102
- openaivec-0.13.7/docs/api/responses.md +0 -15
- openaivec-0.13.7/docs/api/spark.md +0 -15
- openaivec-0.13.7/docs/api/task.md +0 -19
- openaivec-0.13.7/docs/api/tasks/customer_support/customer_sentiment.md +0 -3
- openaivec-0.13.7/docs/api/tasks/customer_support/inquiry_classification.md +0 -3
- openaivec-0.13.7/docs/api/tasks/customer_support/inquiry_summary.md +0 -3
- openaivec-0.13.7/docs/api/tasks/customer_support/intent_analysis.md +0 -3
- openaivec-0.13.7/docs/api/tasks/customer_support/response_suggestion.md +0 -3
- openaivec-0.13.7/docs/api/tasks/customer_support/urgency_analysis.md +0 -3
- openaivec-0.13.7/docs/api/tasks/nlp/dependency_parsing.md +0 -15
- openaivec-0.13.7/docs/api/tasks/nlp/keyword_extraction.md +0 -15
- openaivec-0.13.7/docs/api/tasks/nlp/morphological_analysis.md +0 -15
- openaivec-0.13.7/docs/api/tasks/nlp/named_entity_recognition.md +0 -15
- openaivec-0.13.7/docs/api/tasks/nlp/sentiment_analysis.md +0 -15
- openaivec-0.13.7/docs/api/tasks/nlp/translation.md +0 -15
- openaivec-0.13.7/docs/api/util.md +0 -15
- openaivec-0.13.7/src/openaivec/__init__.py +0 -14
- {openaivec-0.13.7 → openaivec-0.14.1}/.env.example +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/.gitignore +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/LICENSE +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/SECURITY.md +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/SUPPORT.md +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/docs/robots.txt +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/pyproject.toml +0 -0
- /openaivec-0.13.7/src/openaivec/di.py → /openaivec-0.14.1/src/openaivec/_di.py +0 -0
- /openaivec-0.13.7/src/openaivec/log.py → /openaivec-0.14.1/src/openaivec/_log.py +0 -0
- /openaivec-0.13.7/src/openaivec/model.py → /openaivec-0.14.1/src/openaivec/_model.py +0 -0
- /openaivec-0.13.7/src/openaivec/optimize.py → /openaivec-0.14.1/src/openaivec/_optimize.py +0 -0
- /openaivec-0.13.7/src/openaivec/serialize.py → /openaivec-0.14.1/src/openaivec/_serialize.py +0 -0
- /openaivec-0.13.7/src/openaivec/util.py → /openaivec-0.14.1/src/openaivec/_util.py +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/tests/__init__.py +0 -0
- {openaivec-0.13.7 → openaivec-0.14.1}/uv.lock +0 -0
{openaivec-0.13.7 → openaivec-0.14.1}/.github/copilot-instructions.md

@@ -13,37 +13,40 @@ This repository-wide guide tells GitHub Copilot how to propose code that fits our
 
 ## Architecture and roles
 
-- `src/openaivec/proxy.py`
+- `src/openaivec/_proxy.py` (internal)
   - Core batching, deduplication, order preservation, and caching
   - `BatchingMapProxy[S, T]` (sync) / `AsyncBatchingMapProxy[S, T]` (async)
   - The map_func contract is strict: return a list of the same length and order as the inputs
   - Progress bars only in notebook environments via `tqdm.auto`, gated by `show_progress=True`
-- `src/openaivec/responses.py`
+- `src/openaivec/_responses.py` (internal)
   - Batched wrapper over OpenAI Responses JSON-mode API
   - `BatchResponses` / `AsyncBatchResponses` use the proxy internally
   - Retries via `backoff`/`backoff_async` for transient errors (RateLimit, 5xx)
   - Reasoning models (o1/o3 family) must use `temperature=None`; helpful guidance on errors
-- `src/openaivec/embeddings.py`
+- `src/openaivec/_embeddings.py` (internal)
   - Batched embeddings (sync/async)
-- `src/openaivec/pandas_ext.py`
+- `src/openaivec/pandas_ext.py` (public)
   - `Series.ai` / `Series.aio` entry points for responses/embeddings
-  - Uses DI container (`provider.CONTAINER`) to get client and model names
+  - Uses DI container (`_provider.CONTAINER`) to get client and model names
   - Supports batch size, progress, and cache sharing (`*_with_cache`)
-- `src/openaivec/spark.py`
+- `src/openaivec/spark.py` (public)
   - UDF builders: `responses_udf` / `task_udf` / `embeddings_udf` / `count_tokens_udf` / `split_to_chunks_udf`
   - Per-partition duplicate caching to reduce API calls
   - Pydantic → Spark StructType schema conversion
-- `src/openaivec/provider.py`
+- `src/openaivec/_provider.py` (internal)
   - DI container and automatic OpenAI/Azure OpenAI client provisioning
-  - Warns if Azure base URL isn’t v1 format
-- `src/openaivec/util.py`
+  - Warns if Azure base URL isn't v1 format
+- `src/openaivec/_util.py` (internal)
   - `backoff` / `backoff_async` and `TextChunker`
-- Additional modules
-  - `src/openaivec/
-  - `src/openaivec/
-  - `src/openaivec/
-  - `src/openaivec/
-  - `src/openaivec/task
+- Additional internal modules
+  - `src/openaivec/_di.py`: lightweight DI container
+  - `src/openaivec/_log.py`: logging/observe helpers
+  - `src/openaivec/_prompt.py`: few-shot prompt building
+  - `src/openaivec/_serialize.py`: Pydantic schema (de)serialization
+  - `src/openaivec/_model.py`: task configuration models
+  - `src/openaivec/_optimize.py`: performance optimization
+- `src/openaivec/task/` (public)
+  - Pre-built, structured task library
 
 ## Dev commands (uv)
 
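The map_func contract above is the key invariant of the batching layer. A minimal sketch of a conforming function, assuming nothing beyond what the guide states; the body is illustrative and not part of the library:

```python
def map_func(batch: list[str]) -> list[str]:
    """Illustrative map_func: one output per input, in the same order."""
    # The proxy deduplicates and batches inputs before calling this function and
    # restores results to the caller's original order afterwards, so a length or
    # order mismatch here would corrupt that mapping (the proxy raises ValueError).
    return [text.upper() for text in batch]
```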
@@ -76,17 +79,17 @@ uv run mkdocs serve
 
 ## API contracts and critical rules
 
-- Proxy (BatchingMapProxy / AsyncBatchingMapProxy)
+- Proxy (`_proxy.py` - BatchingMapProxy / AsyncBatchingMapProxy)
   - map_func must return a list with the same length and order as inputs; on mismatch, release events and raise ValueError
   - Inputs are de-duplicated while preserving first-occurrence order; outputs are restored to the original order
   - Progress is only shown in notebooks when `show_progress=True`
   - Async version enforces `max_concurrency` via `asyncio.Semaphore`
-- Responses
+- Responses (`_responses.py`)
   - Use OpenAI Responses JSON mode (`responses.parse`)
   - For reasoning models (o1/o3 families), you MUST set `temperature=None`; helpful error messaging is built-in
   - Strongly prefer structured outputs with Pydantic models
   - Retries with exponential backoff for RateLimit/5xx
-- Embeddings
+- Embeddings (`_embeddings.py`)
   - Return NumPy float32 arrays
 - pandas extensions
   - `.ai.responses` / `.ai.embeddings` strictly preserve Series index and length
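Since structured outputs with Pydantic models are the preferred path for Responses, the response format is typically just a small `BaseModel`. A minimal sketch; the field names are illustrative, and how the model is wired into `BatchResponses` is not shown in this hunk:

```python
from typing import Literal

from pydantic import BaseModel, Field


class Sentiment(BaseModel):
    """Illustrative response format for JSON-mode structured output."""

    label: Literal["positive", "neutral", "negative"]
    score: float = Field(ge=0.0, le=1.0, description="Model confidence in the label")
```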
@@ -97,9 +100,9 @@ uv run mkdocs serve
   - Convert Pydantic models to Spark schemas; treat Enum/Literal as strings
   - Reasoning models require `temperature=None`
   - Provide token counting and text chunking helpers
-- Provider/DI and Azure
+- Provider/DI and Azure (`_provider.py` / `_di.py`)
   - Auto-detect OpenAI vs Azure OpenAI from env vars
-  - Azure requires v1 base URL (warn otherwise) and uses deployment name as the “model”
+  - Azure requires v1 base URL (warn otherwise) and uses deployment name as the "model"
 
 ## Preferred patterns (Do) and Avoid (Don’t)
 
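Client auto-detection is driven entirely by environment variables, so switching between OpenAI and Azure OpenAI is configuration rather than code. A minimal sketch; the variable names below are hypothetical stand-ins inferred from the `_model` value types in this release (OpenAIAPIKey, AzureOpenAIAPIKey, AzureOpenAIBaseURL, AzureOpenAIAPIVersion) and may not match the exact names the provider reads:

```python
import os

# Plain OpenAI: an API key is enough (hypothetical variable name).
os.environ["OPENAI_API_KEY"] = "sk-..."

# Azure OpenAI: key, v1-format base URL, and API version (hypothetical names).
# The provider warns when the base URL is not v1-style, and the Azure
# deployment name is what is passed around as the "model".
os.environ["AZURE_OPENAI_API_KEY"] = "..."
os.environ["AZURE_OPENAI_BASE_URL"] = "https://<resource>.openai.azure.com/openai/v1/"
os.environ["AZURE_OPENAI_API_VERSION"] = "<api-version>"
```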
@@ -141,20 +144,26 @@ Don’t
 
 ## Package Visibility Guidelines (`__all__`)
 
 ### Public API Modules
-These modules are part of the public API and
-
-- `
-- `
-- `prompt.py` - Few-shot prompt building
-- `responses.py` - Batch response processing
-- `spark.py` - Apache Spark UDF builders
-- `pandas_ext.py` - Pandas DataFrame/Series extensions
+These modules are part of the public API and have appropriate `__all__` declarations:
+
+- `pandas_ext.py` - Pandas DataFrame/Series extensions with `.ai/.aio` accessors
+- `spark.py` - Apache Spark UDF builders for distributed processing
 - `task/*` - All task modules (NLP, customer support, table operations)
 
-### Internal Modules
-These modules are for internal use only and should have `__all__ = []`:
+### Internal Modules (underscore-prefixed)
+These modules are for internal use only and have `__all__ = []`:
+
+- `_embeddings.py` - Batch embedding processing (internal implementation)
+- `_model.py` - Task configuration models (internal types)
+- `_prompt.py` - Few-shot prompt building (internal implementation)
+- `_responses.py` - Batch response processing (internal implementation)
+- `_util.py`, `_serialize.py`, `_log.py`, `_provider.py`, `_proxy.py`, `_di.py`, `_optimize.py` - Internal utilities
 
-
+### Main Package API
+Users access core functionality through `__init__.py` exports:
+- `BatchResponses`, `AsyncBatchResponses`
+- `BatchEmbeddings`, `AsyncBatchEmbeddings`
+- `PreparedTask`, `FewShotPromptBuilder`
 
 ### `__all__` Best Practices
 
@@ -202,7 +211,7 @@ These modules are for internal use only and should have `__all__ = []`:
 - pandas `.ai` with shared cache
 
 ```python
-from openaivec.proxy import BatchingMapProxy
+from openaivec._proxy import BatchingMapProxy
 
 shared = BatchingMapProxy[str, str](batch_size=64)
 df["text"].ai.responses_with_cache("instructions", cache=shared)
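The shared-cache pattern in this example pays off whenever several calls see overlapping inputs, because the proxy deduplicates values before hitting the API. A slightly fuller sketch, assuming the same `.ai.responses_with_cache` accessor shown in the diff; the DataFrame contents and instruction text are illustrative only:

```python
import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (registers the .ai accessor)
from openaivec._proxy import BatchingMapProxy

df = pd.DataFrame({
    "ticket": ["App crashes on login", "App crashes on login", "Billing question"],
})

# One cache shared across calls; duplicate inputs are resolved once.
shared = BatchingMapProxy[str, str](batch_size=64)

first = df["ticket"].ai.responses_with_cache("Summarize in one sentence.", cache=shared)
second = df["ticket"].ai.responses_with_cache("Summarize in one sentence.", cache=shared)
# The second call with identical instructions should be served largely from the cache.
```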
{openaivec-0.13.7 → openaivec-0.14.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openaivec
-Version: 0.13.7
+Version: 0.14.1
 Summary: Generative mutation for tabular calculation
 Project-URL: Homepage, https://microsoft.github.io/openaivec/
 Project-URL: Repository, https://github.com/microsoft/openaivec

@@ -98,7 +98,7 @@ survey_responses.assign(
 ).ai.extract("structured")  # Auto-expands to columns
 ```
 
-📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
+📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
 
 # Overview
 

@@ -514,7 +514,7 @@ return rendered prompt with XML format.
 Here is an example:
 
 ```python
-from openaivec.prompt import FewShotPromptBuilder
+from openaivec import FewShotPromptBuilder
 
 prompt: str = (
     FewShotPromptBuilder()
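For context, the builder touched by this import change renders a few-shot prompt as XML. A minimal sketch of the usual chaining, assuming the `purpose` / `example` / `build` methods documented in the project README; the exact method set may differ in this release and the strings are illustrative:

```python
from openaivec import FewShotPromptBuilder

# Assumed builder methods (purpose/example/build); check the README for the full API.
prompt: str = (
    FewShotPromptBuilder()
    .purpose("Return the smallest category that contains the given word")
    .example("apple", "fruit")
    .example("spinach", "vegetable")
    .build()  # renders the prompt as XML
)
print(prompt)
```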
@@ -577,7 +577,7 @@ Here is an example:
 
 ```python
 from openai import OpenAI
-from openaivec.prompt import FewShotPromptBuilder
+from openaivec import FewShotPromptBuilder
 
 client = OpenAI(...)
 model_name = "<your-model-name>"

@@ -746,7 +746,7 @@ uv run ruff check . --fix
 📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
 📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
 📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
-📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
+📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
 
 ## Community
 
{openaivec-0.13.7 → openaivec-0.14.1}/README.md

@@ -72,7 +72,7 @@ survey_responses.assign(
 ).ai.extract("structured")  # Auto-expands to columns
 ```
 
-📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
+📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
 
 # Overview
 

@@ -488,7 +488,7 @@ return rendered prompt with XML format.
 Here is an example:
 
 ```python
-from openaivec.prompt import FewShotPromptBuilder
+from openaivec import FewShotPromptBuilder
 
 prompt: str = (
     FewShotPromptBuilder()

@@ -551,7 +551,7 @@ Here is an example:
 
 ```python
 from openai import OpenAI
-from openaivec.prompt import FewShotPromptBuilder
+from openaivec import FewShotPromptBuilder
 
 client = OpenAI(...)
 model_name = "<your-model-name>"

@@ -720,7 +720,7 @@ uv run ruff check . --fix
 📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
 📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
 📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
-📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
+📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
 
 ## Community
 
openaivec-0.14.1/docs/api/main.md

@@ -0,0 +1,19 @@
+# Main Package API
+
+The main `openaivec` package provides the core classes for AI-powered data processing.
+
+## Core Classes
+
+All core functionality is accessible through the main package imports:
+
+::: openaivec.BatchResponses
+
+::: openaivec.AsyncBatchResponses
+
+::: openaivec.BatchEmbeddings
+
+::: openaivec.AsyncBatchEmbeddings
+
+## Prompt Building
+
+::: openaivec.FewShotPromptBuilder
{openaivec-0.13.7 → openaivec-0.14.1}/docs/index.md

@@ -67,12 +67,10 @@ Get started with these comprehensive examples:
 
 Detailed documentation for all components:
 
+🔗 **[Main Package](api/main.md)** - Core classes (BatchResponses, BatchEmbeddings, FewShotPromptBuilder)
 🔗 **[pandas_ext](api/pandas_ext.md)** - Pandas Series and DataFrame extensions
 🔗 **[spark](api/spark.md)** - Apache Spark UDF builders
-🔗 **[
-🔗 **[embeddings](api/embeddings.md)** - Batch embedding generation
-🔗 **[prompt](api/prompt.md)** - Few-shot prompt building
-🔗 **[util](api/util.md)** - Utility functions and helpers
+🔗 **[task](api/task.md)** - Pre-built task modules for NLP and customer support
 
 ## Quick Start
 
{openaivec-0.13.7 → openaivec-0.14.1}/mkdocs.yml

@@ -63,13 +63,9 @@ nav:
       - FAQ Generation: examples/generate_faq.ipynb
       - Token Count and Processing Time: examples/batch_size.ipynb
   - API Reference:
-      -
+      - Main Package: api/main.md
       - pandas_ext: api/pandas_ext.md
       - spark: api/spark.md
-      - prompt: api/prompt.md
-      - util: api/util.md
-      - responses: api/responses.md
-      - embeddings: api/embeddings.md
       - task: api/task.md
       - Task Domains:
           - Natural Language Processing:
@@ -135,8 +131,25 @@ plugins:
       python:
         paths:
           - src
-
-
+        options:
+          docstring_style: google
+          show_submodules: true
+          show_source: true
+          show_root_heading: true
+          show_root_toc_entry: true
+          heading_level: 2
+          members_order: source
+          show_signature_annotations: true
+          separate_signature: true
+          show_bases: true
+          show_docstring_parameters: true
+          show_docstring_returns: true
+          show_docstring_examples: true
+          show_category_heading: true
+          group_by_category: true
+          show_if_no_docstring: false
+          inherited_members: false
+          merge_init_into_class: true
 
 markdown_extensions:
   - abbr
openaivec-0.14.1/src/openaivec/__init__.py

@@ -0,0 +1,13 @@
+from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from ._model import PreparedTask
+from ._prompt import FewShotPromptBuilder
+from ._responses import AsyncBatchResponses, BatchResponses
+
+__all__ = [
+    "AsyncBatchEmbeddings",
+    "AsyncBatchResponses",
+    "BatchEmbeddings",
+    "BatchResponses",
+    "FewShotPromptBuilder",
+    "PreparedTask",
+]
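With these exports in place, user code imports the core classes from the package root instead of the pre-0.14 module paths. A short sketch; only the import surface is exercised here:

```python
# New in 0.14.x: core classes are re-exported from the package root.
from openaivec import (
    AsyncBatchEmbeddings,
    AsyncBatchResponses,
    BatchEmbeddings,
    BatchResponses,
    FewShotPromptBuilder,
    PreparedTask,
)

# The former public modules (openaivec.responses, openaivec.embeddings, openaivec.prompt)
# are now underscore-prefixed internal modules, as the renames in this diff show.
print(BatchResponses, AsyncBatchResponses, BatchEmbeddings, AsyncBatchEmbeddings,
      FewShotPromptBuilder, PreparedTask)
```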
openaivec-0.13.7/src/openaivec/embeddings.py → openaivec-0.14.1/src/openaivec/_embeddings.py
RENAMED

@@ -6,9 +6,9 @@ import numpy as np
 from numpy.typing import NDArray
 from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
 
-from openaivec.log import observe
-from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from openaivec.util import backoff, backoff_async
+from openaivec._log import observe
+from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec._util import backoff, backoff_async
 
 __all__ = [
     "BatchEmbeddings",
openaivec-0.13.7/src/openaivec/prompt.py → openaivec-0.14.1/src/openaivec/_prompt.py
RENAMED

@@ -51,8 +51,8 @@ from openai import OpenAI
 from openai.types.responses import ParsedResponse
 from pydantic import BaseModel
 
-from openaivec.model import ResponsesModelName
-from openaivec.provider import CONTAINER
+from openaivec._model import ResponsesModelName
+from openaivec._provider import CONTAINER
 
 __all__ = [
     "FewShotPrompt",
openaivec-0.13.7/src/openaivec/provider.py → openaivec-0.14.1/src/openaivec/_provider.py
RENAMED

@@ -4,8 +4,8 @@ import warnings
 import tiktoken
 from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
 
-from openaivec import di
-from openaivec.model import (
+from openaivec import _di as di
+from openaivec._model import (
     AzureOpenAIAPIKey,
     AzureOpenAIAPIVersion,
     AzureOpenAIBaseURL,

@@ -13,7 +13,7 @@ from openaivec.model import (
     OpenAIAPIKey,
     ResponsesModelName,
 )
-from openaivec.util import TextChunker
+from openaivec._util import TextChunker
 
 __all__ = []
 
openaivec-0.13.7/src/openaivec/proxy.py → openaivec-0.14.1/src/openaivec/_proxy.py
RENAMED

@@ -4,7 +4,7 @@ from collections.abc import Hashable
 from dataclasses import dataclass, field
 from typing import Any, Awaitable, Callable, Dict, Generic, List, TypeVar
 
-from openaivec.optimize import BatchSizeSuggester
+from openaivec._optimize import BatchSizeSuggester
 
 __all__ = []
 
openaivec-0.13.7/src/openaivec/responses.py → openaivec-0.14.1/src/openaivec/_responses.py
RENAMED

@@ -7,10 +7,10 @@ from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, RateLimitError
 from openai.types.responses import ParsedResponse
 from pydantic import BaseModel
 
-from openaivec.log import observe
-from openaivec.model import PreparedTask, ResponseFormat
-from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from openaivec.util import backoff, backoff_async
+from openaivec._log import observe
+from openaivec._model import PreparedTask, ResponseFormat
+from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec._util import backoff, backoff_async
 
 __all__ = [
     "BatchResponses",
{openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/pandas_ext.py

@@ -57,11 +57,11 @@ __all__ = [
 ]
 from pydantic import BaseModel
 
-from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
-from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
-from openaivec.provider import CONTAINER, _check_azure_v1_api_url
-from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from openaivec.responses import AsyncBatchResponses, BatchResponses
+from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
+from openaivec._provider import CONTAINER, _check_azure_v1_api_url
+from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec._responses import AsyncBatchResponses, BatchResponses
 from openaivec.task.table import FillNaResponse, fillna
 
 __all__ = [

@@ -199,7 +199,7 @@ class OpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.proxy import BatchingMapProxy
+        from openaivec._proxy import BatchingMapProxy
         import numpy as np
 
         # Create a shared cache with custom batch size

@@ -297,8 +297,8 @@ class OpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.model import PreparedTask
-        from openaivec.proxy import BatchingMapProxy
+        from openaivec._model import PreparedTask
+        from openaivec._proxy import BatchingMapProxy
 
         # Create a shared cache with custom batch size
         shared_cache = BatchingMapProxy(batch_size=64)

@@ -330,7 +330,7 @@ class OpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.model import PreparedTask
+        from openaivec._model import PreparedTask
 
         # Assume you have a prepared task for sentiment analysis
         sentiment_task = PreparedTask(...)

@@ -517,7 +517,7 @@ class OpenAIVecDataFrameAccessor:
 
     Example:
         ```python
-        from openaivec.proxy import BatchingMapProxy
+        from openaivec._proxy import BatchingMapProxy
 
         # Create a shared cache with custom batch size
         shared_cache = BatchingMapProxy(batch_size=64)

@@ -614,7 +614,7 @@ class OpenAIVecDataFrameAccessor:
 
     Example:
         ```python
-        from openaivec.model import PreparedTask
+        from openaivec._model import PreparedTask
 
         # Assume you have a prepared task for data analysis
         analysis_task = PreparedTask(...)

@@ -777,7 +777,7 @@ class AsyncOpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.proxy import AsyncBatchingMapProxy
+        from openaivec._proxy import AsyncBatchingMapProxy
 
         # Create a shared cache with custom batch size and concurrency
         shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)

@@ -829,7 +829,7 @@ class AsyncOpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.proxy import AsyncBatchingMapProxy
+        from openaivec._proxy import AsyncBatchingMapProxy
         import numpy as np
 
         # Create a shared cache with custom batch size and concurrency

@@ -885,8 +885,8 @@ class AsyncOpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.model import PreparedTask
-        from openaivec.proxy import AsyncBatchingMapProxy
+        from openaivec._model import PreparedTask
+        from openaivec._proxy import AsyncBatchingMapProxy
 
         # Create a shared cache with custom batch size and concurrency
         shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)

@@ -1034,7 +1034,7 @@ class AsyncOpenAIVecSeriesAccessor:
 
     Example:
         ```python
-        from openaivec.model import PreparedTask
+        from openaivec._model import PreparedTask
 
         # Assume you have a prepared task for sentiment analysis
         sentiment_task = PreparedTask(...)

@@ -1117,7 +1117,7 @@ class AsyncOpenAIVecDataFrameAccessor:
 
     Example:
         ```python
-        from openaivec.proxy import AsyncBatchingMapProxy
+        from openaivec._proxy import AsyncBatchingMapProxy
 
         # Create a shared cache with custom batch size and concurrency
         shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)

@@ -1231,7 +1231,7 @@ class AsyncOpenAIVecDataFrameAccessor:
 
     Example:
         ```python
-        from openaivec.model import PreparedTask
+        from openaivec._model import PreparedTask
 
         # Assume you have a prepared task for data analysis
         analysis_task = PreparedTask(...)
{openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/spark.py

@@ -136,10 +136,10 @@ from pyspark.sql.udf import UserDefinedFunction
 from typing_extensions import Literal
 
 from openaivec import pandas_ext
-from openaivec.model import PreparedTask, ResponseFormat
-from openaivec.proxy import AsyncBatchingMapProxy
-from openaivec.serialize import deserialize_base_model, serialize_base_model
-from openaivec.util import TextChunker
+from openaivec._model import PreparedTask, ResponseFormat
+from openaivec._proxy import AsyncBatchingMapProxy
+from openaivec._serialize import deserialize_base_model, serialize_base_model
+from openaivec._util import TextChunker
 
 __all__ = [
     "responses_udf",
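The UDF builders listed in `__all__` return ordinary Spark `UserDefinedFunction` objects, so they register and apply like any other UDF. A sketch under stated assumptions: the keyword arguments passed to `responses_udf` here are hypothetical and may not match the real signature in this release.

```python
from pydantic import BaseModel
from pyspark.sql import SparkSession

from openaivec.spark import responses_udf  # public UDF builder per this diff


class Sentiment(BaseModel):
    label: str
    score: float


spark = SparkSession.builder.getOrCreate()

# Hypothetical arguments; consult the spark.py docstrings for the actual signature.
udf = responses_udf(instructions="Classify the sentiment of the text.", response_format=Sentiment)
spark.udf.register("sentiment", udf)

df = spark.createDataFrame([("great product",), ("terrible support",)], ["text"])
df.selectExpr("text", "sentiment(text) AS sentiment").show(truncate=False)
```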
{openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/__init__.py

@@ -32,7 +32,7 @@ Specialized tasks for customer service operations:
 ### Quick Start with Default Tasks
 ```python
 from openai import OpenAI
-from openaivec.responses import BatchResponses
+from openaivec._responses import BatchResponses
 from openaivec.task import nlp, customer_support
 
 client = OpenAI()
{openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/customer_sentiment.py
RENAMED

@@ -9,7 +9,7 @@ Example:
 
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec._responses import BatchResponses
     from openaivec.task import customer_support
 
     client = OpenAI()

@@ -65,7 +65,7 @@ from typing import List, Literal
 
 from pydantic import BaseModel, Field
 
-from openaivec.model import PreparedTask
+from openaivec._model import PreparedTask
 
 __all__ = ["customer_sentiment"]
 
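Each task module exports one ready-made task object (here `customer_sentiment`), so callers pick a task from the library instead of hand-writing instructions. A minimal sketch; whether the exported name is a `PreparedTask` instance or a factory that builds one is not visible in this hunk, so the snippet only looks the object up:

```python
from openaivec.task import customer_support, nlp

# The single public name exported by this module, per its __all__ in the diff.
sentiment_task = customer_support.customer_sentiment
print(sentiment_task, nlp.__name__)
```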
{openaivec-0.13.7 → openaivec-0.14.1}/src/openaivec/task/customer_support/inquiry_classification.py
RENAMED

@@ -8,7 +8,7 @@ Example:
 
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec._responses import BatchResponses
     from openaivec.task import customer_support
 
     client = OpenAI()

@@ -96,7 +96,7 @@ from typing import Dict, List, Literal
 
 from pydantic import BaseModel, Field
 
-from openaivec.model import PreparedTask
+from openaivec._model import PreparedTask
 
 __all__ = ["inquiry_classification"]
 