openaivec 0.13.0.tar.gz → 0.13.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.13.0 → openaivec-0.13.2}/PKG-INFO +39 -7
- {openaivec-0.13.0 → openaivec-0.13.2}/README.md +38 -6
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/embeddings.py +8 -8
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/model.py +37 -1
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/pandas_ext.py +30 -17
- openaivec-0.13.2/src/openaivec/provider.py +150 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/responses.py +79 -31
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/spark.py +15 -15
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/util.py +18 -12
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_provider.py +77 -22
- openaivec-0.13.2/tests/test_util.py +297 -0
- openaivec-0.13.0/src/openaivec/provider.py +0 -98
- openaivec-0.13.0/tests/test_util.py +0 -41
- {openaivec-0.13.0 → openaivec-0.13.2}/.env.example +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/.gitignore +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/LICENSE +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/SECURITY.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/SUPPORT.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/di.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/embeddings.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/prompt.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/proxy.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/responses.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/spark.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/task.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/util.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/index.md +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/docs/robots.txt +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/mkdocs.yml +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/pyproject.toml +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/__init__.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/di.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/log.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/prompt.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/proxy.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/serialize.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/__init__.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/__init__.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/__init__.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/translation.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/table/__init__.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/table/fillna.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/__init__.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_di.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_embeddings.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_pandas_ext.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_prompt.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_proxy.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_responses.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_serialize.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_spark.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_task.py +0 -0
- {openaivec-0.13.0 → openaivec-0.13.2}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openaivec
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.13.2
|
|
4
4
|
Summary: Generative mutation for tabular calculation
|
|
5
5
|
Project-URL: Homepage, https://microsoft.github.io/openaivec/
|
|
6
6
|
Project-URL: Repository, https://github.com/microsoft/openaivec
|
|
@@ -180,8 +180,8 @@ from openaivec import pandas_ext
|
|
|
180
180
|
os.environ["OPENAI_API_KEY"] = "your-api-key-here"
|
|
181
181
|
# Or for Azure OpenAI:
|
|
182
182
|
# os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
|
|
183
|
-
# os.environ["
|
|
184
|
-
# os.environ["AZURE_OPENAI_API_VERSION"] = "
|
|
183
|
+
# os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
|
|
184
|
+
# os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
|
|
185
185
|
|
|
186
186
|
# Authentication Option 2: Custom client (optional)
|
|
187
187
|
# from openai import OpenAI, AsyncOpenAI
|
|
@@ -190,6 +190,7 @@ os.environ["OPENAI_API_KEY"] = "your-api-key-here"
|
|
|
190
190
|
# pandas_ext.use_async(AsyncOpenAI())
|
|
191
191
|
|
|
192
192
|
# Configure model (optional - defaults to gpt-4.1-mini)
|
|
193
|
+
# For Azure OpenAI: use your deployment name, for OpenAI: use model name
|
|
193
194
|
pandas_ext.responses_model("gpt-4.1-mini")
|
|
194
195
|
|
|
195
196
|
# Create your data
|
|
@@ -211,6 +212,27 @@ result = df.assign(
|
|
|
211
212
|
|
|
212
213
|
📓 **[Interactive pandas examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
213
214
|
|
|
215
|
+
### Using with Reasoning Models
|
|
216
|
+
|
|
217
|
+
When using reasoning models (o1-preview, o1-mini, o3-mini, etc.), you must set `temperature=None` to avoid API errors:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
# For reasoning models like o1-preview, o1-mini, o3-mini
|
|
221
|
+
pandas_ext.responses_model("o1-mini") # Set your reasoning model
|
|
222
|
+
|
|
223
|
+
# MUST use temperature=None with reasoning models
|
|
224
|
+
result = df.assign(
|
|
225
|
+
analysis=lambda df: df.text.ai.responses(
|
|
226
|
+
"Analyze this text step by step",
|
|
227
|
+
temperature=None # Required for reasoning models
|
|
228
|
+
)
|
|
229
|
+
)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
**Why this is needed**: Reasoning models don't support temperature parameters and will return an error if temperature is specified. The library automatically detects these errors and provides guidance on how to fix them.
|
|
233
|
+
|
|
234
|
+
**Reference**: [Azure OpenAI Reasoning Models](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning)
|
|
235
|
+
|
|
214
236
|
### Using Pre-configured Tasks
|
|
215
237
|
|
|
216
238
|
For common text processing operations, openaivec provides ready-to-use tasks that eliminate the need to write custom prompts:
|
|
@@ -322,7 +344,7 @@ sc.environment["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
|
|
|
322
344
|
|
|
323
345
|
# Option 2: Using Azure OpenAI
|
|
324
346
|
# sc.environment["AZURE_OPENAI_API_KEY"] = os.environ.get("AZURE_OPENAI_API_KEY")
|
|
325
|
-
# sc.environment["
|
|
347
|
+
# sc.environment["AZURE_OPENAI_BASE_URL"] = os.environ.get("AZURE_OPENAI_BASE_URL")
|
|
326
348
|
# sc.environment["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_OPENAI_API_VERSION")
|
|
327
349
|
```
|
|
328
350
|
|
|
@@ -380,6 +402,16 @@ spark.udf.register(
|
|
|
380
402
|
)
|
|
381
403
|
)
|
|
382
404
|
|
|
405
|
+
# --- Register UDF for Reasoning Models ---
|
|
406
|
+
# For reasoning models (o1-preview, o1-mini, o3, etc.), set temperature=None
|
|
407
|
+
spark.udf.register(
|
|
408
|
+
"reasoning_analysis",
|
|
409
|
+
responses_udf(
|
|
410
|
+
instructions="Analyze this step by step with detailed reasoning",
|
|
411
|
+
temperature=None # Required for reasoning models
|
|
412
|
+
)
|
|
413
|
+
)
|
|
414
|
+
|
|
383
415
|
```
|
|
384
416
|
|
|
385
417
|
You can now use these UDFs in Spark SQL:
|
|
@@ -666,15 +698,15 @@ steps:
|
|
|
666
698
|
|
|
667
699
|
# Configure Azure OpenAI authentication
|
|
668
700
|
sc.environment["AZURE_OPENAI_API_KEY"] = "<your-api-key>"
|
|
669
|
-
sc.environment["
|
|
670
|
-
sc.environment["AZURE_OPENAI_API_VERSION"] = "
|
|
701
|
+
sc.environment["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
|
|
702
|
+
sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
|
|
671
703
|
|
|
672
704
|
# Register UDFs
|
|
673
705
|
spark.udf.register(
|
|
674
706
|
"analyze_text",
|
|
675
707
|
responses_udf(
|
|
676
708
|
instructions="Analyze the sentiment of the text",
|
|
677
|
-
model_name="
|
|
709
|
+
model_name="gpt-4.1-mini" # Use your Azure deployment name here
|
|
678
710
|
)
|
|
679
711
|
)
|
|
680
712
|
```
|
|
@@ -156,8 +156,8 @@ from openaivec import pandas_ext
|
|
|
156
156
|
os.environ["OPENAI_API_KEY"] = "your-api-key-here"
|
|
157
157
|
# Or for Azure OpenAI:
|
|
158
158
|
# os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
|
|
159
|
-
# os.environ["
|
|
160
|
-
# os.environ["AZURE_OPENAI_API_VERSION"] = "
|
|
159
|
+
# os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
|
|
160
|
+
# os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
|
|
161
161
|
|
|
162
162
|
# Authentication Option 2: Custom client (optional)
|
|
163
163
|
# from openai import OpenAI, AsyncOpenAI
|
|
@@ -166,6 +166,7 @@ os.environ["OPENAI_API_KEY"] = "your-api-key-here"
|
|
|
166
166
|
# pandas_ext.use_async(AsyncOpenAI())
|
|
167
167
|
|
|
168
168
|
# Configure model (optional - defaults to gpt-4.1-mini)
|
|
169
|
+
# For Azure OpenAI: use your deployment name, for OpenAI: use model name
|
|
169
170
|
pandas_ext.responses_model("gpt-4.1-mini")
|
|
170
171
|
|
|
171
172
|
# Create your data
|
|
@@ -187,6 +188,27 @@ result = df.assign(
|
|
|
187
188
|
|
|
188
189
|
📓 **[Interactive pandas examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
189
190
|
|
|
191
|
+
### Using with Reasoning Models
|
|
192
|
+
|
|
193
|
+
When using reasoning models (o1-preview, o1-mini, o3-mini, etc.), you must set `temperature=None` to avoid API errors:
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
# For reasoning models like o1-preview, o1-mini, o3-mini
|
|
197
|
+
pandas_ext.responses_model("o1-mini") # Set your reasoning model
|
|
198
|
+
|
|
199
|
+
# MUST use temperature=None with reasoning models
|
|
200
|
+
result = df.assign(
|
|
201
|
+
analysis=lambda df: df.text.ai.responses(
|
|
202
|
+
"Analyze this text step by step",
|
|
203
|
+
temperature=None # Required for reasoning models
|
|
204
|
+
)
|
|
205
|
+
)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
**Why this is needed**: Reasoning models don't support temperature parameters and will return an error if temperature is specified. The library automatically detects these errors and provides guidance on how to fix them.
|
|
209
|
+
|
|
210
|
+
**Reference**: [Azure OpenAI Reasoning Models](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning)
|
|
211
|
+
|
|
190
212
|
### Using Pre-configured Tasks
|
|
191
213
|
|
|
192
214
|
For common text processing operations, openaivec provides ready-to-use tasks that eliminate the need to write custom prompts:
|
|
@@ -298,7 +320,7 @@ sc.environment["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
|
|
|
298
320
|
|
|
299
321
|
# Option 2: Using Azure OpenAI
|
|
300
322
|
# sc.environment["AZURE_OPENAI_API_KEY"] = os.environ.get("AZURE_OPENAI_API_KEY")
|
|
301
|
-
# sc.environment["
|
|
323
|
+
# sc.environment["AZURE_OPENAI_BASE_URL"] = os.environ.get("AZURE_OPENAI_BASE_URL")
|
|
302
324
|
# sc.environment["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_OPENAI_API_VERSION")
|
|
303
325
|
```
|
|
304
326
|
|
|
@@ -356,6 +378,16 @@ spark.udf.register(
|
|
|
356
378
|
)
|
|
357
379
|
)
|
|
358
380
|
|
|
381
|
+
# --- Register UDF for Reasoning Models ---
|
|
382
|
+
# For reasoning models (o1-preview, o1-mini, o3, etc.), set temperature=None
|
|
383
|
+
spark.udf.register(
|
|
384
|
+
"reasoning_analysis",
|
|
385
|
+
responses_udf(
|
|
386
|
+
instructions="Analyze this step by step with detailed reasoning",
|
|
387
|
+
temperature=None # Required for reasoning models
|
|
388
|
+
)
|
|
389
|
+
)
|
|
390
|
+
|
|
359
391
|
```
|
|
360
392
|
|
|
361
393
|
You can now use these UDFs in Spark SQL:
|
|
@@ -642,15 +674,15 @@ steps:
|
|
|
642
674
|
|
|
643
675
|
# Configure Azure OpenAI authentication
|
|
644
676
|
sc.environment["AZURE_OPENAI_API_KEY"] = "<your-api-key>"
|
|
645
|
-
sc.environment["
|
|
646
|
-
sc.environment["AZURE_OPENAI_API_VERSION"] = "
|
|
677
|
+
sc.environment["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
|
|
678
|
+
sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
|
|
647
679
|
|
|
648
680
|
# Register UDFs
|
|
649
681
|
spark.udf.register(
|
|
650
682
|
"analyze_text",
|
|
651
683
|
responses_udf(
|
|
652
684
|
instructions="Analyze the sentiment of the text",
|
|
653
|
-
model_name="
|
|
685
|
+
model_name="gpt-4.1-mini" # Use your Azure deployment name here
|
|
654
686
|
)
|
|
655
687
|
)
|
|
656
688
|
```
|
|
@@ -4,7 +4,7 @@ from typing import List
|
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
from numpy.typing import NDArray
|
|
7
|
-
from openai import AsyncOpenAI, OpenAI, RateLimitError
|
|
7
|
+
from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
|
|
8
8
|
|
|
9
9
|
from .log import observe
|
|
10
10
|
from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
@@ -24,7 +24,7 @@ class BatchEmbeddings:
|
|
|
24
24
|
|
|
25
25
|
Attributes:
|
|
26
26
|
client (OpenAI): Configured OpenAI client.
|
|
27
|
-
model_name (str):
|
|
27
|
+
model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name (e.g., ``"text-embedding-3-small"``).
|
|
28
28
|
cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
|
|
29
29
|
"""
|
|
30
30
|
|
|
@@ -38,7 +38,7 @@ class BatchEmbeddings:
|
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
40
|
client (OpenAI): OpenAI client.
|
|
41
|
-
model_name (str):
|
|
41
|
+
model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
|
|
42
42
|
batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
|
|
43
43
|
|
|
44
44
|
Returns:
|
|
@@ -47,7 +47,7 @@ class BatchEmbeddings:
|
|
|
47
47
|
return cls(client=client, model_name=model_name, cache=BatchingMapProxy(batch_size=batch_size))
|
|
48
48
|
|
|
49
49
|
@observe(_LOGGER)
|
|
50
|
-
@backoff(
|
|
50
|
+
@backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
|
|
51
51
|
def _embed_chunk(self, inputs: List[str]) -> List[NDArray[np.float32]]:
|
|
52
52
|
"""Embed one minibatch of strings.
|
|
53
53
|
|
|
@@ -90,7 +90,7 @@ class AsyncBatchEmbeddings:
|
|
|
90
90
|
import asyncio
|
|
91
91
|
import numpy as np
|
|
92
92
|
from openai import AsyncOpenAI
|
|
93
|
-
|
|
93
|
+
from openaivec import AsyncBatchEmbeddings
|
|
94
94
|
|
|
95
95
|
# Assuming openai_async_client is an initialized AsyncOpenAI client
|
|
96
96
|
openai_async_client = AsyncOpenAI() # Replace with your actual client initialization
|
|
@@ -119,7 +119,7 @@ class AsyncBatchEmbeddings:
|
|
|
119
119
|
|
|
120
120
|
Attributes:
|
|
121
121
|
client (AsyncOpenAI): Configured OpenAI async client.
|
|
122
|
-
model_name (str):
|
|
122
|
+
model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
|
|
123
123
|
cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
|
|
124
124
|
"""
|
|
125
125
|
|
|
@@ -141,7 +141,7 @@ class AsyncBatchEmbeddings:
|
|
|
141
141
|
|
|
142
142
|
Args:
|
|
143
143
|
client (AsyncOpenAI): OpenAI async client.
|
|
144
|
-
model_name (str):
|
|
144
|
+
model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
|
|
145
145
|
batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
|
|
146
146
|
max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
|
|
147
147
|
|
|
@@ -155,7 +155,7 @@ class AsyncBatchEmbeddings:
|
|
|
155
155
|
)
|
|
156
156
|
|
|
157
157
|
@observe(_LOGGER)
|
|
158
|
-
@backoff_async(
|
|
158
|
+
@backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
|
|
159
159
|
async def _embed_chunk(self, inputs: List[str]) -> List[NDArray[np.float32]]:
|
|
160
160
|
"""Embed one minibatch of strings asynchronously.
|
|
161
161
|
|
|
@@ -59,29 +59,65 @@ class PreparedTask:
|
|
|
59
59
|
|
|
60
60
|
@dataclass(frozen=True)
|
|
61
61
|
class ResponsesModelName:
|
|
62
|
+
"""Container for responses model name configuration.
|
|
63
|
+
|
|
64
|
+
Attributes:
|
|
65
|
+
value (str): The model name for OpenAI responses API.
|
|
66
|
+
"""
|
|
67
|
+
|
|
62
68
|
value: str
|
|
63
69
|
|
|
64
70
|
|
|
65
71
|
@dataclass(frozen=True)
|
|
66
72
|
class EmbeddingsModelName:
|
|
73
|
+
"""Container for embeddings model name configuration.
|
|
74
|
+
|
|
75
|
+
Attributes:
|
|
76
|
+
value (str): The model name for OpenAI embeddings API.
|
|
77
|
+
"""
|
|
78
|
+
|
|
67
79
|
value: str
|
|
68
80
|
|
|
69
81
|
|
|
70
82
|
@dataclass(frozen=True)
|
|
71
83
|
class OpenAIAPIKey:
|
|
84
|
+
"""Container for OpenAI API key configuration.
|
|
85
|
+
|
|
86
|
+
Attributes:
|
|
87
|
+
value (str): The API key for OpenAI services.
|
|
88
|
+
"""
|
|
89
|
+
|
|
72
90
|
value: str
|
|
73
91
|
|
|
74
92
|
|
|
75
93
|
@dataclass(frozen=True)
|
|
76
94
|
class AzureOpenAIAPIKey:
|
|
95
|
+
"""Container for Azure OpenAI API key configuration.
|
|
96
|
+
|
|
97
|
+
Attributes:
|
|
98
|
+
value (str): The API key for Azure OpenAI services.
|
|
99
|
+
"""
|
|
100
|
+
|
|
77
101
|
value: str
|
|
78
102
|
|
|
79
103
|
|
|
80
104
|
@dataclass(frozen=True)
|
|
81
|
-
class
|
|
105
|
+
class AzureOpenAIBaseURL:
|
|
106
|
+
"""Container for Azure OpenAI base URL configuration.
|
|
107
|
+
|
|
108
|
+
Attributes:
|
|
109
|
+
value (str): The base URL for Azure OpenAI services.
|
|
110
|
+
"""
|
|
111
|
+
|
|
82
112
|
value: str
|
|
83
113
|
|
|
84
114
|
|
|
85
115
|
@dataclass(frozen=True)
|
|
86
116
|
class AzureOpenAIAPIVersion:
|
|
117
|
+
"""Container for Azure OpenAI API version configuration.
|
|
118
|
+
|
|
119
|
+
Attributes:
|
|
120
|
+
value (str): The API version for Azure OpenAI services.
|
|
121
|
+
"""
|
|
122
|
+
|
|
87
123
|
value: str
|
|
@@ -7,7 +7,7 @@ from openaivec import pandas_ext
|
|
|
7
7
|
|
|
8
8
|
# Option 1: Use environment variables (automatic detection)
|
|
9
9
|
# Set OPENAI_API_KEY or Azure OpenAI environment variables
|
|
10
|
-
# (AZURE_OPENAI_API_KEY,
|
|
10
|
+
# (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
|
|
11
11
|
# No explicit setup needed - clients are automatically created
|
|
12
12
|
|
|
13
13
|
# Option 2: Use an existing OpenAI client instance
|
|
@@ -17,14 +17,18 @@ pandas_ext.use(client)
|
|
|
17
17
|
# Option 3: Use an existing Azure OpenAI client instance
|
|
18
18
|
azure_client = AzureOpenAI(
|
|
19
19
|
api_key="your-azure-key",
|
|
20
|
-
|
|
21
|
-
api_version="
|
|
20
|
+
base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
|
|
21
|
+
api_version="preview"
|
|
22
22
|
)
|
|
23
23
|
pandas_ext.use(azure_client)
|
|
24
24
|
|
|
25
|
-
# Option 4: Use async
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
# Option 4: Use async Azure OpenAI client instance
|
|
26
|
+
async_azure_client = AsyncAzureOpenAI(
|
|
27
|
+
api_key="your-azure-key",
|
|
28
|
+
base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
|
|
29
|
+
api_version="preview"
|
|
30
|
+
)
|
|
31
|
+
pandas_ext.use_async(async_azure_client)
|
|
28
32
|
|
|
29
33
|
# Set up model names (optional, defaults shown)
|
|
30
34
|
pandas_ext.responses_model("gpt-4.1-mini")
|
|
@@ -48,7 +52,7 @@ from pydantic import BaseModel
|
|
|
48
52
|
|
|
49
53
|
from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
|
|
50
54
|
from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
|
|
51
|
-
from .provider import CONTAINER
|
|
55
|
+
from .provider import CONTAINER, _check_azure_v1_api_url
|
|
52
56
|
from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
|
|
53
57
|
from .responses import AsyncBatchResponses, BatchResponses
|
|
54
58
|
from .task.table import FillNaResponse, fillna
|
|
@@ -74,6 +78,10 @@ def use(client: OpenAI) -> None:
|
|
|
74
78
|
`openai.AzureOpenAI` instance.
|
|
75
79
|
The same instance is reused by every helper in this module.
|
|
76
80
|
"""
|
|
81
|
+
# Check Azure v1 API URL if using AzureOpenAI client
|
|
82
|
+
if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
|
|
83
|
+
_check_azure_v1_api_url(str(client.base_url))
|
|
84
|
+
|
|
77
85
|
CONTAINER.register(OpenAI, lambda: client)
|
|
78
86
|
|
|
79
87
|
|
|
@@ -85,6 +93,10 @@ def use_async(client: AsyncOpenAI) -> None:
|
|
|
85
93
|
`openai.AsyncAzureOpenAI` instance.
|
|
86
94
|
The same instance is reused by every helper in this module.
|
|
87
95
|
"""
|
|
96
|
+
# Check Azure v1 API URL if using AsyncAzureOpenAI client
|
|
97
|
+
if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
|
|
98
|
+
_check_azure_v1_api_url(str(client.base_url))
|
|
99
|
+
|
|
88
100
|
CONTAINER.register(AsyncOpenAI, lambda: client)
|
|
89
101
|
|
|
90
102
|
|
|
@@ -92,7 +104,7 @@ def responses_model(name: str) -> None:
|
|
|
92
104
|
"""Override the model used for text responses.
|
|
93
105
|
|
|
94
106
|
Args:
|
|
95
|
-
name (str):
|
|
107
|
+
name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
|
|
96
108
|
(for example, ``gpt-4.1-mini``).
|
|
97
109
|
"""
|
|
98
110
|
CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
|
|
@@ -102,7 +114,8 @@ def embeddings_model(name: str) -> None:
|
|
|
102
114
|
"""Override the model used for text embeddings.
|
|
103
115
|
|
|
104
116
|
Args:
|
|
105
|
-
name (str):
|
|
117
|
+
name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name,
|
|
118
|
+
e.g. ``text-embedding-3-small``.
|
|
106
119
|
"""
|
|
107
120
|
CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
|
|
108
121
|
|
|
@@ -143,7 +156,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
143
156
|
instructions: str,
|
|
144
157
|
cache: BatchingMapProxy[str, ResponseFormat],
|
|
145
158
|
response_format: Type[ResponseFormat] = str,
|
|
146
|
-
temperature: float = 0.0,
|
|
159
|
+
temperature: float | None = 0.0,
|
|
147
160
|
top_p: float = 1.0,
|
|
148
161
|
) -> pd.Series:
|
|
149
162
|
client: BatchResponses = BatchResponses(
|
|
@@ -205,7 +218,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
205
218
|
instructions: str,
|
|
206
219
|
response_format: Type[ResponseFormat] = str,
|
|
207
220
|
batch_size: int = 128,
|
|
208
|
-
temperature: float = 0.0,
|
|
221
|
+
temperature: float | None = 0.0,
|
|
209
222
|
top_p: float = 1.0,
|
|
210
223
|
) -> pd.Series:
|
|
211
224
|
"""Call an LLM once for every Series element.
|
|
@@ -438,7 +451,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
438
451
|
instructions: str,
|
|
439
452
|
cache: BatchingMapProxy[str, ResponseFormat],
|
|
440
453
|
response_format: Type[ResponseFormat] = str,
|
|
441
|
-
temperature: float = 0.0,
|
|
454
|
+
temperature: float | None = 0.0,
|
|
442
455
|
top_p: float = 1.0,
|
|
443
456
|
) -> pd.Series:
|
|
444
457
|
"""Generate a response for each row after serialising it to JSON using a provided cache.
|
|
@@ -496,7 +509,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
496
509
|
instructions: str,
|
|
497
510
|
response_format: Type[ResponseFormat] = str,
|
|
498
511
|
batch_size: int = 128,
|
|
499
|
-
temperature: float = 0.0,
|
|
512
|
+
temperature: float | None = 0.0,
|
|
500
513
|
top_p: float = 1.0,
|
|
501
514
|
) -> pd.Series:
|
|
502
515
|
"""Generate a response for each row after serialising it to JSON.
|
|
@@ -681,7 +694,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
681
694
|
instructions: str,
|
|
682
695
|
cache: AsyncBatchingMapProxy[str, ResponseFormat],
|
|
683
696
|
response_format: Type[ResponseFormat] = str,
|
|
684
|
-
temperature: float = 0.0,
|
|
697
|
+
temperature: float | None = 0.0,
|
|
685
698
|
top_p: float = 1.0,
|
|
686
699
|
) -> pd.Series:
|
|
687
700
|
"""Call an LLM once for every Series element using a provided cache (asynchronously).
|
|
@@ -848,7 +861,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
848
861
|
instructions: str,
|
|
849
862
|
response_format: Type[ResponseFormat] = str,
|
|
850
863
|
batch_size: int = 128,
|
|
851
|
-
temperature: float = 0.0,
|
|
864
|
+
temperature: float | None = 0.0,
|
|
852
865
|
top_p: float = 1.0,
|
|
853
866
|
max_concurrency: int = 8,
|
|
854
867
|
) -> pd.Series:
|
|
@@ -975,7 +988,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
975
988
|
instructions: str,
|
|
976
989
|
cache: AsyncBatchingMapProxy[str, ResponseFormat],
|
|
977
990
|
response_format: Type[ResponseFormat] = str,
|
|
978
|
-
temperature: float = 0.0,
|
|
991
|
+
temperature: float | None = 0.0,
|
|
979
992
|
top_p: float = 1.0,
|
|
980
993
|
) -> pd.Series:
|
|
981
994
|
"""Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
|
|
@@ -1040,7 +1053,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1040
1053
|
instructions: str,
|
|
1041
1054
|
response_format: Type[ResponseFormat] = str,
|
|
1042
1055
|
batch_size: int = 128,
|
|
1043
|
-
temperature: float = 0.0,
|
|
1056
|
+
temperature: float | None = 0.0,
|
|
1044
1057
|
top_p: float = 1.0,
|
|
1045
1058
|
max_concurrency: int = 8,
|
|
1046
1059
|
) -> pd.Series:
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import warnings
|
|
3
|
+
|
|
4
|
+
import tiktoken
|
|
5
|
+
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
|
|
6
|
+
|
|
7
|
+
from . import di
|
|
8
|
+
from .model import (
|
|
9
|
+
AzureOpenAIAPIKey,
|
|
10
|
+
AzureOpenAIAPIVersion,
|
|
11
|
+
AzureOpenAIBaseURL,
|
|
12
|
+
EmbeddingsModelName,
|
|
13
|
+
OpenAIAPIKey,
|
|
14
|
+
ResponsesModelName,
|
|
15
|
+
)
|
|
16
|
+
from .util import TextChunker
|
|
17
|
+
|
|
18
|
+
CONTAINER = di.Container()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _check_azure_v1_api_url(base_url: str) -> None:
|
|
22
|
+
"""Check if Azure OpenAI base URL uses the recommended v1 API format.
|
|
23
|
+
|
|
24
|
+
Issues a warning if the URL doesn't end with '/openai/v1/' to encourage
|
|
25
|
+
migration to the v1 API format as recommended by Microsoft.
|
|
26
|
+
|
|
27
|
+
Reference: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
base_url (str): The Azure OpenAI base URL to check.
|
|
31
|
+
"""
|
|
32
|
+
if base_url and not base_url.rstrip("/").endswith("/openai/v1"):
|
|
33
|
+
warnings.warn(
|
|
34
|
+
"⚠️ Azure OpenAI v1 API is recommended. Your base URL should end with '/openai/v1/'. "
|
|
35
|
+
f"Current URL: '{base_url}'. "
|
|
36
|
+
"Consider updating to: 'https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/' "
|
|
37
|
+
"for better performance and future compatibility. "
|
|
38
|
+
"See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle",
|
|
39
|
+
UserWarning,
|
|
40
|
+
stacklevel=3,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def provide_openai_client() -> OpenAI:
|
|
45
|
+
"""Provide OpenAI client based on environment variables.
|
|
46
|
+
|
|
47
|
+
Automatically detects and prioritizes OpenAI over Azure OpenAI configuration.
|
|
48
|
+
Checks the following environment variables in order:
|
|
49
|
+
1. OPENAI_API_KEY - if set, creates standard OpenAI client
|
|
50
|
+
2. Azure OpenAI variables (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL,
|
|
51
|
+
AZURE_OPENAI_API_VERSION) - if all set, creates Azure OpenAI client
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
OpenAI: Configured OpenAI or AzureOpenAI client instance.
|
|
55
|
+
|
|
56
|
+
Raises:
|
|
57
|
+
ValueError: If no valid environment variables are found for either service.
|
|
58
|
+
"""
|
|
59
|
+
openai_api_key = CONTAINER.resolve(OpenAIAPIKey)
|
|
60
|
+
if openai_api_key.value:
|
|
61
|
+
return OpenAI()
|
|
62
|
+
|
|
63
|
+
azure_api_key = CONTAINER.resolve(AzureOpenAIAPIKey)
|
|
64
|
+
azure_base_url = CONTAINER.resolve(AzureOpenAIBaseURL)
|
|
65
|
+
azure_api_version = CONTAINER.resolve(AzureOpenAIAPIVersion)
|
|
66
|
+
|
|
67
|
+
if all(param.value for param in [azure_api_key, azure_base_url, azure_api_version]):
|
|
68
|
+
_check_azure_v1_api_url(azure_base_url.value)
|
|
69
|
+
return AzureOpenAI(
|
|
70
|
+
api_key=azure_api_key.value,
|
|
71
|
+
base_url=azure_base_url.value,
|
|
72
|
+
api_version=azure_api_version.value,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
raise ValueError(
|
|
76
|
+
"No valid OpenAI or Azure OpenAI environment variables found. "
|
|
77
|
+
"Please set either OPENAI_API_KEY or AZURE_OPENAI_API_KEY, "
|
|
78
|
+
"AZURE_OPENAI_BASE_URL, and AZURE_OPENAI_API_VERSION."
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def provide_async_openai_client() -> AsyncOpenAI:
|
|
83
|
+
"""Provide asynchronous OpenAI client based on environment variables.
|
|
84
|
+
|
|
85
|
+
Automatically detects and prioritizes OpenAI over Azure OpenAI configuration.
|
|
86
|
+
Checks the following environment variables in order:
|
|
87
|
+
1. OPENAI_API_KEY - if set, creates standard AsyncOpenAI client
|
|
88
|
+
2. Azure OpenAI variables (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL,
|
|
89
|
+
AZURE_OPENAI_API_VERSION) - if all set, creates AsyncAzureOpenAI client
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
AsyncOpenAI: Configured AsyncOpenAI or AsyncAzureOpenAI client instance.
|
|
93
|
+
|
|
94
|
+
Raises:
|
|
95
|
+
ValueError: If no valid environment variables are found for either service.
|
|
96
|
+
"""
|
|
97
|
+
openai_api_key = CONTAINER.resolve(OpenAIAPIKey)
|
|
98
|
+
if openai_api_key.value:
|
|
99
|
+
return AsyncOpenAI()
|
|
100
|
+
|
|
101
|
+
azure_api_key = CONTAINER.resolve(AzureOpenAIAPIKey)
|
|
102
|
+
azure_base_url = CONTAINER.resolve(AzureOpenAIBaseURL)
|
|
103
|
+
azure_api_version = CONTAINER.resolve(AzureOpenAIAPIVersion)
|
|
104
|
+
|
|
105
|
+
if all(param.value for param in [azure_api_key, azure_base_url, azure_api_version]):
|
|
106
|
+
_check_azure_v1_api_url(azure_base_url.value)
|
|
107
|
+
return AsyncAzureOpenAI(
|
|
108
|
+
api_key=azure_api_key.value,
|
|
109
|
+
base_url=azure_base_url.value,
|
|
110
|
+
api_version=azure_api_version.value,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
raise ValueError(
|
|
114
|
+
"No valid OpenAI or Azure OpenAI environment variables found. "
|
|
115
|
+
"Please set either OPENAI_API_KEY or AZURE_OPENAI_API_KEY, "
|
|
116
|
+
"AZURE_OPENAI_BASE_URL, and AZURE_OPENAI_API_VERSION."
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
|
|
121
|
+
CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))
|
|
122
|
+
CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
|
|
123
|
+
CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
|
|
124
|
+
CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
|
|
125
|
+
CONTAINER.register(
|
|
126
|
+
cls=AzureOpenAIAPIVersion,
|
|
127
|
+
provider=lambda: AzureOpenAIAPIVersion(os.getenv("AZURE_OPENAI_API_VERSION", "preview")),
|
|
128
|
+
)
|
|
129
|
+
CONTAINER.register(OpenAI, provide_openai_client)
|
|
130
|
+
CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
|
|
131
|
+
CONTAINER.register(tiktoken.Encoding, lambda: tiktoken.get_encoding("o200k_base"))
|
|
132
|
+
CONTAINER.register(TextChunker, lambda: TextChunker(CONTAINER.resolve(tiktoken.Encoding)))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def reset_environment_registrations():
|
|
136
|
+
"""Reset environment variable related registrations in the container.
|
|
137
|
+
|
|
138
|
+
This function re-registers environment variable dependent services to pick up
|
|
139
|
+
current environment variable values. Useful for testing when environment
|
|
140
|
+
variables are changed after initial container setup.
|
|
141
|
+
"""
|
|
142
|
+
CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
|
|
143
|
+
CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
|
|
144
|
+
CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
|
|
145
|
+
CONTAINER.register(
|
|
146
|
+
cls=AzureOpenAIAPIVersion,
|
|
147
|
+
provider=lambda: AzureOpenAIAPIVersion(os.getenv("AZURE_OPENAI_API_VERSION", "preview")),
|
|
148
|
+
)
|
|
149
|
+
CONTAINER.register(OpenAI, provide_openai_client)
|
|
150
|
+
CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
|