openaivec 0.13.0.tar.gz → 0.13.2.tar.gz

Files changed (82)
  1. {openaivec-0.13.0 → openaivec-0.13.2}/PKG-INFO +39 -7
  2. {openaivec-0.13.0 → openaivec-0.13.2}/README.md +38 -6
  3. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/embeddings.py +8 -8
  4. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/model.py +37 -1
  5. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/pandas_ext.py +30 -17
  6. openaivec-0.13.2/src/openaivec/provider.py +150 -0
  7. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/responses.py +79 -31
  8. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/spark.py +15 -15
  9. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/util.py +18 -12
  10. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_provider.py +77 -22
  11. openaivec-0.13.2/tests/test_util.py +297 -0
  12. openaivec-0.13.0/src/openaivec/provider.py +0 -98
  13. openaivec-0.13.0/tests/test_util.py +0 -41
  14. {openaivec-0.13.0 → openaivec-0.13.2}/.env.example +0 -0
  15. {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-mkdocs.yml +0 -0
  16. {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-package.yml +0 -0
  17. {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-test.yml +0 -0
  18. {openaivec-0.13.0 → openaivec-0.13.2}/.github/workflows/python-update.yml +0 -0
  19. {openaivec-0.13.0 → openaivec-0.13.2}/.gitignore +0 -0
  20. {openaivec-0.13.0 → openaivec-0.13.2}/CODE_OF_CONDUCT.md +0 -0
  21. {openaivec-0.13.0 → openaivec-0.13.2}/LICENSE +0 -0
  22. {openaivec-0.13.0 → openaivec-0.13.2}/SECURITY.md +0 -0
  23. {openaivec-0.13.0 → openaivec-0.13.2}/SUPPORT.md +0 -0
  24. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/di.md +0 -0
  25. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/embeddings.md +0 -0
  26. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/pandas_ext.md +0 -0
  27. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/prompt.md +0 -0
  28. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/proxy.md +0 -0
  29. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/responses.md +0 -0
  30. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/spark.md +0 -0
  31. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/task.md +0 -0
  32. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  33. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  34. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  35. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  36. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  37. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  38. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  39. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  40. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  41. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  42. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  43. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/tasks/nlp/translation.md +0 -0
  44. {openaivec-0.13.0 → openaivec-0.13.2}/docs/api/util.md +0 -0
  45. {openaivec-0.13.0 → openaivec-0.13.2}/docs/index.md +0 -0
  46. {openaivec-0.13.0 → openaivec-0.13.2}/docs/robots.txt +0 -0
  47. {openaivec-0.13.0 → openaivec-0.13.2}/mkdocs.yml +0 -0
  48. {openaivec-0.13.0 → openaivec-0.13.2}/pyproject.toml +0 -0
  49. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/__init__.py +0 -0
  50. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/di.py +0 -0
  51. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/log.py +0 -0
  52. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/prompt.py +0 -0
  53. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/proxy.py +0 -0
  54. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/serialize.py +0 -0
  55. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/__init__.py +0 -0
  56. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/__init__.py +0 -0
  57. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  58. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  59. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  60. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  61. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  62. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  63. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/__init__.py +0 -0
  64. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  65. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  66. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  67. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  68. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  69. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/nlp/translation.py +0 -0
  70. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/table/__init__.py +0 -0
  71. {openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/task/table/fillna.py +0 -0
  72. {openaivec-0.13.0 → openaivec-0.13.2}/tests/__init__.py +0 -0
  73. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_di.py +0 -0
  74. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_embeddings.py +0 -0
  75. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_pandas_ext.py +0 -0
  76. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_prompt.py +0 -0
  77. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_proxy.py +0 -0
  78. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_responses.py +0 -0
  79. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_serialize.py +0 -0
  80. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_spark.py +0 -0
  81. {openaivec-0.13.0 → openaivec-0.13.2}/tests/test_task.py +0 -0
  82. {openaivec-0.13.0 → openaivec-0.13.2}/uv.lock +0 -0
{openaivec-0.13.0 → openaivec-0.13.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openaivec
-Version: 0.13.0
+Version: 0.13.2
 Summary: Generative mutation for tabular calculation
 Project-URL: Homepage, https://microsoft.github.io/openaivec/
 Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -180,8 +180,8 @@ from openaivec import pandas_ext
 os.environ["OPENAI_API_KEY"] = "your-api-key-here"
 # Or for Azure OpenAI:
 # os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
-# os.environ["AZURE_OPENAI_API_ENDPOINT"] = "https://<your-resource-name>.services.ai.azure.com"
-# os.environ["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"
+# os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+# os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
 
 # Authentication Option 2: Custom client (optional)
 # from openai import OpenAI, AsyncOpenAI
@@ -190,6 +190,7 @@ os.environ["OPENAI_API_KEY"] = "your-api-key-here"
 # pandas_ext.use_async(AsyncOpenAI())
 
 # Configure model (optional - defaults to gpt-4.1-mini)
+# For Azure OpenAI: use your deployment name, for OpenAI: use model name
 pandas_ext.responses_model("gpt-4.1-mini")
 
 # Create your data
@@ -211,6 +212,27 @@ result = df.assign(
 
 📓 **[Interactive pandas examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
 
+### Using with Reasoning Models
+
+When using reasoning models (o1-preview, o1-mini, o3-mini, etc.), you must set `temperature=None` to avoid API errors:
+
+```python
+# For reasoning models like o1-preview, o1-mini, o3-mini
+pandas_ext.responses_model("o1-mini")  # Set your reasoning model
+
+# MUST use temperature=None with reasoning models
+result = df.assign(
+    analysis=lambda df: df.text.ai.responses(
+        "Analyze this text step by step",
+        temperature=None  # Required for reasoning models
+    )
+)
+```
+
+**Why this is needed**: Reasoning models don't support temperature parameters and will return an error if temperature is specified. The library automatically detects these errors and provides guidance on how to fix them.
+
+**Reference**: [Azure OpenAI Reasoning Models](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning)
+
 ### Using Pre-configured Tasks
 
 For common text processing operations, openaivec provides ready-to-use tasks that eliminate the need to write custom prompts:
@@ -322,7 +344,7 @@ sc.environment["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
 
 # Option 2: Using Azure OpenAI
 # sc.environment["AZURE_OPENAI_API_KEY"] = os.environ.get("AZURE_OPENAI_API_KEY")
-# sc.environment["AZURE_OPENAI_API_ENDPOINT"] = os.environ.get("AZURE_OPENAI_API_ENDPOINT")
+# sc.environment["AZURE_OPENAI_BASE_URL"] = os.environ.get("AZURE_OPENAI_BASE_URL")
 # sc.environment["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_OPENAI_API_VERSION")
 ```
 
@@ -380,6 +402,16 @@ spark.udf.register(
     )
 )
 
+# --- Register UDF for Reasoning Models ---
+# For reasoning models (o1-preview, o1-mini, o3, etc.), set temperature=None
+spark.udf.register(
+    "reasoning_analysis",
+    responses_udf(
+        instructions="Analyze this step by step with detailed reasoning",
+        temperature=None  # Required for reasoning models
+    )
+)
+
 ```
 
 You can now use these UDFs in Spark SQL:
@@ -666,15 +698,15 @@ steps:
 
 # Configure Azure OpenAI authentication
 sc.environment["AZURE_OPENAI_API_KEY"] = "<your-api-key>"
-sc.environment["AZURE_OPENAI_API_ENDPOINT"] = "https://<your-resource-name>.services.ai.azure.com"
-sc.environment["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"
+sc.environment["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
 
 # Register UDFs
 spark.udf.register(
     "analyze_text",
     responses_udf(
         instructions="Analyze the sentiment of the text",
-        model_name="<your-deployment-name>"
+        model_name="gpt-4.1-mini"  # Use your Azure deployment name here
     )
 )
 ```
{openaivec-0.13.0 → openaivec-0.13.2}/README.md

@@ -156,8 +156,8 @@ from openaivec import pandas_ext
 os.environ["OPENAI_API_KEY"] = "your-api-key-here"
 # Or for Azure OpenAI:
 # os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
-# os.environ["AZURE_OPENAI_API_ENDPOINT"] = "https://<your-resource-name>.services.ai.azure.com"
-# os.environ["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"
+# os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+# os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
 
 # Authentication Option 2: Custom client (optional)
 # from openai import OpenAI, AsyncOpenAI
@@ -166,6 +166,7 @@ os.environ["OPENAI_API_KEY"] = "your-api-key-here"
 # pandas_ext.use_async(AsyncOpenAI())
 
 # Configure model (optional - defaults to gpt-4.1-mini)
+# For Azure OpenAI: use your deployment name, for OpenAI: use model name
 pandas_ext.responses_model("gpt-4.1-mini")
 
 # Create your data
@@ -187,6 +188,27 @@ result = df.assign(
 
 📓 **[Interactive pandas examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
 
+### Using with Reasoning Models
+
+When using reasoning models (o1-preview, o1-mini, o3-mini, etc.), you must set `temperature=None` to avoid API errors:
+
+```python
+# For reasoning models like o1-preview, o1-mini, o3-mini
+pandas_ext.responses_model("o1-mini")  # Set your reasoning model
+
+# MUST use temperature=None with reasoning models
+result = df.assign(
+    analysis=lambda df: df.text.ai.responses(
+        "Analyze this text step by step",
+        temperature=None  # Required for reasoning models
+    )
+)
+```
+
+**Why this is needed**: Reasoning models don't support temperature parameters and will return an error if temperature is specified. The library automatically detects these errors and provides guidance on how to fix them.
+
+**Reference**: [Azure OpenAI Reasoning Models](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning)
+
 ### Using Pre-configured Tasks
 
 For common text processing operations, openaivec provides ready-to-use tasks that eliminate the need to write custom prompts:
@@ -298,7 +320,7 @@ sc.environment["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
 
 # Option 2: Using Azure OpenAI
 # sc.environment["AZURE_OPENAI_API_KEY"] = os.environ.get("AZURE_OPENAI_API_KEY")
-# sc.environment["AZURE_OPENAI_API_ENDPOINT"] = os.environ.get("AZURE_OPENAI_API_ENDPOINT")
+# sc.environment["AZURE_OPENAI_BASE_URL"] = os.environ.get("AZURE_OPENAI_BASE_URL")
 # sc.environment["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_OPENAI_API_VERSION")
 ```
 
@@ -356,6 +378,16 @@ spark.udf.register(
     )
 )
 
+# --- Register UDF for Reasoning Models ---
+# For reasoning models (o1-preview, o1-mini, o3, etc.), set temperature=None
+spark.udf.register(
+    "reasoning_analysis",
+    responses_udf(
+        instructions="Analyze this step by step with detailed reasoning",
+        temperature=None  # Required for reasoning models
+    )
+)
+
 ```
 
 You can now use these UDFs in Spark SQL:
@@ -642,15 +674,15 @@ steps:
 
 # Configure Azure OpenAI authentication
 sc.environment["AZURE_OPENAI_API_KEY"] = "<your-api-key>"
-sc.environment["AZURE_OPENAI_API_ENDPOINT"] = "https://<your-resource-name>.services.ai.azure.com"
-sc.environment["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"
+sc.environment["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
 
 # Register UDFs
 spark.udf.register(
     "analyze_text",
     responses_udf(
         instructions="Analyze the sentiment of the text",
-        model_name="<your-deployment-name>"
+        model_name="gpt-4.1-mini"  # Use your Azure deployment name here
    )
 )
 ```
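
For Azure users, the README change above reduces to a two-line environment migration. A minimal before/after sketch, taken directly from the hunks above (resource name and versions are placeholders):

```python
import os

# 0.13.0 style (removed in this release):
# os.environ["AZURE_OPENAI_API_ENDPOINT"] = "https://<your-resource-name>.services.ai.azure.com"
# os.environ["AZURE_OPENAI_API_VERSION"] = "2025-04-01-preview"

# 0.13.2 style: point at the v1 API and use the rolling "preview" version
os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
```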
{openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/embeddings.py

@@ -4,7 +4,7 @@ from typing import List
 
 import numpy as np
 from numpy.typing import NDArray
-from openai import AsyncOpenAI, OpenAI, RateLimitError
+from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
 
 from .log import observe
 from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
@@ -24,7 +24,7 @@ class BatchEmbeddings:
 
     Attributes:
         client (OpenAI): Configured OpenAI client.
-        model_name (str): Model identifier (e.g., ``"text-embedding-3-small"``).
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name (e.g., ``"text-embedding-3-small"``).
         cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
     """
 
@@ -38,7 +38,7 @@ class BatchEmbeddings:
 
         Args:
             client (OpenAI): OpenAI client.
-            model_name (str): Embeddings model name.
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
             batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
 
         Returns:
@@ -47,7 +47,7 @@ class BatchEmbeddings:
         return cls(client=client, model_name=model_name, cache=BatchingMapProxy(batch_size=batch_size))
 
     @observe(_LOGGER)
-    @backoff(exception=RateLimitError, scale=15, max_retries=8)
+    @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     def _embed_chunk(self, inputs: List[str]) -> List[NDArray[np.float32]]:
         """Embed one minibatch of strings.
 
@@ -90,7 +90,7 @@ class AsyncBatchEmbeddings:
         import asyncio
         import numpy as np
         from openai import AsyncOpenAI
-        from openaivec import AsyncBatchEmbeddings
+        from openaivec import AsyncBatchEmbeddings
 
         # Assuming openai_async_client is an initialized AsyncOpenAI client
         openai_async_client = AsyncOpenAI()  # Replace with your actual client initialization
@@ -119,7 +119,7 @@ class AsyncBatchEmbeddings:
 
     Attributes:
         client (AsyncOpenAI): Configured OpenAI async client.
-        model_name (str): Embeddings model name.
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
        cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
     """
 
@@ -141,7 +141,7 @@ class AsyncBatchEmbeddings:
 
         Args:
             client (AsyncOpenAI): OpenAI async client.
-            model_name (str): Embeddings model name.
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
             batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
             max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
 
@@ -155,7 +155,7 @@ class AsyncBatchEmbeddings:
         )
 
     @observe(_LOGGER)
-    @backoff_async(exception=RateLimitError, scale=15, max_retries=8)
+    @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
     async def _embed_chunk(self, inputs: List[str]) -> List[NDArray[np.float32]]:
         """Embed one minibatch of strings asynchronously.
 
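The decorator change above both widens the retryable errors (adding `InternalServerError`) and retunes the schedule (`scale=1`, `max_retries=12` instead of `scale=15`, `max_retries=8`). The implementation lives in `openaivec.util` and is not shown in this diff; the following is only a hypothetical sketch of what a decorator with this signature might do, assuming exponential, jittered delays:

```python
import random
import time
from functools import wraps


def backoff(exceptions, scale=1, max_retries=12):
    """Hypothetical sketch, not the library's actual implementation."""
    retryable = tuple(exceptions)

    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return fn(*args, **kwargs)
                except retryable:
                    if attempt == max_retries - 1:
                        raise  # out of retries: propagate the last error
                    # roughly scale * 2**attempt seconds, with jitter
                    time.sleep(scale * (2 ** attempt) * random.uniform(0.5, 1.5))

        return wrapper

    return decorator
```

Under that reading, the new policy starts with much shorter waits but allows more attempts before giving up.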
{openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/model.py

@@ -59,29 +59,65 @@ class PreparedTask:
 
 @dataclass(frozen=True)
 class ResponsesModelName:
+    """Container for responses model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI responses API.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class EmbeddingsModelName:
+    """Container for embeddings model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI embeddings API.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class OpenAIAPIKey:
+    """Container for OpenAI API key configuration.
+
+    Attributes:
+        value (str): The API key for OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class AzureOpenAIAPIKey:
+    """Container for Azure OpenAI API key configuration.
+
+    Attributes:
+        value (str): The API key for Azure OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
-class AzureOpenAIEndpoint:
+class AzureOpenAIBaseURL:
+    """Container for Azure OpenAI base URL configuration.
+
+    Attributes:
+        value (str): The base URL for Azure OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class AzureOpenAIAPIVersion:
+    """Container for Azure OpenAI API version configuration.
+
+    Attributes:
+        value (str): The API version for Azure OpenAI services.
+    """
+
     value: str
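
These frozen dataclasses exist so configuration values can be registered and resolved through the package's DI container (see provider.py below). A small usage sketch, assuming only the `register`/`resolve` calls that appear elsewhere in this diff:

```python
from dataclasses import FrozenInstanceError

from openaivec.model import AzureOpenAIBaseURL
from openaivec.provider import CONTAINER

# Register a provider for the config value, then resolve it on demand.
CONTAINER.register(
    AzureOpenAIBaseURL,
    lambda: AzureOpenAIBaseURL("https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"),
)
print(CONTAINER.resolve(AzureOpenAIBaseURL).value)

# frozen=True makes the resolved value immutable.
try:
    CONTAINER.resolve(AzureOpenAIBaseURL).value = "something-else"
except FrozenInstanceError:
    pass
```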
{openaivec-0.13.0 → openaivec-0.13.2}/src/openaivec/pandas_ext.py

@@ -7,7 +7,7 @@ from openaivec import pandas_ext
 
     # Option 1: Use environment variables (automatic detection)
     # Set OPENAI_API_KEY or Azure OpenAI environment variables
-    # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_API_ENDPOINT, AZURE_OPENAI_API_VERSION)
+    # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
     # No explicit setup needed - clients are automatically created
 
     # Option 2: Use an existing OpenAI client instance
@@ -17,14 +17,18 @@ pandas_ext.use(client)
     # Option 3: Use an existing Azure OpenAI client instance
     azure_client = AzureOpenAI(
         api_key="your-azure-key",
-        azure_endpoint="https://<your-resource-name>.services.ai.azure.com",
-        api_version="2025-04-01-preview"
+        base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+        api_version="preview"
     )
     pandas_ext.use(azure_client)
 
-    # Option 4: Use async clients
-    async_client = AsyncOpenAI(api_key="your-api-key")
-    pandas_ext.use_async(async_client)
+    # Option 4: Use async Azure OpenAI client instance
+    async_azure_client = AsyncAzureOpenAI(
+        api_key="your-azure-key",
+        base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+        api_version="preview"
+    )
+    pandas_ext.use_async(async_azure_client)
 
     # Set up model names (optional, defaults shown)
     pandas_ext.responses_model("gpt-4.1-mini")
@@ -48,7 +52,7 @@ from pydantic import BaseModel
 
 from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
 from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
-from .provider import CONTAINER
+from .provider import CONTAINER, _check_azure_v1_api_url
 from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
 from .responses import AsyncBatchResponses, BatchResponses
 from .task.table import FillNaResponse, fillna
@@ -74,6 +78,10 @@ def use(client: OpenAI) -> None:
         `openai.AzureOpenAI` instance.
         The same instance is reused by every helper in this module.
     """
+    # Check Azure v1 API URL if using AzureOpenAI client
+    if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
+
     CONTAINER.register(OpenAI, lambda: client)
 
 
@@ -85,6 +93,10 @@ def use_async(client: AsyncOpenAI) -> None:
         `openai.AsyncAzureOpenAI` instance.
         The same instance is reused by every helper in this module.
     """
+    # Check Azure v1 API URL if using AsyncAzureOpenAI client
+    if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
+
     CONTAINER.register(AsyncOpenAI, lambda: client)
 
 
@@ -92,7 +104,7 @@ def responses_model(name: str) -> None:
     """Override the model used for text responses.
 
     Args:
-        name (str): Model name as listed in the OpenAI API
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
             (for example, ``gpt-4.1-mini``).
     """
     CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
@@ -102,7 +114,8 @@ def embeddings_model(name: str) -> None:
     """Override the model used for text embeddings.
 
     Args:
-        name (str): Embedding model name, e.g. ``text-embedding-3-small``.
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name,
+            e.g. ``text-embedding-3-small``.
     """
     CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
 
@@ -143,7 +156,7 @@ class OpenAIVecSeriesAccessor:
         instructions: str,
         cache: BatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         client: BatchResponses = BatchResponses(
@@ -205,7 +218,7 @@ class OpenAIVecSeriesAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Call an LLM once for every Series element.
@@ -438,7 +451,7 @@ class OpenAIVecDataFrameAccessor:
         instructions: str,
         cache: BatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON using a provided cache.
@@ -496,7 +509,7 @@ class OpenAIVecDataFrameAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON.
@@ -681,7 +694,7 @@ class AsyncOpenAIVecSeriesAccessor:
         instructions: str,
         cache: AsyncBatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Call an LLM once for every Series element using a provided cache (asynchronously).
@@ -848,7 +861,7 @@ class AsyncOpenAIVecSeriesAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
     ) -> pd.Series:
@@ -975,7 +988,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         instructions: str,
         cache: AsyncBatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
@@ -1040,7 +1053,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
     ) -> pd.Series:
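
The net effect of the new `use()`/`use_async()` guard is that configuring pandas_ext with a legacy-style Azure client now produces a warning. A sketch of that behavior (endpoint and API version are placeholders):

```python
from openai import AzureOpenAI
from openaivec import pandas_ext

legacy_client = AzureOpenAI(
    api_key="your-azure-key",
    azure_endpoint="https://YOUR-RESOURCE-NAME.services.ai.azure.com",  # old-style endpoint
    api_version="2024-10-21",
)
# base_url does not end with /openai/v1, so this emits the UserWarning
# from _check_azure_v1_api_url (defined in provider.py below).
pandas_ext.use(legacy_client)
```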
openaivec-0.13.2/src/openaivec/provider.py

@@ -0,0 +1,150 @@
+import os
+import warnings
+
+import tiktoken
+from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
+
+from . import di
+from .model import (
+    AzureOpenAIAPIKey,
+    AzureOpenAIAPIVersion,
+    AzureOpenAIBaseURL,
+    EmbeddingsModelName,
+    OpenAIAPIKey,
+    ResponsesModelName,
+)
+from .util import TextChunker
+
+CONTAINER = di.Container()
+
+
+def _check_azure_v1_api_url(base_url: str) -> None:
+    """Check if Azure OpenAI base URL uses the recommended v1 API format.
+
+    Issues a warning if the URL doesn't end with '/openai/v1/' to encourage
+    migration to the v1 API format as recommended by Microsoft.
+
+    Reference: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle
+
+    Args:
+        base_url (str): The Azure OpenAI base URL to check.
+    """
+    if base_url and not base_url.rstrip("/").endswith("/openai/v1"):
+        warnings.warn(
+            "⚠️ Azure OpenAI v1 API is recommended. Your base URL should end with '/openai/v1/'. "
+            f"Current URL: '{base_url}'. "
+            "Consider updating to: 'https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/' "
+            "for better performance and future compatibility. "
+            "See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle",
+            UserWarning,
+            stacklevel=3,
+        )
+
+
+def provide_openai_client() -> OpenAI:
+    """Provide OpenAI client based on environment variables.
+
+    Automatically detects and prioritizes OpenAI over Azure OpenAI configuration.
+    Checks the following environment variables in order:
+    1. OPENAI_API_KEY - if set, creates standard OpenAI client
+    2. Azure OpenAI variables (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL,
+       AZURE_OPENAI_API_VERSION) - if all set, creates Azure OpenAI client
+
+    Returns:
+        OpenAI: Configured OpenAI or AzureOpenAI client instance.
+
+    Raises:
+        ValueError: If no valid environment variables are found for either service.
+    """
+    openai_api_key = CONTAINER.resolve(OpenAIAPIKey)
+    if openai_api_key.value:
+        return OpenAI()
+
+    azure_api_key = CONTAINER.resolve(AzureOpenAIAPIKey)
+    azure_base_url = CONTAINER.resolve(AzureOpenAIBaseURL)
+    azure_api_version = CONTAINER.resolve(AzureOpenAIAPIVersion)
+
+    if all(param.value for param in [azure_api_key, azure_base_url, azure_api_version]):
+        _check_azure_v1_api_url(azure_base_url.value)
+        return AzureOpenAI(
+            api_key=azure_api_key.value,
+            base_url=azure_base_url.value,
+            api_version=azure_api_version.value,
+        )
+
+    raise ValueError(
+        "No valid OpenAI or Azure OpenAI environment variables found. "
+        "Please set either OPENAI_API_KEY or AZURE_OPENAI_API_KEY, "
+        "AZURE_OPENAI_BASE_URL, and AZURE_OPENAI_API_VERSION."
+    )
+
+
+def provide_async_openai_client() -> AsyncOpenAI:
+    """Provide asynchronous OpenAI client based on environment variables.
+
+    Automatically detects and prioritizes OpenAI over Azure OpenAI configuration.
+    Checks the following environment variables in order:
+    1. OPENAI_API_KEY - if set, creates standard AsyncOpenAI client
+    2. Azure OpenAI variables (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL,
+       AZURE_OPENAI_API_VERSION) - if all set, creates AsyncAzureOpenAI client
+
+    Returns:
+        AsyncOpenAI: Configured AsyncOpenAI or AsyncAzureOpenAI client instance.
+
+    Raises:
+        ValueError: If no valid environment variables are found for either service.
+    """
+    openai_api_key = CONTAINER.resolve(OpenAIAPIKey)
+    if openai_api_key.value:
+        return AsyncOpenAI()
+
+    azure_api_key = CONTAINER.resolve(AzureOpenAIAPIKey)
+    azure_base_url = CONTAINER.resolve(AzureOpenAIBaseURL)
+    azure_api_version = CONTAINER.resolve(AzureOpenAIAPIVersion)
+
+    if all(param.value for param in [azure_api_key, azure_base_url, azure_api_version]):
+        _check_azure_v1_api_url(azure_base_url.value)
+        return AsyncAzureOpenAI(
+            api_key=azure_api_key.value,
+            base_url=azure_base_url.value,
+            api_version=azure_api_version.value,
+        )
+
+    raise ValueError(
+        "No valid OpenAI or Azure OpenAI environment variables found. "
+        "Please set either OPENAI_API_KEY or AZURE_OPENAI_API_KEY, "
+        "AZURE_OPENAI_BASE_URL, and AZURE_OPENAI_API_VERSION."
+    )
+
+
+CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName("gpt-4.1-mini"))
+CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName("text-embedding-3-small"))
+CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
+CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
+CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
+CONTAINER.register(
+    cls=AzureOpenAIAPIVersion,
+    provider=lambda: AzureOpenAIAPIVersion(os.getenv("AZURE_OPENAI_API_VERSION", "preview")),
+)
+CONTAINER.register(OpenAI, provide_openai_client)
+CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
+CONTAINER.register(tiktoken.Encoding, lambda: tiktoken.get_encoding("o200k_base"))
+CONTAINER.register(TextChunker, lambda: TextChunker(CONTAINER.resolve(tiktoken.Encoding)))
+
+
+def reset_environment_registrations():
+    """Reset environment variable related registrations in the container.
+
+    This function re-registers environment variable dependent services to pick up
+    current environment variable values. Useful for testing when environment
+    variables are changed after initial container setup.
+    """
+    CONTAINER.register(OpenAIAPIKey, lambda: OpenAIAPIKey(os.getenv("OPENAI_API_KEY")))
+    CONTAINER.register(AzureOpenAIAPIKey, lambda: AzureOpenAIAPIKey(os.getenv("AZURE_OPENAI_API_KEY")))
+    CONTAINER.register(AzureOpenAIBaseURL, lambda: AzureOpenAIBaseURL(os.getenv("AZURE_OPENAI_BASE_URL")))
+    CONTAINER.register(
+        cls=AzureOpenAIAPIVersion,
+        provider=lambda: AzureOpenAIAPIVersion(os.getenv("AZURE_OPENAI_API_VERSION", "preview")),
+    )
+    CONTAINER.register(OpenAI, provide_openai_client)
+    CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
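
Since `_check_azure_v1_api_url` is pure (URL in, optional warning out), its contract is easy to verify directly against the code above. A short check (the resource name is a placeholder):

```python
import warnings

from openaivec.provider import _check_azure_v1_api_url

# Legacy-style URL: the helper emits a UserWarning nudging toward /openai/v1/.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _check_azure_v1_api_url("https://my-resource.services.ai.azure.com")
assert any(issubclass(w.category, UserWarning) for w in caught)

# v1-style URL passes silently; rstrip("/") tolerates the trailing slash.
_check_azure_v1_api_url("https://my-resource.services.ai.azure.com/openai/v1/")
```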