openaivec 1.0.9__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,10 @@ class BatchSizeSuggester:
21
21
  min_batch_size: int = 10
22
22
  min_duration: float = 30.0
23
23
  max_duration: float = 60.0
24
- step_ratio: float = 0.2
24
+ step_ratio_up: float = 0.1
25
+ step_ratio_down: float = 0.2
26
+ max_step: int | None = None
27
+ min_step: int = 1
25
28
  sample_size: int = 4
26
29
  _history: list[PerformanceMetric] = field(default_factory=list)
27
30
  _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
@@ -34,8 +37,14 @@ class BatchSizeSuggester:
34
37
  raise ValueError("current_batch_size must be >= min_batch_size")
35
38
  if self.sample_size <= 0:
36
39
  raise ValueError("sample_size must be > 0")
37
- if self.step_ratio <= 0:
38
- raise ValueError("step_ratio must be > 0")
40
+ if self.step_ratio_up <= 0:
41
+ raise ValueError("step_ratio_up must be > 0")
42
+ if self.step_ratio_down <= 0:
43
+ raise ValueError("step_ratio_down must be > 0")
44
+ if self.max_step is not None and self.max_step <= 0:
45
+ raise ValueError("max_step must be > 0")
46
+ if self.min_step <= 0:
47
+ raise ValueError("min_step must be > 0")
39
48
  if self.min_duration <= 0 or self.max_duration <= 0:
40
49
  raise ValueError("min_duration and max_duration must be > 0")
41
50
  if self.min_duration >= self.max_duration:
@@ -94,9 +103,15 @@ class BatchSizeSuggester:
94
103
  current_size = self.current_batch_size
95
104
 
96
105
  if average_duration < self.min_duration:
97
- new_batch_size = int(current_size * (1 + self.step_ratio))
106
+ delta = max(self.min_step, int(current_size * self.step_ratio_up))
107
+ if self.max_step is not None:
108
+ delta = min(delta, self.max_step)
109
+ new_batch_size = current_size + delta
98
110
  elif average_duration > self.max_duration:
99
- new_batch_size = int(current_size * (1 - self.step_ratio))
111
+ delta = max(self.min_step, int(current_size * self.step_ratio_down))
112
+ if self.max_step is not None:
113
+ delta = min(delta, self.max_step)
114
+ new_batch_size = current_size - delta
100
115
  else:
101
116
  new_batch_size = current_size
102
117
 
openaivec/_provider.py CHANGED
@@ -21,6 +21,51 @@ __all__ = []
21
21
  CONTAINER = di.Container()
22
22
 
23
23
 
24
+ def _build_missing_credentials_error(
25
+ openai_api_key: str | None,
26
+ azure_api_key: str | None,
27
+ azure_base_url: str | None,
28
+ azure_api_version: str | None,
29
+ ) -> str:
30
+ """Build a detailed error message for missing credentials.
31
+
32
+ Args:
33
+ openai_api_key (str | None): The OpenAI API key value.
34
+ azure_api_key (str | None): The Azure OpenAI API key value.
35
+ azure_base_url (str | None): The Azure OpenAI base URL value.
36
+ azure_api_version (str | None): The Azure OpenAI API version value.
37
+
38
+ Returns:
39
+ str: A detailed error message with missing variables and setup instructions.
40
+ """
41
+ lines = ["No valid OpenAI or Azure OpenAI credentials found.", ""]
42
+
43
+ # Check OpenAI
44
+ lines.append("Option 1: Set OPENAI_API_KEY for OpenAI")
45
+ if openai_api_key:
46
+ lines.append(" ✓ OPENAI_API_KEY is set")
47
+ else:
48
+ lines.append(" ✗ OPENAI_API_KEY is not set")
49
+ lines.append(' Example: export OPENAI_API_KEY="sk-..."')
50
+ lines.append("")
51
+
52
+ # Check Azure OpenAI
53
+ lines.append("Option 2: Set all Azure OpenAI variables")
54
+ azure_vars = [
55
+ ("AZURE_OPENAI_API_KEY", azure_api_key, '"your-azure-api-key"'),
56
+ ("AZURE_OPENAI_BASE_URL", azure_base_url, '"https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"'),
57
+ ("AZURE_OPENAI_API_VERSION", azure_api_version, '"2024-12-01-preview"'),
58
+ ]
59
+ for var_name, var_value, example in azure_vars:
60
+ if var_value:
61
+ lines.append(f" ✓ {var_name} is set")
62
+ else:
63
+ lines.append(f" ✗ {var_name} is not set")
64
+ lines.append(f" Example: export {var_name}={example}")
65
+
66
+ return "\n".join(lines)
67
+
68
+
24
69
  def _check_azure_v1_api_url(base_url: str) -> None:
25
70
  """Check if Azure OpenAI base URL uses the recommended v1 API format.
26
71
 
@@ -81,9 +126,12 @@ def provide_openai_client() -> OpenAI:
81
126
  )
82
127
 
83
128
  raise ValueError(
84
- "No valid OpenAI or Azure OpenAI environment variables found. "
85
- "Please set either OPENAI_API_KEY or AZURE_OPENAI_API_KEY, "
86
- "AZURE_OPENAI_BASE_URL, and AZURE_OPENAI_API_VERSION."
129
+ _build_missing_credentials_error(
130
+ openai_api_key=openai_api_key.value,
131
+ azure_api_key=azure_api_key.value,
132
+ azure_base_url=azure_base_url.value,
133
+ azure_api_version=azure_api_version.value,
134
+ )
87
135
  )
88
136
 
89
137
 
@@ -124,9 +172,12 @@ def provide_async_openai_client() -> AsyncOpenAI:
124
172
  )
125
173
 
126
174
  raise ValueError(
127
- "No valid OpenAI or Azure OpenAI environment variables found. "
128
- "Please set either OPENAI_API_KEY or AZURE_OPENAI_API_KEY, "
129
- "AZURE_OPENAI_BASE_URL, and AZURE_OPENAI_API_VERSION."
175
+ _build_missing_credentials_error(
176
+ openai_api_key=openai_api_key.value,
177
+ azure_api_key=azure_api_key.value,
178
+ azure_base_url=azure_base_url.value,
179
+ azure_api_version=azure_api_version.value,
180
+ )
130
181
  )
131
182
 
132
183
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -60,9 +60,10 @@ sentiment = reviews.ai.responses(
60
60
  reasoning={"effort": "none"}, # Mirrors OpenAI SDK for reasoning models
61
61
  )
62
62
  print(sentiment.tolist())
63
+ # Output: ['Positive sentiment', 'Negative sentiment']
63
64
  ```
64
65
 
65
- **Try it live:** https://microsoft.github.io/openaivec/examples/pandas/
66
+ **Pandas tutorial (GitHub Pages):** https://microsoft.github.io/openaivec/examples/pandas/
66
67
 
67
68
  ## Benchmarks
68
69
 
@@ -81,6 +82,7 @@ Batching alone removes most HTTP overhead, and letting batching overlap with con
81
82
  ## Contents
82
83
 
83
84
  - [Why openaivec?](#why-openaivec)
85
+ - [Overview](#overview)
84
86
  - [Core Workflows](#core-workflows)
85
87
  - [Using with Apache Spark UDFs](#using-with-apache-spark-udfs)
86
88
  - [Building Prompts](#building-prompts)
@@ -92,14 +94,13 @@ Batching alone removes most HTTP overhead, and letting batching overlap with con
92
94
  ## Why openaivec?
93
95
 
94
96
  - Drop-in `.ai` and `.aio` accessors keep pandas analysts in familiar tooling.
95
- - OpenAI batch-optimized: `BatchingMapProxy`/`AsyncBatchingMapProxy` coalesce requests, dedupe prompts, and keep column order stable.
96
- - Smart batching (`BatchingMapProxy`/`AsyncBatchingMapProxy`) dedupes prompts, preserves order, and releases waiters on failure.
97
+ - OpenAI batch-optimized: `BatchingMapProxy`/`AsyncBatchingMapProxy` coalesce requests, dedupe prompts, preserve order, and release waiters on failure.
97
98
  - Reasoning support mirrors the OpenAI SDK; structured outputs accept Pydantic `response_format`.
98
99
  - Built-in caches and retries remove boilerplate; helpers reuse caches across pandas, Spark, and async flows.
99
100
  - Spark UDFs and Microsoft Fabric guides move notebooks into production-scale ETL.
100
101
  - Prompt tooling (`FewShotPromptBuilder`, `improve`) and the task library ship curated prompts with validated outputs.
101
102
 
102
- # Overview
103
+ ## Overview
103
104
 
104
105
  Vectorized OpenAI batch processing so you handle many inputs per call instead of one-by-one. Batching proxies dedupe inputs, enforce ordered outputs, and unblock waiters even on upstream errors. Cache helpers (`responses_with_cache`, Spark UDF builders) plug into the same layer so expensive prompts are reused across pandas, Spark, and async flows. Reasoning models honor SDK semantics. Requires Python 3.10+.
105
106
 
@@ -185,7 +186,7 @@ result = df.assign(
185
186
 
186
187
  ### Using with reasoning models
187
188
 
188
- Reasoning models (o1-preview, o1-mini, o3-mini, etc.) work without special flags. `reasoning` mirrors the OpenAI SDK.
189
+ Reasoning models (o1-preview, o1-mini, o3-mini, etc.) follow OpenAI SDK semantics. Pass `reasoning` when you want to override model defaults.
189
190
 
190
191
  ```python
191
192
  pandas_ext.set_responses_model("o1-mini") # Set your reasoning model
@@ -193,7 +194,7 @@ pandas_ext.set_responses_model("o1-mini") # Set your reasoning model
193
194
  result = df.assign(
194
195
  analysis=lambda df: df.text.ai.responses(
195
196
  "Analyze this text step by step",
196
- reasoning={"effort": "none"} # Optional: mirrors the OpenAI SDK argument
197
+ reasoning={"effort": "none"}, # Optional: mirrors the OpenAI SDK argument
197
198
  )
198
199
  )
199
200
  ```
@@ -253,7 +254,7 @@ df = pd.DataFrame({"text": [
253
254
  async def process_data():
254
255
  return await df["text"].aio.responses(
255
256
  "Analyze sentiment and classify as positive/negative/neutral",
256
- reasoning={"effort": "none"}, # Required for gpt-5.1
257
+ reasoning={"effort": "none"}, # Recommended for reasoning models
257
258
  max_concurrency=12 # Allow up to 12 concurrent requests
258
259
  )
259
260
 
@@ -4,14 +4,14 @@ openaivec/_embeddings.py,sha256=2JWFUZdHR1dvPdWPT4nVSZo0_TAz4gr8oLR3EhhtUyE,8200
4
4
  openaivec/_log.py,sha256=LHNs6AbJzM4weaRARZFroigxR6D148d7WSIMLk1IhbU,1439
5
5
  openaivec/_model.py,sha256=ICu9T2puXBMIkTOZdO7XStHMdSSHe4LmLVovsNfXb64,2744
6
6
  openaivec/_prompt.py,sha256=_fPATuWKaAdFD48Kuu0UQorlChA9mNZCDJx88bu_BuY,20626
7
- openaivec/_provider.py,sha256=8z8gPYY5-Z7rzDlj_NC6hR__DUqVAH7VLHJn6LalzRg,6158
7
+ openaivec/_provider.py,sha256=h-h2LwnaTDg-WquhD908upOSbMaMxV5OxoWWxfCdBTs,7952
8
8
  openaivec/_responses.py,sha256=82P_iO3uB0IBL0BZY51ncR02lGxoVzLDjCybTvliMR8,20661
9
9
  openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
10
10
  openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
11
11
  openaivec/pandas_ext.py,sha256=_y48qlG-npZsCCJJL1yev-yEU1YBZT83EiVl-lH0__o,87305
12
12
  openaivec/spark.py,sha256=XosDAcbzhnaIGyHBJ-p_ZBVJALroOXOFTjWWNRpSG3o,35022
13
13
  openaivec/_cache/__init__.py,sha256=IYUH5GKsJXuCX-k3XtT259rEz49EZm9KW2TIOTGW4uQ,314
14
- openaivec/_cache/optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
14
+ openaivec/_cache/optimize.py,sha256=9JKU7HYABx-sYzp9SqoHVljG0aRqzmCY5_KCJc_Uq3M,4545
15
15
  openaivec/_cache/proxy.py,sha256=aVjH_hmJIIso6SetV_-Ct3VaOSG-n9Dpil7TttnbYkE,30556
16
16
  openaivec/_schema/__init__.py,sha256=XUj3Jv6ZVDjyYzSmH6Q5lmDj-hBMfUg_eBNeZACXR6Q,368
17
17
  openaivec/_schema/infer.py,sha256=VyvORgmpkcPa8pITClOJYjNzF4VzgSWe_n-9kFJVUjE,15644
@@ -33,7 +33,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=P1AFazqmlE9Dy0OShNOXcY8X5rvsGg7X
33
33
  openaivec/task/nlp/translation.py,sha256=IgTy0PQZVF_Q6qis60STim7Vd7rYPVTfTfwP_U1kAKk,6603
34
34
  openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
35
35
  openaivec/task/table/fillna.py,sha256=nMlXvlUvyWgM9DxJDeRX3M37jxlqg0MgRet1Ds3ni5Y,6571
36
- openaivec-1.0.9.dist-info/METADATA,sha256=Dbr6LKw_0XQd9V9i7847HXn11Pqg2DicTVtgXnPQzmk,14139
37
- openaivec-1.0.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
- openaivec-1.0.9.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
39
- openaivec-1.0.9.dist-info/RECORD,,
36
+ openaivec-1.0.11.dist-info/METADATA,sha256=r80pLM802xfcjn0QcHTeShXr4l60cA8b1bsSX7SUXo8,14165
37
+ openaivec-1.0.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
+ openaivec-1.0.11.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
39
+ openaivec-1.0.11.dist-info/RECORD,,