openaivec 0.14.2__tar.gz → 0.14.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. openaivec-0.14.4/.github/copilot-instructions.md +349 -0
  2. {openaivec-0.14.2 → openaivec-0.14.4}/PKG-INFO +1 -1
  3. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_proxy.py +24 -2
  4. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_responses.py +77 -25
  5. openaivec-0.14.4/src/openaivec/_schema.py +454 -0
  6. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/pandas_ext.py +559 -423
  7. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/spark.py +21 -1
  8. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_pandas_ext.py +119 -0
  9. openaivec-0.14.4/tests/test_schema.py +371 -0
  10. openaivec-0.14.2/.github/copilot-instructions.md +0 -234
  11. {openaivec-0.14.2 → openaivec-0.14.4}/.env.example +0 -0
  12. {openaivec-0.14.2 → openaivec-0.14.4}/.github/workflows/python-mkdocs.yml +0 -0
  13. {openaivec-0.14.2 → openaivec-0.14.4}/.github/workflows/python-package.yml +0 -0
  14. {openaivec-0.14.2 → openaivec-0.14.4}/.github/workflows/python-test.yml +0 -0
  15. {openaivec-0.14.2 → openaivec-0.14.4}/.github/workflows/python-update.yml +0 -0
  16. {openaivec-0.14.2 → openaivec-0.14.4}/.gitignore +0 -0
  17. {openaivec-0.14.2 → openaivec-0.14.4}/CODE_OF_CONDUCT.md +0 -0
  18. {openaivec-0.14.2 → openaivec-0.14.4}/LICENSE +0 -0
  19. {openaivec-0.14.2 → openaivec-0.14.4}/README.md +0 -0
  20. {openaivec-0.14.2 → openaivec-0.14.4}/SECURITY.md +0 -0
  21. {openaivec-0.14.2 → openaivec-0.14.4}/SUPPORT.md +0 -0
  22. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/main.md +0 -0
  23. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/pandas_ext.md +0 -0
  24. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/spark.md +0 -0
  25. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/task.md +0 -0
  26. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  27. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  28. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  29. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  30. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  31. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  32. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  33. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  34. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  35. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  36. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  37. {openaivec-0.14.2 → openaivec-0.14.4}/docs/api/tasks/nlp/translation.md +0 -0
  38. {openaivec-0.14.2 → openaivec-0.14.4}/docs/index.md +0 -0
  39. {openaivec-0.14.2 → openaivec-0.14.4}/docs/robots.txt +0 -0
  40. {openaivec-0.14.2 → openaivec-0.14.4}/mkdocs.yml +0 -0
  41. {openaivec-0.14.2 → openaivec-0.14.4}/pyproject.toml +0 -0
  42. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/__init__.py +0 -0
  43. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_di.py +0 -0
  44. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_embeddings.py +0 -0
  45. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_log.py +0 -0
  46. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_model.py +0 -0
  47. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_optimize.py +0 -0
  48. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_prompt.py +0 -0
  49. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_provider.py +0 -0
  50. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_serialize.py +0 -0
  51. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/_util.py +0 -0
  52. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/__init__.py +0 -0
  53. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/__init__.py +0 -0
  54. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  55. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  56. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  57. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  58. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  59. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  60. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/__init__.py +0 -0
  61. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  62. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  63. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  64. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  65. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  66. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/nlp/translation.py +0 -0
  67. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/table/__init__.py +0 -0
  68. {openaivec-0.14.2 → openaivec-0.14.4}/src/openaivec/task/table/fillna.py +0 -0
  69. {openaivec-0.14.2 → openaivec-0.14.4}/tests/__init__.py +0 -0
  70. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_di.py +0 -0
  71. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_embeddings.py +0 -0
  72. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_optimize.py +0 -0
  73. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_prompt.py +0 -0
  74. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_provider.py +0 -0
  75. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_proxy.py +0 -0
  76. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_proxy_suggester.py +0 -0
  77. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_responses.py +0 -0
  78. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_serialize.py +0 -0
  79. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
  80. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_spark.py +0 -0
  81. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_task.py +0 -0
  82. {openaivec-0.14.2 → openaivec-0.14.4}/tests/test_util.py +0 -0
  83. {openaivec-0.14.2 → openaivec-0.14.4}/uv.lock +0 -0
@@ -0,0 +1,349 @@
+ # Copilot Instructions – openaivec
+
+ Concise guide for generating code that fits this project’s architecture, performance model, style, and public API. Favor these rules over generic heuristics.
+
+ ---
+
+ ## 1. Purpose & Scope
+
+ Provide high‑throughput, batched access to OpenAI / Azure OpenAI Responses + Embeddings for pandas & Spark with strict ordering, deduplication, and structured outputs.
+
+ ---
+
+ ## 2. Public Surface (primary exports)
+
+ From `openaivec.__init__`:
+
+ - `BatchResponses`, `AsyncBatchResponses`
+ - `BatchEmbeddings`, `AsyncBatchEmbeddings`
+ - `PreparedTask`, `FewShotPromptBuilder`
+
+ Entry points:
+
+ - Pandas accessors: `Series.ai` / `Series.aio`
+ - Spark UDF builders in `spark.py`
+ - Structured tasks under `task/`
+
+ Azure note: use the deployment name as `model`, and warn if the base URL is not in v1 format. Behavior otherwise mirrors OpenAI.
+
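+ A minimal import sketch of this surface:
+
+ ```python
+ from openaivec import (
+     AsyncBatchEmbeddings,
+     AsyncBatchResponses,
+     BatchEmbeddings,
+     BatchResponses,
+     FewShotPromptBuilder,
+     PreparedTask,
+ )
+ from openaivec import pandas_ext  # importing registers Series.ai / Series.aio
+ ```
+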
+ ---
+
+ ## 3. Architecture Map (roles)
+
+ Underscore modules are internal (not exported). Public surface = `__init__`, `pandas_ext.py`, `spark.py`, and `task/`.
+
+ Core batching & optimization:
+
+ - `_proxy.py`: Order‑preserving dedup, caching, progressive mini‑batch execution, progress bars (notebooks only), dynamic batch sizing when `batch_size=None` via `_optimize.BatchSizeSuggester`; sync + async variants.
+ - `_optimize.py`: `BatchSizeSuggester` adaptive control loop (targets 30–60s batches) + metrics capture.
+
+ Model / task abstractions:
+
+ - `_model.py`: Typed wrappers (model names, task configs, response/embedding model name value objects).
+ - `_prompt.py`: Few‑shot / structured prompt assembly (`FewShotPromptBuilder`).
+ - `task/`: Pre‑packaged `PreparedTask` definitions for common workflows (re-exported publicly).
+
+ LLM interaction layers:
+
+ - `_responses.py`: Vectorized JSON‑mode wrapper (`BatchResponses` / `AsyncBatchResponses`); enforces the same‑length contract; structured parse via `responses.parse`; reasoning‑model temperature guard with enhanced guidance warnings; retries with `backoff`.
+ - `_embeddings.py`: Embedding batching (`BatchEmbeddings` / `AsyncBatchEmbeddings`) returning `np.float32` arrays, de‑dup aware.
+ - `_schema.py`: Dynamic schema inference (`SchemaInferer`) producing Pydantic models at runtime; internal, not exported.
+
+ I/O & provider setup:
+
+ - `_provider.py`: Environment-driven auto-detection (OpenAI vs Azure). Registers defaults, validates the Azure v1 base URL, and hosts the DI container root (`CONTAINER`).
+ - `_di.py`: Lightweight dependency injection container; registration & resolution helpers.
+
+ Utilities & cross‑cutting concerns:
+
+ - `_util.py`: `backoff` / `backoff_async`, `TextChunker` token-based splitter.
+ - `_serialize.py`: Pydantic (de)serialization and Spark schema bridging support.
+ - `_log.py`: Observation decorator used for tracing (`@observe`).
+
+ DataFrame / Spark integration:
+
+ - `pandas_ext.py`: `.ai` / `.aio` accessors (sync + async), shared cache variants, model configuration helpers (`responses_model`, `embeddings_model`, `use`, `use_async`). Maintains Series length/index; optional auto batch size; exposes reasoning temperature control.
+ - `spark.py`: Async UDF builders (`responses_udf`, `task_udf`, `embeddings_udf`, `count_tokens_udf`, `split_to_chunks_udf`, `similarity_udf` – cosine similarity on embedding vectors). Per-partition duplicate caching; Pydantic → Spark `StructType` conversion; concurrency per executor with `max_concurrency`.
+
+ Observability & progress:
+
+ - Progress bars only when `show_progress=True` AND the notebook environment heuristics in `_proxy.py` pass.
+ - Adaptive batch suggestions recorded automatically around each unit API call.
+
+ Public exports (`__init__.py`): `BatchResponses`, `AsyncBatchResponses`, `BatchEmbeddings`, `AsyncBatchEmbeddings`, `PreparedTask`, `FewShotPromptBuilder`.
+
+ ---
+
+ ## 4. Core Principles & Contracts
+
+ 1. Always batch via the Proxy; never per-item API loops.
+ 2. `map_func` must return a list of identical length & order; on mismatch, raise `ValueError` after releasing events (deadlock prevention).
+ 3. Deduplicate inputs; restore original ordering in outputs (see the sketch below).
+ 4. Preserve pandas index & Spark schema deterministically.
+ 5. Show progress only in notebooks and only if `show_progress=True`.
+ 6. Reasoning models (o1/o3 families and similar) must use `temperature=None`.
+ 7. Attach exponential backoff for transient RateLimit / 5xx errors.
+ 8. Structured outputs (Pydantic) preferred over free-form JSON/text.
+
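+ A minimal sketch of contracts 1–3 (the hypothetical `map_func` stands in for any unit of work):
+
+ ```python
+ from openaivec._proxy import BatchingMapProxy
+
+ def map_func(xs: list[str]) -> list[str]:
+     # Contract: exactly one output per input, in the same order.
+     return [x.upper() for x in xs]
+
+ proxy = BatchingMapProxy[str, str](batch_size=2)
+ # "a" appears twice but is computed once; output order matches input order.
+ assert proxy.map(["a", "b", "a"], map_func) == ["A", "B", "A"]
+ ```
+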
+ ---
+
+ ## 5. Batching Proxy Rules
+
+ - Same-length return invariant is critical (break = bug).
+ - Async variant enforces `max_concurrency` via semaphore (see the sketch below).
+ - Shared caches (`*_with_cache`) enable cross-operation reuse; do not bypass them.
+ - Release all waiting events if an exception occurs (avoid deadlocks).
+ - Progress bars use `tqdm.auto`; only displayed if notebook heuristics pass AND `show_progress=True`.
+
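+ Async counterpart (a sketch; `AsyncBatchingMapProxy` is assumed to mirror the sync constructor plus `max_concurrency`):
+
+ ```python
+ import asyncio
+ from openaivec._proxy import AsyncBatchingMapProxy
+
+ async def map_func(xs: list[str]) -> list[str]:
+     return [x[::-1] for x in xs]
+
+ async def main() -> None:
+     # At most 4 mini-batches in flight at once.
+     proxy = AsyncBatchingMapProxy[str, str](batch_size=8, max_concurrency=4)
+     print(await proxy.map(["ab", "cd"], map_func))
+
+ asyncio.run(main())
+ ```
+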
+ ---
+
+ ## 6. Responses API Guidelines
+
+ - Use Responses JSON mode (`responses.parse`).
+ - Reasoning model safety: force `temperature=None`; provide clear error guidance.
+ - Favor small, reusable prompts enabling dedup benefits.
+ - Encourage Pydantic `response_format` for schema validation & Spark schema inference.
+
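+ A structured-output sketch using the dataclass fields shown in `_responses.py` (the model name is a placeholder):
+
+ ```python
+ from openai import OpenAI
+ from pydantic import BaseModel
+ from openaivec import BatchResponses
+
+ class Sentiment(BaseModel):
+     label: str
+     score: float
+
+ batch = BatchResponses(
+     client=OpenAI(),
+     model_name="gpt-4.1-mini",
+     system_message="Classify sentiment.",
+     response_format=Sentiment,
+ )
+ results = batch.parse(["great product", "terrible support"])  # list[Sentiment | None]
+ ```
+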
+ ---
+
+ ## 7. Embeddings Guidelines
+
+ - Return `np.ndarray` of dtype `float32`.
+ - Batch sizes typically larger than for Responses; keep order stable.
+ - Avoid per-item postprocessing—vector ops should stay batched.
+
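+ Shape-contract sketch (the constructor is assumed to mirror `BatchResponses`, and the `create` method name follows the §17 wrapper pattern; both are assumptions):
+
+ ```python
+ import numpy as np
+ from openai import OpenAI
+ from openaivec import BatchEmbeddings
+
+ emb = BatchEmbeddings(client=OpenAI(), model_name="text-embedding-3-small")
+ vectors = emb.create(["alpha", "beta", "alpha"])  # one vector per input, order preserved
+ assert all(v.dtype == np.float32 for v in vectors)
+ ```
+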
+ ---
+
+ ## 8. pandas Extension Rules
+
+ - `.ai.responses` / `.ai.embeddings` preserve Series length & index.
+ - Async via `.aio.*` with configurable `batch_size` & `max_concurrency`.
+ - `*_with_cache` shares a passed proxy (promote reuse, minimal API calls).
+ - No hidden reindexing or sorting; user order is authoritative.
+
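+ A minimal accessor sketch (assumes importing `pandas_ext` registers the accessors and that a client/model is configured, e.g. via env variables):
+
+ ```python
+ import pandas as pd
+ from openaivec import pandas_ext  # registers the .ai / .aio accessors
+
+ s = pd.Series(["good", "bad", "good"], index=[10, 20, 30])
+ out = s.ai.responses("Answer 'positive' or 'negative'.")
+ assert out.index.equals(s.index)  # index preserved, one output per input
+ ```
+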
+ ---
+
+ ## 9. Spark UDF Rules
+
+ - Cache duplicates per partition (dict lookup) before remote calls.
+ - Convert Pydantic -> Spark StructType; treat `Enum`/`Literal` as `StringType`.
+ - Respect reasoning `temperature=None` rule.
+ - Provide chunking & token counting via helper UDFs.
+ - Avoid excessive nested structs—keep schemas shallow & ergonomic.
+
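+ Schema conversion sketch: `Enum` (and `Literal`) fields map to `StringType` in the derived `StructType`; keep models shallow.
+
+ ```python
+ from enum import Enum
+ from pydantic import BaseModel
+
+ class Label(str, Enum):
+     POSITIVE = "positive"
+     NEGATIVE = "negative"
+
+ class Result(BaseModel):
+     label: Label  # -> StringType in the generated Spark schema
+     score: float  # -> numeric Spark type (DoubleType is the typical mapping)
+ ```
+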
+ ---
+
+ ## 10. Provider / Azure Rules
+
+ - Auto-detect provider from env variables; deployment name = model for Azure.
+ - Warn (don’t fail) if Azure base URL not v1 format; still proceed.
+ - Keep code paths unified; avoid forking logic unless behavior diverges.
+
+ ---
+
+ ## 11. Coding Standards
+
+ - Python ≥ 3.10; Ruff for lint/format (`line-length=120`).
+ - Absolute imports (except re-export patterns in `__init__.py`) – enforced by Ruff rule TID252.
+ - Modern typing (`|` unions, builtin generics); prefer `str | None` over `Optional[str]`, `list[str]` over `List[str]`.
+ - Prefer `@dataclass` for simple immutable-ish contracts; use Pydantic only at validation boundaries.
+ - Raise narrow exceptions (`ValueError`, `TypeError`) on contract violations—avoid broad except.
+ - Public APIs: Google-style docstrings with return/raises sections.
+
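+ An illustrative sketch of these conventions (`Chunk` is a hypothetical type):
+
+ ```python
+ from dataclasses import dataclass
+
+ @dataclass(frozen=True)
+ class Chunk:
+     """One token-bounded text chunk.
+
+     Attributes:
+         text: Chunk contents.
+         overlap: Overlapping token count with the previous chunk, or None.
+     """
+
+     text: str
+     overlap: int | None = None
+ ```
+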
+ ---
+
+ ## 12. Testing Strategy
+
+ Live-first philosophy: call real OpenAI / Azure endpoints when tests validate core contracts and remain fast. Use mocks only for: (a) forced transient errors, (b) rare fault paths, (c) deterministic pure utilities.
+
+ Key rules:
+
+ 1. Skip (not fail) when credentials (`OPENAI_API_KEY` or Azure env) are absent; see the sketch below.
+ 2. Keep prompts minimal; batch size 1–4 for speed & cost.
+ 3. Assertions allow natural-language variance—focus on structure, ordering, lengths, types.
+ 4. Test dedup, ordering, cache reuse, concurrency limits, reasoning temperature enforcement.
+ 5. Inject retries by patching the smallest internal callable (not the whole client) for fault tests.
+ 6. Mark heavier suites separately if needed (e.g., `@pytest.mark.heavy_live`).
+ 7. Flake mitigation: broaden assertions (containment / regex / type+length) instead of pinning brittle verbatim strings.
+
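+ Credential-gated live test sketch (rule 1):
+
+ ```python
+ import os
+
+ import pytest
+
+ requires_openai = pytest.mark.skipif(
+     not os.getenv("OPENAI_API_KEY"), reason="OPENAI_API_KEY not set"
+ )
+
+ @requires_openai
+ def test_outputs_align_with_inputs():
+     # Keep prompts minimal (rule 2); assert structure, not verbatim text (rules 3 & 7).
+     ...
+ ```
+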
+ ---
+
+ ## 13. Performance Guidance
+
+ - Responses batch size: 32–128 (default 128). Embeddings: 64–256.
+ - Async `max_concurrency`: typical 4–12 (tune per rate limits).
+ - Exploit dedup to collapse repeated prompts/inputs.
+ - Reuse caches across Series operations & Spark partitions.
+ - Avoid synchronous hotspots inside async loops (keep `map_func` lean).
+ - Automatic batch size mode targets ~30–60s per batch (`BatchSizeSuggester`).
+
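+ Automatic sizing sketch (per §3, leaving `batch_size=None` delegates to `BatchSizeSuggester`):
+
+ ```python
+ from openaivec._proxy import BatchingMapProxy
+
+ # batch_size=None enables adaptive sizing, targeting roughly 30-60s per batch.
+ proxy = BatchingMapProxy[str, str](batch_size=None)
+ ```
+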
+ ---
+
+ ## 14. Public / Internal Module Policy (`__all__`)
+
+ Public: `pandas_ext.py`, `spark.py`, everything under `task/`.
+ Internal: all underscore-prefixed modules; set `__all__ = []` explicitly.
+ Package exports: maintain alphabetical `__all__` in `__init__.py` for core classes (`BatchResponses`, etc.).
+ When adding public symbols: update `__all__`, docs (`docs/api/`), and examples if helpful.
+
+ Best practices:
+
+ 1. Internal-only code never leaks via wildcard import.
+ 2. Task modules export their primary callable/class.
+ 3. Keep `__all__` diff minimal & alphabetized.
+
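+ Illustrative declarations (the internal module name is a placeholder):
+
+ ```python
+ # src/openaivec/_some_internal.py (internal: underscore prefix, empty __all__)
+ __all__: list[str] = []
+
+ # src/openaivec/__init__.py (public: alphabetical exports)
+ __all__ = [
+     "AsyncBatchEmbeddings",
+     "AsyncBatchResponses",
+     "BatchEmbeddings",
+     "BatchResponses",
+     "FewShotPromptBuilder",
+     "PreparedTask",
+ ]
+ ```
+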
+ ---
+
+ ## 15. Documentation
+
+ - New APIs: add or update `docs/api/*.md`; brief runnable snippet preferred over prose.
+ - Add concise example notebooks only if they illustrate distinct usage (avoid overlap).
+ - Update `mkdocs.yml` nav for new pages.
+
+ ---
+
+ ## 16. PR Checklist
+
+ - [ ] Ruff check & format pass.
+ - [ ] Public API contracts (length/order/types) preserved.
+ - [ ] All remote calls batched (no per-item loops).
+ - [ ] Reasoning models enforce `temperature=None`.
+ - [ ] Tests updated/added: live where feasible; skip gracefully without credentials.
+ - [ ] Mock usage (if any) narrowly scoped & justified.
+ - [ ] Docs + `__all__` updated for new public symbols.
+ - [ ] Performance considerations (batch sizes, concurrency) sensible.
+
+ ---
+
+ ## 17. Common Snippets
+
+ New batched API wrapper (sync):
+
+ ```python
+ @observe(_LOGGER)
+ @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
+ def _unit_of_work(self, xs: list[str]) -> list[TOut]:
+     resp = self.client.api(xs)
+     return convert(resp)  # Same length/order
+
+ def create(self, inputs: list[str]) -> list[TOut]:
+     return self.cache.map(inputs, self._unit_of_work)
+ ```
+
+ Reasoning model temperature:
+
+ ```python
+ # o1/o3 & similar reasoning models must set temperature None
+ temperature=None
+ ```
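+
+ Per-call API parameter override (a 0.14.4 sketch; `parse` forwards extra keyword args to `responses.parse`, with `temperature` / `top_p` overridable per call; `batch` is the instance from the §6 sketch):
+
+ ```python
+ results = batch.parse(
+     ["input a", "input b"],
+     temperature=0.2,        # overrides the instance default for this call
+     max_output_tokens=256,  # forwarded verbatim to the Responses API
+ )
+ ```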
+
+ pandas `.ai` with shared cache:
+
+ ```python
+ from openaivec._proxy import BatchingMapProxy
+
+ shared = BatchingMapProxy[str, str](batch_size=64)
+ df["text"].ai.responses_with_cache("instructions", cache=shared)
+ ```
+
+ Spark structured Responses UDF:
+
+ ```python
+ from pydantic import BaseModel
+ from openaivec.spark import responses_udf
+
+ class R(BaseModel):
+     value: str
+
+ udf = responses_udf(
+     instructions="Do something",
+     response_format=R,
+     batch_size=64,
+     max_concurrency=8,
+ )
+ ```
+
+ Register custom OpenAI / Azure clients for the pandas extension:
+
+ ```python
+ from openai import OpenAI, AzureOpenAI, AsyncAzureOpenAI
+ from openaivec import pandas_ext
+
+ # OpenAI client
+ client = OpenAI(api_key="sk-...")
+ pandas_ext.use(client)
+
+ # Azure OpenAI sync
+ azure = AzureOpenAI(
+     api_key="...",
+     base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+     api_version="preview",
+ )
+ pandas_ext.use(azure)
+
+ # Azure OpenAI async
+ azure_async = AsyncAzureOpenAI(
+     api_key="...",
+     base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+     api_version="preview",
+ )
+ pandas_ext.use_async(azure_async)
+
+ # Override model names (optional)
+ pandas_ext.responses_model("gpt-4.1-mini")
+ pandas_ext.embeddings_model("text-embedding-3-small")
+ ```
+
+ ---
+
+ When unsure, inspect implementations (`_proxy.py`, `_responses.py`, `_embeddings.py`, `pandas_ext.py`, `spark.py`) and related tests. Keep suggestions minimal, batched, and structurally safe.
+
+ ---
+
+ ## 18. Dev Workflow Commands
+
+ Canonical local commands (uv-based). Prefer these in automation & docs.
+
+ Install (all extras + dev):
+
+ ```bash
+ uv sync --all-extras --dev
+ ```
+
+ Editable install (if needed by external tooling):
+
+ ```bash
+ uv pip install -e .
+ ```
+
+ Lint & format (Ruff):
+
+ ```bash
+ uv run ruff check . --fix
+ uv run ruff format .
+ ```
+
+ Run full test suite (quiet):
+
+ ```bash
+ uv run pytest -q
+ ```
+
+ Run a focused test:
+
+ ```bash
+ uv run pytest tests/test_responses.py::test_reasoning_temperature_guard -q
+ ```
+
+ Serve docs (MkDocs live reload):
+
+ ```bash
+ uv run mkdocs serve
+ ```
+
+ Environment setup notes:
+
+ - Set `OPENAI_API_KEY` or the Azure trio (`AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_BASE_URL`, `AZURE_OPENAI_API_VERSION`).
+ - Tests auto-skip live paths when credentials absent.
+ - Use separate shell profiles per provider if switching frequently.
+ - Azure canonical base URL should end with `/openai/v1/` (e.g. `https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/`); non‑v1 forms emit a warning.
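+
+ Example shell setup (values are placeholders):
+
+ ```bash
+ export OPENAI_API_KEY="sk-..."
+ # or, for Azure:
+ export AZURE_OPENAI_API_KEY="..."
+ export AZURE_OPENAI_BASE_URL="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+ export AZURE_OPENAI_API_VERSION="preview"
+ ```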
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: openaivec
- Version: 0.14.2
+ Version: 0.14.4
  Summary: Generative mutation for tabular calculation
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -460,7 +460,20 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
          self.__process_owned(owned, map_func)
          self.__wait_for(wait_for, map_func)
 
-         return self.__values(items)
+         # Fetch results before purging None entries
+         results = self.__values(items)
+
+         # Remove None values from cache so they are recomputed on future calls
+         with self._lock:
+             if self._cache:  # micro-optimization
+                 for k in set(items):
+                     try:
+                         if self._cache.get(k, object()) is None:
+                             del self._cache[k]
+                     except KeyError:
+                         pass
+
+         return results
 
 
  @dataclass
@@ -745,4 +758,13 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
          await self.__process_owned(owned, map_func)
          await self.__wait_for(wait_for, map_func)
 
-         return await self.__values(items)
+         results = await self.__values(items)
+
+         # Remove None values from cache after retrieval to avoid persisting incomplete results
+         async with self._lock:
+             if self._cache:
+                 for k in set(items):
+                     if self._cache.get(k, object()) is None:
+                         self._cache.pop(k, None)
+
+         return results
@@ -1,7 +1,7 @@
  import warnings
  from dataclasses import dataclass, field
  from logging import Logger, getLogger
- from typing import Generic, List, Type, cast
+ from typing import Any, Generic, List, Type, cast
 
  from openai import AsyncOpenAI, BadRequestError, InternalServerError, OpenAI, RateLimitError
  from openai.types.responses import ParsedResponse
@@ -163,7 +163,7 @@ class BatchResponses(Generic[ResponseFormat]):
      client: OpenAI
      model_name: str  # For Azure: deployment name, for OpenAI: model name
      system_message: str
-     temperature: float | None = 0.0
+     temperature: float | None = None
      top_p: float = 1.0
      response_format: Type[ResponseFormat] = str  # type: ignore[assignment]
      cache: BatchingMapProxy[str, ResponseFormat] = field(default_factory=lambda: BatchingMapProxy(batch_size=None))
@@ -241,7 +241,9 @@ class BatchResponses(Generic[ResponseFormat]):
 
      @observe(_LOGGER)
      @backoff(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
-     def _request_llm(self, user_messages: List[Message[str]]) -> ParsedResponse[Response[ResponseFormat]]:
+     def _request_llm(
+         self, user_messages: List[Message[str]], **extra_api_params: Any
+     ) -> ParsedResponse[Response[ResponseFormat]]:
          """Make a single call to the OpenAI JSON‑mode endpoint.
 
          Args:
@@ -265,16 +267,29 @@ class BatchResponses(Generic[ResponseFormat]):
          class ResponseT(BaseModel):
              assistant_messages: List[MessageT]
 
-         # Prepare API parameters, excluding temperature if None (for reasoning models)
-         api_params = {
+         # Build base API parameters (cannot be overridden by caller)
+         api_params: dict[str, Any] = {
              "model": self.model_name,
              "instructions": self._vectorized_system_message,
              "input": Request(user_messages=user_messages).model_dump_json(),
-             "top_p": self.top_p,
              "text_format": ResponseT,
          }
-         if self.temperature is not None:
-             api_params["temperature"] = self.temperature
+
+         # Resolve nucleus sampling (caller can override)
+         top_p = extra_api_params.pop("top_p", self.top_p)
+         if top_p is not None:
+             api_params["top_p"] = top_p
+
+         # Resolve temperature (caller can override). If None, omit entirely for reasoning models.
+         temperature = extra_api_params.pop("temperature", self.temperature)
+         if temperature is not None:
+             api_params["temperature"] = temperature
+
+         # Merge remaining user supplied params, excluding protected keys
+         for k, v in extra_api_params.items():
+             if k in {"model", "instructions", "input", "text_format"}:
+                 continue  # ignore attempts to override core batching contract
+             api_params[k] = v
 
          try:
              completion: ParsedResponse[ResponseT] = self.client.responses.parse(**api_params)
@@ -285,7 +300,7 @@ class BatchResponses(Generic[ResponseFormat]):
          return cast(ParsedResponse[Response[ResponseFormat]], completion)
 
      @observe(_LOGGER)
-     def _predict_chunk(self, user_messages: List[str]) -> List[ResponseFormat | None]:
+     def _predict_chunk(self, user_messages: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
          """Helper executed for every unique minibatch.
 
          This method:
@@ -297,7 +312,7 @@ class BatchResponses(Generic[ResponseFormat]):
          only on its arguments – which allows safe reuse.
          """
          messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
-         responses: ParsedResponse[Response[ResponseFormat]] = self._request_llm(messages)
+         responses: ParsedResponse[Response[ResponseFormat]] = self._request_llm(messages, **api_kwargs)
          if not responses.output_parsed:
              return [None] * len(messages)
          response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
@@ -305,17 +320,28 @@ class BatchResponses(Generic[ResponseFormat]):
          return sorted_responses
 
      @observe(_LOGGER)
-     def parse(self, inputs: List[str]) -> List[ResponseFormat | None]:
+     def parse(self, inputs: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
          """Batched predict.
 
+         Accepts arbitrary keyword arguments that are forwarded to the underlying
+         ``OpenAI.responses.parse`` call for future‑proofing (e.g., ``max_output_tokens``,
+         penalties, etc.). ``top_p`` and ``temperature`` default to the instance's
+         configured values but can be overridden explicitly.
+
          Args:
              inputs (List[str]): Prompts that require responses. Duplicates are de‑duplicated.
+             **api_kwargs: Extra keyword args forwarded to the OpenAI Responses API.
 
          Returns:
              List[ResponseFormat | None]: Assistant responses aligned to ``inputs``.
          """
-         result = self.cache.map(inputs, self._predict_chunk)
-         return result  # type: ignore[return-value]
+         if not api_kwargs:
+             return self.cache.map(inputs, self._predict_chunk)  # type: ignore[return-value]
+
+         def _predict_with(xs: List[str]) -> List[ResponseFormat | None]:
+             return self._predict_chunk(xs, **api_kwargs)
+
+         return self.cache.map(inputs, _predict_with)  # type: ignore[return-value]
 
 
  @dataclass(frozen=True)
@@ -382,7 +408,7 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
          client: AsyncOpenAI,
          model_name: str,
          system_message: str,
-         temperature: float | None = 0.0,
+         temperature: float | None = None,
          top_p: float = 1.0,
          response_format: Type[ResponseFormat] = str,
          batch_size: int | None = None,
@@ -455,7 +481,9 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
 
      @backoff_async(exceptions=[RateLimitError, InternalServerError], scale=1, max_retries=12)
      @observe(_LOGGER)
-     async def _request_llm(self, user_messages: List[Message[str]]) -> ParsedResponse[Response[ResponseFormat]]:
+     async def _request_llm(
+         self, user_messages: List[Message[str]], **extra_api_params: Any
+     ) -> ParsedResponse[Response[ResponseFormat]]:
          """Make a single async call to the OpenAI JSON‑mode endpoint.
 
          Args:
@@ -476,16 +504,29 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
          class ResponseT(BaseModel):
              assistant_messages: List[MessageT]
 
-         # Prepare API parameters, excluding temperature if None (for reasoning models)
-         api_params = {
+         # Build base API parameters (cannot be overridden by caller)
+         api_params: dict[str, Any] = {
              "model": self.model_name,
              "instructions": self._vectorized_system_message,
              "input": Request(user_messages=user_messages).model_dump_json(),
-             "top_p": self.top_p,
              "text_format": ResponseT,
          }
-         if self.temperature is not None:
-             api_params["temperature"] = self.temperature
+
+         # Resolve nucleus sampling (caller can override)
+         top_p = extra_api_params.pop("top_p", self.top_p)
+         if top_p is not None:
+             api_params["top_p"] = top_p
+
+         # Resolve temperature (caller can override). If None, omit entirely for reasoning models.
+         temperature = extra_api_params.pop("temperature", self.temperature)
+         if temperature is not None:
+             api_params["temperature"] = temperature
+
+         # Merge remaining user supplied params, excluding protected keys
+         for k, v in extra_api_params.items():
+             if k in {"model", "instructions", "input", "text_format"}:
+                 continue
+             api_params[k] = v
 
          try:
              completion: ParsedResponse[ResponseT] = await self.client.responses.parse(**api_params)
@@ -496,7 +537,7 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
          return cast(ParsedResponse[Response[ResponseFormat]], completion)
 
      @observe(_LOGGER)
-     async def _predict_chunk(self, user_messages: List[str]) -> List[ResponseFormat | None]:
+     async def _predict_chunk(self, user_messages: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
          """Async helper executed for every unique minibatch.
 
          This method:
@@ -507,7 +548,7 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
          The function is pure – it has no side‑effects and the result depends only on its arguments.
          """
          messages = [Message(id=i, body=message) for i, message in enumerate(user_messages)]
-         responses: ParsedResponse[Response[ResponseFormat]] = await self._request_llm(messages)  # type: ignore[call-issue]
+         responses: ParsedResponse[Response[ResponseFormat]] = await self._request_llm(messages, **api_kwargs)  # type: ignore[call-issue]
          if not responses.output_parsed:
              return [None] * len(messages)
          response_dict = {message.id: message.body for message in responses.output_parsed.assistant_messages}
@@ -516,14 +557,25 @@ class AsyncBatchResponses(Generic[ResponseFormat]):
          return sorted_responses
 
      @observe(_LOGGER)
-     async def parse(self, inputs: List[str]) -> List[ResponseFormat | None]:
+     async def parse(self, inputs: List[str], **api_kwargs: Any) -> List[ResponseFormat | None]:
          """Batched predict (async).
 
+         Accepts arbitrary keyword arguments forwarded to ``AsyncOpenAI.responses.parse``.
+         ``top_p`` and ``temperature`` default to instance configuration but can be
+         overridden per call. This prepares for future API parameters without
+         changing the public surface again.
+
          Args:
              inputs (List[str]): Prompts that require responses. Duplicates are de‑duplicated.
+             **api_kwargs: Extra keyword args for the OpenAI Responses API.
 
          Returns:
              List[ResponseFormat | None]: Assistant responses aligned to ``inputs``.
          """
-         result = await self.cache.map(inputs, self._predict_chunk)
-         return result  # type: ignore[return-value]
+         if not api_kwargs:
+             return await self.cache.map(inputs, self._predict_chunk)  # type: ignore[return-value]
+
+         async def _predict_with(xs: List[str]) -> List[ResponseFormat | None]:
+             return await self._predict_chunk(xs, **api_kwargs)
+
+         return await self.cache.map(inputs, _predict_with)  # type: ignore[return-value]