openaivec 0.99.3__tar.gz → 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openaivec-1.0.1/PKG-INFO +377 -0
  2. openaivec-1.0.1/README.md +351 -0
  3. openaivec-0.99.3/PKG-INFO +0 -807
  4. openaivec-0.99.3/README.md +0 -781
  5. {openaivec-0.99.3 → openaivec-1.0.1}/.env.example +0 -0
  6. {openaivec-0.99.3 → openaivec-1.0.1}/.github/copilot-instructions.md +0 -0
  7. {openaivec-0.99.3 → openaivec-1.0.1}/.github/dependabot.yml +0 -0
  8. {openaivec-0.99.3 → openaivec-1.0.1}/.github/workflows/docs.yml +0 -0
  9. {openaivec-0.99.3 → openaivec-1.0.1}/.github/workflows/publish.yml +0 -0
  10. {openaivec-0.99.3 → openaivec-1.0.1}/.github/workflows/test.yml +0 -0
  11. {openaivec-0.99.3 → openaivec-1.0.1}/.gitignore +0 -0
  12. {openaivec-0.99.3 → openaivec-1.0.1}/AGENTS.md +0 -0
  13. {openaivec-0.99.3 → openaivec-1.0.1}/CODE_OF_CONDUCT.md +0 -0
  14. {openaivec-0.99.3 → openaivec-1.0.1}/LICENSE +0 -0
  15. {openaivec-0.99.3 → openaivec-1.0.1}/SECURITY.md +0 -0
  16. {openaivec-0.99.3 → openaivec-1.0.1}/SUPPORT.md +0 -0
  17. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/main.md +0 -0
  18. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/pandas_ext.md +0 -0
  19. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/spark.md +0 -0
  20. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/task.md +0 -0
  21. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  22. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  23. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  24. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  25. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  26. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  27. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  28. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  29. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  30. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  31. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  32. {openaivec-0.99.3 → openaivec-1.0.1}/docs/api/tasks/nlp/translation.md +0 -0
  33. {openaivec-0.99.3 → openaivec-1.0.1}/docs/contributor-guide.md +0 -0
  34. {openaivec-0.99.3 → openaivec-1.0.1}/docs/index.md +0 -0
  35. {openaivec-0.99.3 → openaivec-1.0.1}/docs/robots.txt +0 -0
  36. {openaivec-0.99.3 → openaivec-1.0.1}/mkdocs.yml +0 -0
  37. {openaivec-0.99.3 → openaivec-1.0.1}/pyproject.toml +0 -0
  38. {openaivec-0.99.3 → openaivec-1.0.1}/pytest.ini +0 -0
  39. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/__init__.py +0 -0
  40. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_cache/__init__.py +0 -0
  41. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_cache/optimize.py +0 -0
  42. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_cache/proxy.py +0 -0
  43. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_di.py +0 -0
  44. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_embeddings.py +0 -0
  45. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_log.py +0 -0
  46. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_model.py +0 -0
  47. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_prompt.py +0 -0
  48. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_provider.py +0 -0
  49. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_responses.py +0 -0
  50. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_schema/__init__.py +0 -0
  51. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_schema/infer.py +0 -0
  52. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_schema/spec.py +0 -0
  53. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_serialize.py +0 -0
  54. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/_util.py +0 -0
  55. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/pandas_ext.py +0 -0
  56. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/spark.py +0 -0
  57. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/__init__.py +0 -0
  58. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/__init__.py +0 -0
  59. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  60. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  61. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  62. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  63. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  64. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  65. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/__init__.py +0 -0
  66. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  67. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  68. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  69. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  70. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  71. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/nlp/translation.py +0 -0
  72. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/table/__init__.py +0 -0
  73. {openaivec-0.99.3 → openaivec-1.0.1}/src/openaivec/task/table/fillna.py +0 -0
  74. {openaivec-0.99.3 → openaivec-1.0.1}/tests/__init__.py +0 -0
  75. {openaivec-0.99.3 → openaivec-1.0.1}/tests/_cache/test_optimize.py +0 -0
  76. {openaivec-0.99.3 → openaivec-1.0.1}/tests/_cache/test_proxy.py +0 -0
  77. {openaivec-0.99.3 → openaivec-1.0.1}/tests/_cache/test_proxy_suggester.py +0 -0
  78. {openaivec-0.99.3 → openaivec-1.0.1}/tests/_schema/test_infer.py +0 -0
  79. {openaivec-0.99.3 → openaivec-1.0.1}/tests/_schema/test_spec.py +0 -0
  80. {openaivec-0.99.3 → openaivec-1.0.1}/tests/conftest.py +0 -0
  81. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_di.py +0 -0
  82. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_embeddings.py +0 -0
  83. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_pandas_ext.py +0 -0
  84. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_prompt.py +0 -0
  85. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_provider.py +0 -0
  86. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_responses.py +0 -0
  87. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_serialize.py +0 -0
  88. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
  89. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_spark.py +0 -0
  90. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_task.py +0 -0
  91. {openaivec-0.99.3 → openaivec-1.0.1}/tests/test_util.py +0 -0
  92. {openaivec-0.99.3 → openaivec-1.0.1}/uv.lock +0 -0
openaivec-1.0.1/PKG-INFO
@@ -0,0 +1,377 @@
+ Metadata-Version: 2.4
+ Name: openaivec
+ Version: 1.0.1
+ Summary: Generative mutation for tabular calculation
+ Project-URL: Homepage, https://microsoft.github.io/openaivec/
+ Project-URL: Repository, https://github.com/microsoft/openaivec
+ Author-email: Hiroki Mizukami <hmizukami@microsoft.com>
+ License: MIT
+ License-File: LICENSE
+ Keywords: llm,openai,openai-api,openai-python,pandas,pyspark
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
+ Requires-Dist: ipywidgets>=8.1.7
+ Requires-Dist: openai>=1.74.0
+ Requires-Dist: pandas>=2.2.3
+ Requires-Dist: tiktoken>=0.9.0
+ Requires-Dist: tqdm>=4.67.1
+ Provides-Extra: spark
+ Requires-Dist: pyspark>=3.5.5; extra == 'spark'
+ Description-Content-Type: text/markdown
+
+ # openaivec
+
+ Transform pandas and Spark workflows with AI-powered text processing—batching, caching, and guardrails included.
+
+ [Contributor guidelines](AGENTS.md)
+
+ ## Quick start
+
+ ```bash
+ pip install openaivec
+ ```
+
+ ```python
+ import os
+ import pandas as pd
+ from openaivec import pandas_ext
+
+ # Auth: choose OpenAI or Azure OpenAI
+ os.environ["OPENAI_API_KEY"] = "your-api-key"
+ # Azure alternative:
+ # os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
+ # os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+ # os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
+
+ pandas_ext.set_responses_model("gpt-5.1") # Optional override (use deployment name for Azure)
+
+ reviews = pd.Series([
+     "Great coffee and friendly staff.",
+     "Delivery was late and the package was damaged.",
+ ])
+
+ sentiment = reviews.ai.responses(
+     "Summarize sentiment in one short sentence.",
+     reasoning={"effort": "medium"}, # Mirrors OpenAI SDK for reasoning models
+ )
+ print(sentiment.tolist())
+ ```
+
+ **Try it live:** https://microsoft.github.io/openaivec/examples/pandas/
+
+ ## Contents
+
+ - [Why openaivec?](#why-openaivec)
+ - [Core Workflows](#core-workflows)
+ - [Using with Apache Spark UDFs](#using-with-apache-spark-udfs)
+ - [Building Prompts](#building-prompts)
+ - [Using with Microsoft Fabric](#using-with-microsoft-fabric)
+ - [Contributing](#contributing)
+ - [Additional Resources](#additional-resources)
+ - [Community](#community)
+
+ ## Why openaivec?
+
+ - Drop-in `.ai` and `.aio` accessors keep pandas analysts in familiar tooling.
+ - Smart batching (`BatchingMapProxy`/`AsyncBatchingMapProxy`) dedupes prompts, preserves order, and releases waiters on failure.
+ - Reasoning support mirrors the OpenAI SDK; structured outputs accept Pydantic `response_format` (see the sketch after this list).
+ - Built-in caches and retries remove boilerplate; helpers reuse caches across pandas, Spark, and async flows.
+ - Spark UDFs and Microsoft Fabric guides move notebooks into production-scale ETL.
+ - Prompt tooling (`FewShotPromptBuilder`, `improve`) and the task library ship curated prompts with validated outputs.
+
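A minimal sketch of that structured-output path with the pandas accessor, assuming `.ai.responses` accepts a Pydantic model via a `response_format` argument (the parameter name the Spark UDF uses later in this README) and expanding the result with the documented `.ai.extract` helper:

```python
import pandas as pd
from pydantic import BaseModel

from openaivec import pandas_ext  # importing registers the .ai accessor

pandas_ext.set_responses_model("gpt-5.1")

class Sentiment(BaseModel):
    label: str        # e.g. "positive", "negative", "neutral"
    confidence: float

reviews = pd.DataFrame({"text": [
    "Great coffee and friendly staff.",
    "Delivery was late and the package was damaged.",
]})

# Each row is parsed into a Sentiment instance (response_format is an assumed kwarg here).
result = reviews.assign(
    sentiment=lambda df: df.text.ai.responses(
        "Classify the sentiment of the review.",
        response_format=Sentiment,
        reasoning={"effort": "medium"},
    )
)

# Expand the structured column into separate scalar columns.
flat = result.ai.extract("sentiment")
```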
+ # Overview
+
+ Vectorized OpenAI access so you process many inputs per call instead of one-by-one. Batching proxies dedupe inputs, enforce ordered outputs, and unblock waiters even on upstream errors. Cache helpers (`responses_with_cache`, Spark UDF builders) plug into the same layer so expensive prompts are reused across pandas, Spark, and async flows. Reasoning models honor SDK semantics. Requires Python 3.10+.
+
+ ## Core Workflows
+
+ ### Direct API usage
+
+ For maximum control over batch processing:
+
+ ```python
+ import os
+ from openai import OpenAI
+ from openaivec import BatchResponses
+
+ # Initialize the batch client
+ client = BatchResponses.of(
+     client=OpenAI(),
+     model_name="gpt-5.1",
+     system_message="Please answer only with 'xx family' and do not output anything else.",
+     # batch_size defaults to None (automatic optimization)
+ )
+
+ result = client.parse(
+     ["panda", "rabbit", "koala"],
+     reasoning={"effort": "medium"}, # Required for gpt-5.1
+ )
+ print(result) # Expected output: ['bear family', 'rabbit family', 'koala family']
+ ```
+
+ 📓 **[Complete tutorial →](https://microsoft.github.io/openaivec/examples/pandas/)**
+
+ ### pandas integration (recommended)
+
+ The easiest way to get started with your DataFrames:
+
+ ```python
+ import os
+ import pandas as pd
+ from openaivec import pandas_ext
+
+ # Authentication Option 1: Environment variables (automatic detection)
+ os.environ["OPENAI_API_KEY"] = "your-api-key-here"
+ # Or for Azure OpenAI:
+ # os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
+ # os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
+ # os.environ["AZURE_OPENAI_API_VERSION"] = "preview"
+
+ # Authentication Option 2: Custom client (optional)
+ # from openai import OpenAI, AsyncOpenAI
+ # pandas_ext.set_client(OpenAI())
+ # pandas_ext.set_async_client(AsyncOpenAI())
+
+ # Configure model (optional - defaults to gpt-5.1; use deployment name for Azure)
+ pandas_ext.set_responses_model("gpt-5.1")
+
+ # Create your data
+ df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})
+
+ # Add AI-powered columns
+ result = df.assign(
+     family=lambda df: df.name.ai.responses(
+         "What animal family? Answer with 'X family'",
+         reasoning={"effort": "medium"},
+     ),
+     habitat=lambda df: df.name.ai.responses(
+         "Primary habitat in one word",
+         reasoning={"effort": "medium"},
+     ),
+     fun_fact=lambda df: df.name.ai.responses(
+         "One interesting fact in 10 words or less",
+         reasoning={"effort": "medium"},
+     ),
+ )
+ ```
+
+ | name | family | habitat | fun_fact |
+ | ------ | ---------------- | ------- | -------------------------- |
+ | panda | bear family | forest | Eats bamboo 14 hours daily |
+ | rabbit | rabbit family | meadow | Can see nearly 360 degrees |
+ | koala | marsupial family | tree | Sleeps 22 hours per day |
+
+ 📓 **[Interactive pandas examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
+
+ ### Using with reasoning models
+
+ Reasoning models (o1-preview, o1-mini, o3-mini, etc.) work without special flags. `reasoning` mirrors the OpenAI SDK.
+
+ ```python
+ pandas_ext.set_responses_model("o1-mini") # Set your reasoning model
+
+ result = df.assign(
+     analysis=lambda df: df.text.ai.responses(
+         "Analyze this text step by step",
+         reasoning={"effort": "medium"} # Optional: mirrors the OpenAI SDK argument
+     )
+ )
+ ```
+
+ You can omit `reasoning` to use the model defaults, or tune it per request using the same shape as the OpenAI SDK (a `dict` with an `effort` key).
+
+ ### Using pre-configured tasks
+
+ For common text processing operations, openaivec provides ready-to-use tasks that eliminate the need to write custom prompts:
+
+ ```python
+ from openaivec.task import nlp, customer_support
+
+ text_df = pd.DataFrame({
+     "text": [
+         "Great product, fast delivery!",
+         "Need help with billing issue",
+         "How do I reset my password?"
+     ]
+ })
+
+ results = text_df.assign(
+     sentiment=lambda df: df.text.ai.task(nlp.SENTIMENT_ANALYSIS),
+     intent=lambda df: df.text.ai.task(customer_support.INTENT_ANALYSIS),
+ )
+
+ # Extract structured results into separate columns
+ extracted_results = results.ai.extract("sentiment")
+ ```
+
+ **Task categories:** Text analysis (`nlp.SENTIMENT_ANALYSIS`, `nlp.MULTILINGUAL_TRANSLATION`, `nlp.NAMED_ENTITY_RECOGNITION`, `nlp.KEYWORD_EXTRACTION`); Content classification (`customer_support.INTENT_ANALYSIS`, `customer_support.URGENCY_ANALYSIS`, `customer_support.INQUIRY_CLASSIFICATION`).
+
+ ### Asynchronous processing with `.aio`
+
+ For high-throughput workloads, the `.aio` accessor provides async versions of all operations:
+
+ ```python
+ import asyncio
+ import pandas as pd
+ from openaivec import pandas_ext
+
+ pandas_ext.set_responses_model("gpt-5.1")
+
+ df = pd.DataFrame({"text": [
+     "This product is amazing!",
+     "Terrible customer service",
+     "Good value for money",
+     "Not what I expected"
+ ] * 250}) # 1000 rows for demonstration
+
+ async def process_data():
+     return await df["text"].aio.responses(
+         "Analyze sentiment and classify as positive/negative/neutral",
+         reasoning={"effort": "medium"}, # Required for gpt-5.1
+         max_concurrency=12 # Allow up to 12 concurrent requests
+     )
+
+ sentiments = asyncio.run(process_data())
+ ```
+
+ **Performance benefits:** Parallel processing with automatic batching/deduplication, built-in rate limiting and error handling, and memory-efficient streaming for large datasets.
+
+ ## Using with Apache Spark UDFs
+
+ Scale to enterprise datasets with distributed processing.
+
+ 📓 **[Spark tutorial →](https://microsoft.github.io/openaivec/examples/spark/)**
+
+ First, obtain a Spark session and configure authentication:
+
+ ```python
+ from pyspark.sql import SparkSession
+ from openaivec.spark import setup, setup_azure
+
+ spark = SparkSession.builder.getOrCreate()
+
+ # Option 1: Using OpenAI
+ setup(
+     spark,
+     api_key="your-openai-api-key",
+     responses_model_name="gpt-5.1", # Optional: set default model
+     embeddings_model_name="text-embedding-3-small" # Optional: set default model
+ )
+
+ # Option 2: Using Azure OpenAI
+ # setup_azure(
+ #     spark,
+ #     api_key="your-azure-openai-api-key",
+ #     base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+ #     api_version="preview",
+ #     responses_model_name="my-gpt4-deployment", # Optional: set default deployment
+ #     embeddings_model_name="my-embedding-deployment" # Optional: set default deployment
+ # )
+ ```
+
+ Create and register UDFs using the provided helpers:
+
+ ```python
+ from openaivec.spark import responses_udf, task_udf, embeddings_udf, count_tokens_udf, similarity_udf, parse_udf
+ from pydantic import BaseModel
+
+ spark.udf.register(
+     "extract_brand",
+     responses_udf(
+         instructions="Extract the brand name from the product. Return only the brand name.",
+         reasoning={"effort": "medium"}, # Recommended with gpt-5.1
+     )
+ )
+
+ class Translation(BaseModel):
+     en: str
+     fr: str
+     ja: str
+
+ spark.udf.register(
+     "translate_struct",
+     responses_udf(
+         instructions="Translate the text to English, French, and Japanese.",
+         response_format=Translation,
+         reasoning={"effort": "medium"}, # Recommended with gpt-5.1
+     )
+ )
+
+ spark.udf.register("embed_text", embeddings_udf())
+ spark.udf.register("count_tokens", count_tokens_udf())
+ spark.udf.register("compute_similarity", similarity_udf())
+ ```
+
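Once registered, the UDFs can be called from Spark SQL or `selectExpr` like any other UDF. A small usage sketch (the `products` view and its column are hypothetical):

```python
# Hypothetical input data with a product_name column.
products = spark.createDataFrame(
    [("Contoso X100 Laptop",), ("Fabrikam Trail Shoes",)],
    ["product_name"],
)
products.createOrReplaceTempView("products")

# Call the registered UDFs from SQL; requests are batched per partition.
result = spark.sql("""
    SELECT
        product_name,
        extract_brand(product_name) AS brand,
        translate_struct(product_name) AS translations,
        count_tokens(product_name) AS token_count
    FROM products
""")
result.show(truncate=False)
```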
+ ### Spark performance tips
+
+ - UDFs automatically detect duplicate inputs and cache the results within each partition.
+ - `batch_size=None` auto-optimizes; set 32–128 for fixed sizes if needed.
+ - `max_concurrency` is per executor; total concurrency = executors × max_concurrency. Start with 4–12 (see the sketch below).
+ - Monitor rate limits and adjust concurrency to your OpenAI tier.
+
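A sketch of how these knobs map onto the UDF builder, assuming `responses_udf` accepts `batch_size` and `max_concurrency` keyword arguments (both knobs are described above, but the exact parameter names are an assumption here):

```python
from openaivec.spark import responses_udf

# Assumed kwargs: batch_size (None = automatic) and max_concurrency (per executor).
summarize_udf = responses_udf(
    instructions="Summarize the review in one sentence.",
    batch_size=64,       # fixed batch size instead of automatic optimization
    max_concurrency=8,   # total in-flight requests = executors x 8
)
spark.udf.register("summarize_review", summarize_udf)
```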
+ ## Building Prompts
+
+ Few-shot prompts improve LLM quality. `FewShotPromptBuilder` structures purpose, cautions, and examples; `improve()` iterates with OpenAI to remove contradictions.
+
+ ```python
+ from openaivec import FewShotPromptBuilder
+
+ prompt = (
+     FewShotPromptBuilder()
+     .purpose("Return the smallest category that includes the given word")
+     .caution("Never use proper nouns as categories")
+     .example("Apple", "Fruit")
+     .example("Car", "Vehicle")
+     .improve(max_iter=1) # optional
+     .build()
+ )
+ ```
+
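Assuming `build()` returns the prompt text, the result can be passed straight to the vectorized helpers shown earlier, for example as the instructions for the pandas accessor:

```python
import pandas as pd
from openaivec import pandas_ext  # registers the .ai accessor

words = pd.Series(["Apple", "Bicycle", "Tokyo"])

# Reuse the few-shot prompt built above as the instructions for a batched call.
categories = words.ai.responses(prompt)
```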
+ 📓 **[Advanced prompting techniques →](https://microsoft.github.io/openaivec/examples/prompt/)**
+
+ ## Using with Microsoft Fabric
+
+ [Microsoft Fabric](https://www.microsoft.com/en-us/microsoft-fabric/) is a unified, cloud-based analytics platform. Add `openaivec` from PyPI in your Fabric environment, select it in your notebook, and use `openaivec.spark` like standard Spark. Detailed walkthrough: 📓 **[Fabric guide →](https://microsoft.github.io/openaivec/examples/fabric/)**.
+
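A notebook-style sketch of that flow, reusing the helpers shown above; it assumes the package is already attached to the Fabric environment, that Fabric provides the usual `spark` session, and that the Lakehouse table name is hypothetical:

```python
from openaivec.spark import setup_azure, responses_udf

# Fabric notebooks expose a ready-made `spark` session.
setup_azure(
    spark,
    api_key="your-azure-openai-api-key",
    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
    api_version="preview",
    responses_model_name="my-gpt4-deployment",
)

spark.udf.register(
    "classify_feedback",
    responses_udf(instructions="Classify the feedback as praise, complaint, or question."),
)

# Hypothetical Lakehouse table.
feedback = spark.read.table("customer_feedback")
feedback.selectExpr("feedback_text", "classify_feedback(feedback_text) AS category").show()
```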
+ ## Contributing
+
+ We welcome contributions! Please:
+
+ 1. Fork and branch from `main`.
+ 2. Add or update tests when you change code.
+ 3. Run formatting and tests before opening a PR.
+
+ Install dev deps:
+
+ ```bash
+ uv sync --all-extras --dev
+ ```
+
+ Lint and format:
+
+ ```bash
+ uv run ruff check . --fix
+ ```
+
+ Quick test pass:
+
+ ```bash
+ uv run pytest -m "not slow and not requires_api"
+ ```
+
+ ## Additional Resources
+
+ - 📓 **[Customer feedback analysis →](https://microsoft.github.io/openaivec/examples/customer_analysis/)** - Sentiment analysis & prioritization
+ - 📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
+ - 📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
+ - 📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
+ - 📓 **[All examples →](https://microsoft.github.io/openaivec/examples/pandas/)** - Complete collection of tutorials and use cases
+
+ ## Community
+
+ Join our Discord community for support and announcements: https://discord.gg/vbb83Pgn