openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/__init__.py +13 -4
- openaivec/_cache/__init__.py +12 -0
- openaivec/_cache/optimize.py +109 -0
- openaivec/_cache/proxy.py +806 -0
- openaivec/_di.py +326 -0
- openaivec/_embeddings.py +203 -0
- openaivec/{log.py → _log.py} +2 -2
- openaivec/_model.py +113 -0
- openaivec/{prompt.py → _prompt.py} +95 -28
- openaivec/_provider.py +207 -0
- openaivec/_responses.py +511 -0
- openaivec/_schema/__init__.py +9 -0
- openaivec/_schema/infer.py +340 -0
- openaivec/_schema/spec.py +350 -0
- openaivec/_serialize.py +234 -0
- openaivec/{util.py → _util.py} +25 -85
- openaivec/pandas_ext.py +1635 -425
- openaivec/spark.py +604 -335
- openaivec/task/__init__.py +27 -29
- openaivec/task/customer_support/__init__.py +9 -15
- openaivec/task/customer_support/customer_sentiment.py +51 -41
- openaivec/task/customer_support/inquiry_classification.py +86 -61
- openaivec/task/customer_support/inquiry_summary.py +44 -45
- openaivec/task/customer_support/intent_analysis.py +56 -41
- openaivec/task/customer_support/response_suggestion.py +49 -43
- openaivec/task/customer_support/urgency_analysis.py +76 -71
- openaivec/task/nlp/__init__.py +4 -4
- openaivec/task/nlp/dependency_parsing.py +19 -20
- openaivec/task/nlp/keyword_extraction.py +22 -24
- openaivec/task/nlp/morphological_analysis.py +25 -25
- openaivec/task/nlp/named_entity_recognition.py +26 -28
- openaivec/task/nlp/sentiment_analysis.py +29 -21
- openaivec/task/nlp/translation.py +24 -30
- openaivec/task/table/__init__.py +3 -0
- openaivec/task/table/fillna.py +183 -0
- openaivec-1.0.10.dist-info/METADATA +399 -0
- openaivec-1.0.10.dist-info/RECORD +39 -0
- {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
- openaivec/embeddings.py +0 -172
- openaivec/responses.py +0 -392
- openaivec/serialize.py +0 -225
- openaivec/task/model.py +0 -84
- openaivec-0.10.0.dist-info/METADATA +0 -546
- openaivec-0.10.0.dist-info/RECORD +0 -29
- {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,546 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: openaivec
|
|
3
|
-
Version: 0.10.0
|
|
4
|
-
Summary: Generative mutation for tabular calculation
|
|
5
|
-
Project-URL: Homepage, https://microsoft.github.io/openaivec/
|
|
6
|
-
Project-URL: Repository, https://github.com/microsoft/openaivec
|
|
7
|
-
Author-email: Hiroki Mizukami <hmizukami@microsoft.com>
|
|
8
|
-
License: MIT
|
|
9
|
-
License-File: LICENSE
|
|
10
|
-
Keywords: llm,openai,openai-api,openai-python,pandas,pyspark
|
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
|
12
|
-
Classifier: Intended Audience :: Developers
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Requires-Python: >=3.10
|
|
18
|
-
Requires-Dist: openai>=1.74.0
|
|
19
|
-
Requires-Dist: pandas>=2.2.3
|
|
20
|
-
Requires-Dist: tiktoken>=0.9.0
|
|
21
|
-
Provides-Extra: spark
|
|
22
|
-
Requires-Dist: pyspark>=3.5.5; extra == 'spark'
|
|
23
|
-
Description-Content-Type: text/markdown
|
|
24
|
-
|
|
25
|
-
# openaivec
|
|
26
|
-
|
|
27
|
-
**Transform your data analysis with AI-powered text processing at scale.**
|
|
28
|
-
|
|
29
|
-
**openaivec** enables data analysts to seamlessly integrate OpenAI's language models into their pandas and Spark workflows. Process thousands of text records with natural language instructions, turning unstructured data into actionable insights with just a few lines of code.
|
|
30
|
-
|
|
31
|
-
## 🚀 Quick Start: From Text to Insights in Seconds
|
|
32
|
-
|
|
33
|
-
Imagine analyzing 10,000 customer reviews. Instead of manual work, just write:
|
|
34
|
-
|
|
35
|
-
```python
|
|
36
|
-
import pandas as pd
|
|
37
|
-
from openaivec import pandas_ext
|
|
38
|
-
|
|
39
|
-
# Your data
|
|
40
|
-
reviews = pd.DataFrame({
|
|
41
|
-
"review": ["Great product, fast delivery!", "Terrible quality, very disappointed", ...]
|
|
42
|
-
})
|
|
43
|
-
|
|
44
|
-
# AI-powered analysis in one line
|
|
45
|
-
results = reviews.assign(
|
|
46
|
-
sentiment=lambda df: df.review.ai.responses("Classify sentiment: positive/negative/neutral"),
|
|
47
|
-
issues=lambda df: df.review.ai.responses("Extract main issues or compliments"),
|
|
48
|
-
priority=lambda df: df.review.ai.responses("Priority for follow-up: low/medium/high")
|
|
49
|
-
)
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
**Result**: Thousands of reviews classified and analyzed in minutes, not days.
|
|
53
|
-
|
|
54
|
-
📓 **[Try it yourself →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
55
|
-
|
|
56
|
-
## 💡 Real-World Impact
|
|
57
|
-
|
|
58
|
-
### Customer Feedback Analysis
|
|
59
|
-
```python
|
|
60
|
-
# Process 50,000 support tickets automatically
|
|
61
|
-
tickets.assign(
|
|
62
|
-
category=lambda df: df.description.ai.responses("Categorize: billing/technical/feature_request"),
|
|
63
|
-
urgency=lambda df: df.description.ai.responses("Urgency level: low/medium/high/critical"),
|
|
64
|
-
solution_type=lambda df: df.description.ai.responses("Best resolution approach")
|
|
65
|
-
)
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
### Market Research at Scale
|
|
69
|
-
```python
|
|
70
|
-
# Analyze multilingual social media data
|
|
71
|
-
social_data.assign(
|
|
72
|
-
english_text=lambda df: df.post.ai.responses("Translate to English"),
|
|
73
|
-
brand_mention=lambda df: df.english_text.ai.responses("Extract brand mentions and sentiment"),
|
|
74
|
-
market_trend=lambda df: df.english_text.ai.responses("Identify emerging trends or concerns")
|
|
75
|
-
)
|
|
76
|
-
```
|
|
77
|
-
|
|
78
|
-
### Survey Data Transformation
|
|
79
|
-
```python
|
|
80
|
-
# Convert free-text responses to structured data
|
|
81
|
-
from pydantic import BaseModel
|
|
82
|
-
|
|
83
|
-
class Demographics(BaseModel):
|
|
84
|
-
age_group: str
|
|
85
|
-
location: str
|
|
86
|
-
interests: list[str]
|
|
87
|
-
|
|
88
|
-
survey_responses.assign(
|
|
89
|
-
structured=lambda df: df.response.ai.responses(
|
|
90
|
-
"Extract demographics as structured data",
|
|
91
|
-
response_format=Demographics
|
|
92
|
-
)
|
|
93
|
-
).ai.extract("structured") # Auto-expands to columns
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
📓 **[See more examples →](https://microsoft.github.io/openaivec/examples/)**
|
|
97
|
-
|
|
98
|
-
# Overview
|
|
99
|
-
|
|
100
|
-
This package provides a vectorized interface for the OpenAI API, enabling you to process multiple inputs with a single
|
|
101
|
-
API call instead of sending requests one by one.
|
|
102
|
-
This approach helps reduce latency and simplifies your code.
|
|
103
|
-
|
|
104
|
-
Additionally, it integrates effortlessly with Pandas DataFrames and Apache Spark UDFs, making it easy to incorporate
|
|
105
|
-
into your data processing pipelines.
|
|
106
|
-
|
|
107
|
-
## Features
|
|
108
|
-
|
|
109
|
-
- Vectorized API requests for processing multiple inputs at once.
|
|
110
|
-
- Seamless integration with Pandas DataFrames.
|
|
111
|
-
- A UDF builder for Apache Spark.
|
|
112
|
-
- Compatibility with multiple OpenAI clients, including Azure OpenAI.
|
|
113
|
-
|
|
114
|
-
## Key Benefits
|
|
115
|
-
|
|
116
|
-
- **🚀 Performance**: Vectorized processing handles thousands of records in minutes, not hours
|
|
117
|
-
- **💰 Cost Efficiency**: Automatic deduplication reduces API costs by 50-90% on typical datasets
|
|
118
|
-
- **🔗 Integration**: Works within existing pandas/Spark workflows without architectural changes
|
|
119
|
-
- **📈 Scalability**: Same API scales from exploratory analysis (100s of records) to production systems (millions of records)
|
|
120
|
-
- **🏢 Enterprise Ready**: Microsoft Fabric integration, Apache Spark UDFs, Azure OpenAI compatibility
|
|
121
|
-
|
|
122
|
-
## Requirements
|
|
123
|
-
|
|
124
|
-
- Python 3.10 or higher
|
|
125
|
-
|
|
126
|
-
## Installation
|
|
127
|
-
|
|
128
|
-
Install the package with:
|
|
129
|
-
|
|
130
|
-
```bash
|
|
131
|
-
pip install openaivec
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
If you want to uninstall the package, you can do so with:
|
|
135
|
-
|
|
136
|
-
```bash
|
|
137
|
-
pip uninstall openaivec
|
|
138
|
-
```
|
|
139
|
-
|
|
140
|
-
## Basic Usage
|
|
141
|
-
|
|
142
|
-
### Direct API Usage
|
|
143
|
-
|
|
144
|
-
For maximum control over batch processing:
|
|
145
|
-
|
|
146
|
-
```python
|
|
147
|
-
import os
|
|
148
|
-
from openai import OpenAI
|
|
149
|
-
from openaivec import BatchResponses
|
|
150
|
-
|
|
151
|
-
# Initialize the batch client
|
|
152
|
-
client = BatchResponses(
|
|
153
|
-
client=OpenAI(),
|
|
154
|
-
model_name="gpt-4o-mini",
|
|
155
|
-
system_message="Please answer only with 'xx family' and do not output anything else."
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
result = client.parse(["panda", "rabbit", "koala"], batch_size=32)
|
|
159
|
-
print(result) # Expected output: ['bear family', 'rabbit family', 'koala family']
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
📓 **[Complete tutorial →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
163
|
-
|
|
164
|
-
### Pandas Integration (Recommended)
|
|
165
|
-
|
|
166
|
-
The easiest way to get started with your DataFrames:
|
|
167
|
-
|
|
168
|
-
```python
|
|
169
|
-
import pandas as pd
|
|
170
|
-
from openaivec import pandas_ext
|
|
171
|
-
|
|
172
|
-
# Setup (optional - uses OPENAI_API_KEY environment variable by default)
|
|
173
|
-
pandas_ext.responses_model("gpt-4o-mini")
|
|
174
|
-
|
|
175
|
-
# Create your data
|
|
176
|
-
df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})
|
|
177
|
-
|
|
178
|
-
# Add AI-powered columns
|
|
179
|
-
result = df.assign(
|
|
180
|
-
family=lambda df: df.name.ai.responses("What animal family? Answer with 'X family'"),
|
|
181
|
-
habitat=lambda df: df.name.ai.responses("Primary habitat in one word"),
|
|
182
|
-
fun_fact=lambda df: df.name.ai.responses("One interesting fact in 10 words or less")
|
|
183
|
-
)
|
|
184
|
-
```
|
|
185
|
-
|
|
186
|
-
| name | family | habitat | fun_fact |
|
|
187
|
-
|--------|---------------|---------|-----------------------------|
|
|
188
|
-
| panda | bear family | forest | Eats bamboo 14 hours daily |
|
|
189
|
-
| rabbit | rabbit family | meadow | Can see nearly 360 degrees |
|
|
190
|
-
| koala | marsupial family | tree | Sleeps 22 hours per day |
|
|
191
|
-
|
|
192
|
-
📓 **[Interactive pandas examples →](https://microsoft.github.io/openaivec/examples/pandas/)**
|
|
193
|
-
|
|
194
|
-
## Using with Apache Spark UDFs
|
|
195
|
-
|
|
196
|
-
Scale to enterprise datasets with distributed processing:
|
|
197
|
-
|
|
198
|
-
📓 **[Complete Spark tutorial →](https://microsoft.github.io/openaivec/examples/spark/)**
|
|
199
|
-
|
|
200
|
-
First, obtain a Spark session:
|
|
201
|
-
|
|
202
|
-
```python
|
|
203
|
-
from pyspark.sql import SparkSession
|
|
204
|
-
|
|
205
|
-
spark = SparkSession.builder.getOrCreate()
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
Next, instantiate UDF builders using either OpenAI or Azure OpenAI credentials and register the UDFs.
|
|
209
|
-
|
|
210
|
-
```python
|
|
211
|
-
import os
|
|
212
|
-
from openaivec.spark import ResponsesUDFBuilder, EmbeddingsUDFBuilder, count_tokens_udf
|
|
213
|
-
from pydantic import BaseModel
|
|
214
|
-
|
|
215
|
-
# --- Option 1: Using OpenAI ---
|
|
216
|
-
resp_builder_openai = ResponsesUDFBuilder.of_openai(
|
|
217
|
-
api_key=os.getenv("OPENAI_API_KEY"),
|
|
218
|
-
model_name="gpt-4o-mini", # Model for responses
|
|
219
|
-
)
|
|
220
|
-
emb_builder_openai = EmbeddingsUDFBuilder.of_openai(
|
|
221
|
-
api_key=os.getenv("OPENAI_API_KEY"),
|
|
222
|
-
model_name="text-embedding-3-small", # Model for embeddings
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
# --- Option 2: Using Azure OpenAI ---
|
|
226
|
-
# resp_builder_azure = ResponsesUDFBuilder.of_azure_openai(
|
|
227
|
-
# api_key=os.getenv("AZURE_OPENAI_KEY"),
|
|
228
|
-
# endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
|
229
|
-
# api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
|
|
230
|
-
# model_name="<your-resp-deployment-name>", # Deployment for responses
|
|
231
|
-
# )
|
|
232
|
-
# emb_builder_azure = EmbeddingsUDFBuilder.of_azure_openai(
|
|
233
|
-
# api_key=os.getenv("AZURE_OPENAI_KEY"),
|
|
234
|
-
# endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
|
|
235
|
-
# api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
|
|
236
|
-
# model_name="<your-emb-deployment-name>", # Deployment for embeddings
|
|
237
|
-
# )
|
|
238
|
-
|
|
239
|
-
# --- Register Responses UDF (String Output) ---
|
|
240
|
-
# Use the builder corresponding to your setup (OpenAI or Azure)
|
|
241
|
-
spark.udf.register(
|
|
242
|
-
"parse_flavor",
|
|
243
|
-
resp_builder_openai.build( # or resp_builder_azure.build(...)
|
|
244
|
-
instructions="Extract flavor-related information. Return only the concise flavor name.",
|
|
245
|
-
response_format=str, # Specify string output
|
|
246
|
-
)
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
# --- Register Responses UDF (Structured Output with Pydantic) ---
|
|
250
|
-
class Translation(BaseModel):
|
|
251
|
-
en: str
|
|
252
|
-
fr: str
|
|
253
|
-
ja: str
|
|
254
|
-
|
|
255
|
-
spark.udf.register(
|
|
256
|
-
"translate_struct",
|
|
257
|
-
resp_builder_openai.build( # or resp_builder_azure.build(...)
|
|
258
|
-
instructions="Translate the text to English, French, and Japanese.",
|
|
259
|
-
response_format=Translation, # Specify Pydantic model for structured output
|
|
260
|
-
)
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
# --- Register Embeddings UDF ---
|
|
264
|
-
spark.udf.register(
|
|
265
|
-
"embed_text",
|
|
266
|
-
emb_builder_openai.build() # or emb_builder_azure.build()
|
|
267
|
-
)
|
|
268
|
-
|
|
269
|
-
# --- Register Token Counting UDF ---
|
|
270
|
-
spark.udf.register("count_tokens", count_tokens_udf("gpt-4o"))
|
|
271
|
-
|
|
272
|
-
```
|
|
273
|
-
|
|
274
|
-
You can now use these UDFs in Spark SQL:
|
|
275
|
-
|
|
276
|
-
```sql
|
|
277
|
-
-- Create a sample table (replace with your actual table)
|
|
278
|
-
CREATE OR REPLACE TEMP VIEW product_names AS SELECT * FROM VALUES
|
|
279
|
-
('4414732714624', 'Cafe Mocha Smoothie (Trial Size)'),
|
|
280
|
-
('4200162318339', 'Dark Chocolate Tea (New Product)'),
|
|
281
|
-
('4920122084098', 'Uji Matcha Tea (New Product)')
|
|
282
|
-
AS product_names(id, product_name);
|
|
283
|
-
|
|
284
|
-
-- Use the registered UDFs
|
|
285
|
-
SELECT
|
|
286
|
-
id,
|
|
287
|
-
product_name,
|
|
288
|
-
parse_flavor(product_name) AS flavor,
|
|
289
|
-
translate_struct(product_name) AS translation,
|
|
290
|
-
embed_text(product_name) AS embedding,
|
|
291
|
-
count_tokens(product_name) AS token_count
|
|
292
|
-
FROM product_names;
|
|
293
|
-
```
|
|
294
|
-
|
|
295
|
-
Example Output (structure might vary slightly):
|
|
296
|
-
|
|
297
|
-
| id | product_name | flavor | translation | embedding | token_count |
|
|
298
|
-
|---------------|-----------------------------------|-----------|----------------------------------|--------------------------------|-------------|
|
|
299
|
-
| 4414732714624 | Cafe Mocha Smoothie (Trial Size) | Mocha | {en: ..., fr: ..., ja: ...} | [0.1, -0.2, ..., 0.5] | 8 |
|
|
300
|
-
| 4200162318339 | Dark Chocolate Tea (New Product) | Chocolate | {en: ..., fr: ..., ja: ...} | [-0.3, 0.1, ..., -0.1] | 7 |
|
|
301
|
-
| 4920122084098 | Uji Matcha Tea (New Product) | Matcha | {en: ..., fr: ..., ja: ...} | [0.0, 0.4, ..., 0.2] | 8 |
|
|
302
|
-
|
|
303
|
-
## Building Prompts
|
|
304
|
-
|
|
305
|
-
Building a prompt is a crucial step in using LLMs.
|
|
306
|
-
In particular, providing a few examples in a prompt can significantly improve an LLM’s performance,
|
|
307
|
-
a technique known as "few-shot learning." Typically, a few-shot prompt consists of a purpose, cautions,
|
|
308
|
-
and examples.
|
|
309
|
-
|
|
310
|
-
📓 **[Advanced prompting techniques →](https://microsoft.github.io/openaivec/examples/prompt/)**
|
|
311
|
-
|
|
312
|
-
The `FewShotPromptBuilder` helps you create structured, high-quality prompts with examples, cautions, and automatic improvement.
|
|
313
|
-
|
|
314
|
-
### Basic Usage
|
|
315
|
-
|
|
316
|
-
`FewShotPromptBuilder` simply requires a purpose, cautions, and examples, and the `build` method will
|
|
317
|
-
return the rendered prompt in XML format.
|
|
318
|
-
|
|
319
|
-
Here is an example:
|
|
320
|
-
|
|
321
|
-
```python
|
|
322
|
-
from openaivec.prompt import FewShotPromptBuilder
|
|
323
|
-
|
|
324
|
-
prompt: str = (
|
|
325
|
-
FewShotPromptBuilder()
|
|
326
|
-
.purpose("Return the smallest category that includes the given word")
|
|
327
|
-
.caution("Never use proper nouns as categories")
|
|
328
|
-
.example("Apple", "Fruit")
|
|
329
|
-
.example("Car", "Vehicle")
|
|
330
|
-
.example("Tokyo", "City")
|
|
331
|
-
.example("Keiichi Sogabe", "Musician")
|
|
332
|
-
.example("America", "Country")
|
|
333
|
-
.build()
|
|
334
|
-
)
|
|
335
|
-
print(prompt)
|
|
336
|
-
```
|
|
337
|
-
|
|
338
|
-
The output will be:
|
|
339
|
-
|
|
340
|
-
```xml
|
|
341
|
-
|
|
342
|
-
<Prompt>
|
|
343
|
-
<Purpose>Return the smallest category that includes the given word</Purpose>
|
|
344
|
-
<Cautions>
|
|
345
|
-
<Caution>Never use proper nouns as categories</Caution>
|
|
346
|
-
</Cautions>
|
|
347
|
-
<Examples>
|
|
348
|
-
<Example>
|
|
349
|
-
<Input>Apple</Input>
|
|
350
|
-
<Output>Fruit</Output>
|
|
351
|
-
</Example>
|
|
352
|
-
<Example>
|
|
353
|
-
<Input>Car</Input>
|
|
354
|
-
<Output>Vehicle</Output>
|
|
355
|
-
</Example>
|
|
356
|
-
<Example>
|
|
357
|
-
<Input>Tokyo</Input>
|
|
358
|
-
<Output>City</Output>
|
|
359
|
-
</Example>
|
|
360
|
-
<Example>
|
|
361
|
-
<Input>Keiichi Sogabe</Input>
|
|
362
|
-
<Output>Musician</Output>
|
|
363
|
-
</Example>
|
|
364
|
-
<Example>
|
|
365
|
-
<Input>America</Input>
|
|
366
|
-
<Output>Country</Output>
|
|
367
|
-
</Example>
|
|
368
|
-
</Examples>
|
|
369
|
-
</Prompt>
|
|
370
|
-
```
|
|
371
|
-
|
|
372
|
-
### Improve with OpenAI
|
|
373
|
-
|
|
374
|
-
For most users, it can be challenging to write a prompt entirely free of contradictions, ambiguities, or
|
|
375
|
-
redundancies.
|
|
376
|
-
`FewShotPromptBuilder` provides an `improve` method to refine your prompt using OpenAI's API.
|
|
377
|
-
|
|
378
|
-
The `improve` method will try to eliminate contradictions, ambiguities, and redundancies in the prompt with OpenAI's API,
|
|
379
|
-
and iterate the process up to `max_iter` times.
|
|
380
|
-
|
|
381
|
-
Here is an example:
|
|
382
|
-
|
|
383
|
-
```python
|
|
384
|
-
from openai import OpenAI
|
|
385
|
-
from openaivec.prompt import FewShotPromptBuilder
|
|
386
|
-
|
|
387
|
-
client = OpenAI(...)
|
|
388
|
-
model_name = "<your-model-name>"
|
|
389
|
-
improved_prompt: str = (
|
|
390
|
-
FewShotPromptBuilder()
|
|
391
|
-
.purpose("Return the smallest category that includes the given word")
|
|
392
|
-
.caution("Never use proper nouns as categories")
|
|
393
|
-
# Examples which have contradictions, ambiguities, or redundancies
|
|
394
|
-
.example("Apple", "Fruit")
|
|
395
|
-
.example("Apple", "Technology")
|
|
396
|
-
.example("Apple", "Company")
|
|
397
|
-
.example("Apple", "Color")
|
|
398
|
-
.example("Apple", "Animal")
|
|
399
|
-
# improve the prompt with OpenAI's API
|
|
400
|
-
.improve(client, model_name)
|
|
401
|
-
.build()
|
|
402
|
-
)
|
|
403
|
-
print(improved_prompt)
|
|
404
|
-
```
|
|
405
|
-
|
|
406
|
-
Then we will get the improved prompt with extra examples, improved purpose, and cautions:
|
|
407
|
-
|
|
408
|
-
```xml
|
|
409
|
-
<Prompt>
|
|
410
|
-
<Purpose>Classify a given word into its most relevant category by considering its context and potential meanings.
|
|
411
|
-
The input is a word accompanied by context, and the output is the appropriate category based on that context.
|
|
412
|
-
This is useful for disambiguating words with multiple meanings, ensuring accurate understanding and
|
|
413
|
-
categorization.
|
|
414
|
-
</Purpose>
|
|
415
|
-
<Cautions>
|
|
416
|
-
<Caution>Ensure the context of the word is clear to avoid incorrect categorization.</Caution>
|
|
417
|
-
<Caution>Be aware of words with multiple meanings and provide the most relevant category.</Caution>
|
|
418
|
-
<Caution>Consider the possibility of new or uncommon contexts that may not fit traditional categories.</Caution>
|
|
419
|
-
</Cautions>
|
|
420
|
-
<Examples>
|
|
421
|
-
<Example>
|
|
422
|
-
<Input>Apple (as a fruit)</Input>
|
|
423
|
-
<Output>Fruit</Output>
|
|
424
|
-
</Example>
|
|
425
|
-
<Example>
|
|
426
|
-
<Input>Apple (as a tech company)</Input>
|
|
427
|
-
<Output>Technology</Output>
|
|
428
|
-
</Example>
|
|
429
|
-
<Example>
|
|
430
|
-
<Input>Java (as a programming language)</Input>
|
|
431
|
-
<Output>Technology</Output>
|
|
432
|
-
</Example>
|
|
433
|
-
<Example>
|
|
434
|
-
<Input>Java (as an island)</Input>
|
|
435
|
-
<Output>Geography</Output>
|
|
436
|
-
</Example>
|
|
437
|
-
<Example>
|
|
438
|
-
<Input>Mercury (as a planet)</Input>
|
|
439
|
-
<Output>Astronomy</Output>
|
|
440
|
-
</Example>
|
|
441
|
-
<Example>
|
|
442
|
-
<Input>Mercury (as an element)</Input>
|
|
443
|
-
<Output>Chemistry</Output>
|
|
444
|
-
</Example>
|
|
445
|
-
<Example>
|
|
446
|
-
<Input>Bark (as a sound made by a dog)</Input>
|
|
447
|
-
<Output>Animal Behavior</Output>
|
|
448
|
-
</Example>
|
|
449
|
-
<Example>
|
|
450
|
-
<Input>Bark (as the outer covering of a tree)</Input>
|
|
451
|
-
<Output>Botany</Output>
|
|
452
|
-
</Example>
|
|
453
|
-
<Example>
|
|
454
|
-
<Input>Bass (as a type of fish)</Input>
|
|
455
|
-
<Output>Aquatic Life</Output>
|
|
456
|
-
</Example>
|
|
457
|
-
<Example>
|
|
458
|
-
<Input>Bass (as a low-frequency sound)</Input>
|
|
459
|
-
<Output>Music</Output>
|
|
460
|
-
</Example>
|
|
461
|
-
</Examples>
|
|
462
|
-
</Prompt>
|
|
463
|
-
```
|
|
464
|
-
|
|
465
|
-
## Using with Microsoft Fabric
|
|
466
|
-
|
|
467
|
-
[Microsoft Fabric](https://www.microsoft.com/en-us/microsoft-fabric/) is a unified, cloud-based analytics platform that
|
|
468
|
-
seamlessly integrates data engineering, warehousing, and business intelligence to simplify the journey from raw data to
|
|
469
|
-
actionable insights.
|
|
470
|
-
|
|
471
|
-
This section provides instructions on how to integrate and use `openaivec` within Microsoft Fabric. Follow these
|
|
472
|
-
steps:
|
|
473
|
-
|
|
474
|
-
1. **Create an Environment in Microsoft Fabric:**
|
|
475
|
-
|
|
476
|
-
- In Microsoft Fabric, click on **New item** in your workspace.
|
|
477
|
-
- Select **Environment** to create a new environment for Apache Spark.
|
|
478
|
-
- Choose the environment name, e.g. `openai-environment`.
|
|
479
|
-
- 
|
|
480
|
-
_Figure: Creating a new Environment in Microsoft Fabric._
|
|
481
|
-
|
|
482
|
-
2. **Add `openaivec` to the Environment from Public Library**
|
|
483
|
-
|
|
484
|
-
- Once your environment is set up, go to the **Custom Library** section within that environment.
|
|
485
|
-
- Click on **Add from PyPI** and search for the latest version of `openaivec`.
|
|
486
|
-
- Save and publish to reflect the changes.
|
|
487
|
-
- 
|
|
488
|
-
_Figure: Add `openaivec` from PyPI to Public Library_
|
|
489
|
-
|
|
490
|
-
3. **Use the Environment from a Notebook:**
|
|
491
|
-
|
|
492
|
-
- Open a notebook within Microsoft Fabric.
|
|
493
|
-
- Select the environment you created in the previous steps.
|
|
494
|
-
- 
|
|
495
|
-
_Figure: Using custom environment from a notebook._
|
|
496
|
-
- In the notebook, import and use `openaivec.spark.ResponsesUDFBuilder` as you normally would. For example:
|
|
497
|
-
|
|
498
|
-
```python
|
|
499
|
-
from openaivec.spark import ResponsesUDFBuilder
|
|
500
|
-
|
|
501
|
-
resp_builder = ResponsesUDFBuilder.of_azure_openai(
|
|
502
|
-
api_key="<your-api-key>",
|
|
503
|
-
endpoint="https://<your-resource-name>.openai.azure.com",
|
|
504
|
-
api_version="2024-10-21",
|
|
505
|
-
model_name="<your-deployment-name>"
|
|
506
|
-
)
|
|
507
|
-
```
|
|
508
|
-
|
|
509
|
-
Following these steps allows you to successfully integrate and use `openaivec` within Microsoft Fabric.
|
|
510
|
-
|
|
511
|
-
## Contributing
|
|
512
|
-
|
|
513
|
-
We welcome contributions to this project! If you would like to contribute, please follow these guidelines:
|
|
514
|
-
|
|
515
|
-
1. Fork the repository and create your branch from `main`.
|
|
516
|
-
2. If you've added code that should be tested, add tests.
|
|
517
|
-
3. Ensure the test suite passes.
|
|
518
|
-
4. Make sure your code lints.
|
|
519
|
-
|
|
520
|
-
### Installing Dependencies
|
|
521
|
-
|
|
522
|
-
To install the necessary dependencies for development, run:
|
|
523
|
-
|
|
524
|
-
```bash
|
|
525
|
-
uv sync --all-extras --dev
|
|
526
|
-
```
|
|
527
|
-
|
|
528
|
-
### Code Formatting
|
|
529
|
-
|
|
530
|
-
To reformat the code, use the following command:
|
|
531
|
-
|
|
532
|
-
```bash
|
|
533
|
-
uv run ruff check . --fix
|
|
534
|
-
```
|
|
535
|
-
|
|
536
|
-
## Additional Resources
|
|
537
|
-
|
|
538
|
-
📓 **[Customer feedback analysis →](https://microsoft.github.io/openaivec/examples/customer_analysis/)** - Sentiment analysis & prioritization
|
|
539
|
-
📓 **[Survey data transformation →](https://microsoft.github.io/openaivec/examples/survey_transformation/)** - Unstructured to structured data
|
|
540
|
-
📓 **[Asynchronous processing examples →](https://microsoft.github.io/openaivec/examples/aio/)** - High-performance async workflows
|
|
541
|
-
📓 **[Auto-generate FAQs from documents →](https://microsoft.github.io/openaivec/examples/generate_faq/)** - Create FAQs using AI
|
|
542
|
-
📓 **[All examples →](https://microsoft.github.io/openaivec/examples/)** - Complete collection of tutorials and use cases
|
|
543
|
-
|
|
544
|
-
## Community
|
|
545
|
-
|
|
546
|
-
Join our Discord community for developers: https://discord.gg/vbb83Pgn
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
openaivec/__init__.py,sha256=CuUAtLtX5RhFUbgF94UmXjurgL2VaerHTFPjMl0rRlE,236
|
|
2
|
-
openaivec/embeddings.py,sha256=ZmGUuTMrZ_lNHxROpH_CcQIc54wzPqzzoqs_B4hQezg,6671
|
|
3
|
-
openaivec/log.py,sha256=GofgzUpv_xDVuGC-gYmit5Oyu06it1SBXRck6COR5go,1439
|
|
4
|
-
openaivec/pandas_ext.py,sha256=CUqbSX41YOVKhenfjmZNSO6QIUc0KsexpshIUTZm2GU,33834
|
|
5
|
-
openaivec/prompt.py,sha256=gY9aQmCig4--Fd9XZzwrqwXqJJlzRQYfmtYu8RwKYbw,17714
|
|
6
|
-
openaivec/responses.py,sha256=FDX2-5nsNGuqO75BIIQRe-xv23XXTRGb_gA-EZ6fdh8,14571
|
|
7
|
-
openaivec/serialize.py,sha256=I83VFpm2u0jJyZPlguKlcBpN3wvLhdQyHJDsRCviF4o,7328
|
|
8
|
-
openaivec/spark.py,sha256=_-bjjcYJm5J5TSIRZOA8UbRMOZIjE-kJazQJhn-M3iM,20996
|
|
9
|
-
openaivec/util.py,sha256=YDOkaavqV4mTlBxLVbfqy_AimM8Bmqa8MSMpk5tW6gY,9970
|
|
10
|
-
openaivec/task/__init__.py,sha256=8jz6Uwz9ZXIEsPXYfyVVHvHzyqmxDsolovulNY4xx14,6223
|
|
11
|
-
openaivec/task/model.py,sha256=6Q_W0FSSzXOiJo-asYdTLWIG1e421UHe0NQ8BnTgTLM,2716
|
|
12
|
-
openaivec/task/customer_support/__init__.py,sha256=K9gMgTHW8L8svx6TWDgfKRdwBIvj0e968ZMpjqOWSQ4,1068
|
|
13
|
-
openaivec/task/customer_support/customer_sentiment.py,sha256=vZFRc8SMh6H7IXQC_QfO2Emh0B6DMuIYKMLITOPzUE0,7555
|
|
14
|
-
openaivec/task/customer_support/inquiry_classification.py,sha256=Xlyipj5UxXvLb6oSJrmYaVMLVauTBYsPtPsxoVYG2CU,9222
|
|
15
|
-
openaivec/task/customer_support/inquiry_summary.py,sha256=fuxox2u2FoWfcb2dv8DFC0uIvCO6a1XaxQ97Xr4BWZ4,6970
|
|
16
|
-
openaivec/task/customer_support/intent_analysis.py,sha256=ycsS3BI--ySwiQVnMcX6_tPTeP6CnibdaEstNf1cahU,7404
|
|
17
|
-
openaivec/task/customer_support/response_suggestion.py,sha256=Y743zN6rTspY5KVXlI8AYPRdfmLiU8oPDBwzSWQRFEg,8319
|
|
18
|
-
openaivec/task/customer_support/urgency_analysis.py,sha256=7EKSxIqgNeqX6yq4Wziuo9vUfHuUWTGDcvTocTnh-BI,11478
|
|
19
|
-
openaivec/task/nlp/__init__.py,sha256=sKUekRVlmgrTPjpK5hKpJF_eQCeECd_tPTm3zBU5D3E,511
|
|
20
|
-
openaivec/task/nlp/dependency_parsing.py,sha256=oD9SKn2wMNJbsWyJRYvisOMzWByi_fhAs75vUoTWskc,2844
|
|
21
|
-
openaivec/task/nlp/keyword_extraction.py,sha256=7gi9aBbD3pFVzH8mn0t799YWibX4D_ReI-GKZMyWDLY,2853
|
|
22
|
-
openaivec/task/nlp/morphological_analysis.py,sha256=88TRLjsEMMNLGfcI3GkDYRoj1QgcxCun1odF9tZEhCQ,2435
|
|
23
|
-
openaivec/task/nlp/named_entity_recognition.py,sha256=OeZh0DCZYZ1WxHsmsbClMfXbUtddVyUBNDyioz5moN0,3097
|
|
24
|
-
openaivec/task/nlp/sentiment_analysis.py,sha256=VG5C3x9S8eKvKXU9gGJXGq27SBNVgApuk8gBgV4AQn4,3044
|
|
25
|
-
openaivec/task/nlp/translation.py,sha256=K4BMNF9Bjra8JDYDc9y5FpP8-v_ccH3ulhB27iqSehg,6696
|
|
26
|
-
openaivec-0.10.0.dist-info/METADATA,sha256=3B-D-ogt6LjFQL4REA_qJDTcCaqcOtc9-KQloKdlIHo,19226
|
|
27
|
-
openaivec-0.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
28
|
-
openaivec-0.10.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
29
|
-
openaivec-0.10.0.dist-info/RECORD,,
|
|
File without changes
|