openaivec 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/__init__.py +2 -2
- openaivec/di.py +3 -3
- openaivec/embeddings.py +9 -8
- openaivec/model.py +37 -1
- openaivec/pandas_ext.py +158 -37
- openaivec/prompt.py +34 -13
- openaivec/provider.py +70 -18
- openaivec/proxy.py +166 -28
- openaivec/responses.py +83 -34
- openaivec/serialize.py +1 -1
- openaivec/spark.py +23 -22
- openaivec/task/customer_support/__init__.py +6 -12
- openaivec/task/customer_support/customer_sentiment.py +12 -4
- openaivec/task/customer_support/inquiry_classification.py +11 -4
- openaivec/task/customer_support/inquiry_summary.py +8 -3
- openaivec/task/customer_support/intent_analysis.py +10 -4
- openaivec/task/customer_support/response_suggestion.py +10 -4
- openaivec/task/customer_support/urgency_analysis.py +8 -3
- openaivec/task/nlp/__init__.py +3 -3
- openaivec/task/nlp/dependency_parsing.py +4 -2
- openaivec/task/nlp/keyword_extraction.py +3 -2
- openaivec/task/nlp/morphological_analysis.py +4 -2
- openaivec/task/nlp/named_entity_recognition.py +4 -2
- openaivec/task/nlp/sentiment_analysis.py +7 -2
- openaivec/task/nlp/translation.py +1 -1
- openaivec/task/table/__init__.py +1 -1
- openaivec/task/table/fillna.py +4 -3
- openaivec/util.py +0 -1
- {openaivec-0.13.1.dist-info → openaivec-0.13.3.dist-info}/METADATA +42 -8
- openaivec-0.13.3.dist-info/RECORD +34 -0
- openaivec-0.13.1.dist-info/RECORD +0 -34
- {openaivec-0.13.1.dist-info → openaivec-0.13.3.dist-info}/WHEEL +0 -0
- {openaivec-0.13.1.dist-info → openaivec-0.13.3.dist-info}/licenses/LICENSE +0 -0
openaivec/__init__.py
CHANGED
````diff
@@ -1,5 +1,5 @@
-from .embeddings import
-from .responses import
+from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from .responses import AsyncBatchResponses, BatchResponses
 
 __all__ = [
     "BatchResponses",
````
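The rewritten import lines make the package's top-level re-exports explicit. A quick import check, assuming the 0.13.3 wheel is installed (the four names come from the added lines and from `__all__`):

```python
# The four batch classes re-exported at package top level in 0.13.3.
from openaivec import (
    AsyncBatchEmbeddings,
    AsyncBatchResponses,
    BatchEmbeddings,
    BatchResponses,
)

print(BatchResponses.__name__)  # listed in __all__ above
```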
openaivec/di.py
CHANGED
````diff
@@ -11,14 +11,14 @@ are created once and reused across multiple resolve calls.
 Example:
     ```python
     from openaivec.di import Container
-
+
     class DatabaseService:
         def __init__(self):
             self.connection = "database://localhost"
-
+
     container = Container()
     container.register(DatabaseService, lambda: DatabaseService())
-
+
     db1 = container.resolve(DatabaseService)
     db2 = container.resolve(DatabaseService)
     print(db1 is db2)  # True - same instance
````

The removed and added lines are all blank, so this hunk is a whitespace-only cleanup of the docstring example.
openaivec/embeddings.py
CHANGED
````diff
@@ -6,9 +6,9 @@ import numpy as np
 from numpy.typing import NDArray
 from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
 
-from .log import observe
-from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from .util import backoff, backoff_async
+from openaivec.log import observe
+from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec.util import backoff, backoff_async
 
 __all__ = [
     "BatchEmbeddings",
@@ -24,7 +24,8 @@ class BatchEmbeddings:
 
     Attributes:
         client (OpenAI): Configured OpenAI client.
-        model_name (str):
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
+            (e.g., ``"text-embedding-3-small"``).
         cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
     """
 
@@ -38,7 +39,7 @@ class BatchEmbeddings:
 
         Args:
            client (OpenAI): OpenAI client.
-            model_name (str):
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
 
        Returns:
@@ -90,7 +91,7 @@ class AsyncBatchEmbeddings:
         import asyncio
         import numpy as np
         from openai import AsyncOpenAI
-
+        from openaivec import AsyncBatchEmbeddings
 
         # Assuming openai_async_client is an initialized AsyncOpenAI client
         openai_async_client = AsyncOpenAI()  # Replace with your actual client initialization
@@ -119,7 +120,7 @@ class AsyncBatchEmbeddings:
 
     Attributes:
         client (AsyncOpenAI): Configured OpenAI async client.
-        model_name (str):
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
        cache (AsyncBatchingMapProxy[str, NDArray[np.float32]]): Async batching proxy.
    """
 
@@ -141,7 +142,7 @@ class AsyncBatchEmbeddings:
 
         Args:
            client (AsyncOpenAI): OpenAI async client.
-            model_name (str):
+            model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name.
            batch_size (int, optional): Max unique inputs per API call. Defaults to 128.
            max_concurrency (int, optional): Max concurrent API calls. Defaults to 8.
 
````
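The clarified `model_name` docs distinguish Azure deployment names from plain OpenAI model names. A minimal construction sketch based only on the Args documented above; treating `client`, `model_name`, and `batch_size` as constructor arguments is an assumption, since the hunks do not show the enclosing signature:

```python
# Sketch, assuming BatchEmbeddings accepts the documented Args directly.
from openai import OpenAI
from openaivec import BatchEmbeddings

client = OpenAI()  # reads OPENAI_API_KEY from the environment
embedder = BatchEmbeddings(
    client=client,
    model_name="text-embedding-3-small",  # on Azure OpenAI: your deployment name
    batch_size=128,  # max unique inputs per API call
)
```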
openaivec/model.py
CHANGED
````diff
@@ -59,29 +59,65 @@ class PreparedTask:
 
 @dataclass(frozen=True)
 class ResponsesModelName:
+    """Container for responses model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI responses API.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class EmbeddingsModelName:
+    """Container for embeddings model name configuration.
+
+    Attributes:
+        value (str): The model name for OpenAI embeddings API.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class OpenAIAPIKey:
+    """Container for OpenAI API key configuration.
+
+    Attributes:
+        value (str): The API key for OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class AzureOpenAIAPIKey:
+    """Container for Azure OpenAI API key configuration.
+
+    Attributes:
+        value (str): The API key for Azure OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
-class
+class AzureOpenAIBaseURL:
+    """Container for Azure OpenAI base URL configuration.
+
+    Attributes:
+        value (str): The base URL for Azure OpenAI services.
+    """
+
     value: str
 
 
 @dataclass(frozen=True)
 class AzureOpenAIAPIVersion:
+    """Container for Azure OpenAI API version configuration.
+
+    Attributes:
+        value (str): The API version for Azure OpenAI services.
+    """
+
     value: str
````
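Every configuration container above is a frozen single-field dataclass, so a value registered in the DI container cannot be mutated afterwards. A small sketch of that behavior; the import path follows the `from openaivec.model import ...` line in the pandas_ext hunks below:

```python
# frozen=True dataclasses raise on attribute assignment after construction.
import dataclasses
from openaivec.model import ResponsesModelName

name = ResponsesModelName("gpt-4.1-mini")
print(name.value)  # gpt-4.1-mini

try:
    name.value = "gpt-4o"
except dataclasses.FrozenInstanceError:
    print("immutable, as expected")
```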
openaivec/pandas_ext.py
CHANGED
````diff
@@ -7,7 +7,7 @@ from openaivec import pandas_ext
 
 # Option 1: Use environment variables (automatic detection)
 # Set OPENAI_API_KEY or Azure OpenAI environment variables
-# (AZURE_OPENAI_API_KEY,
+# (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
 # No explicit setup needed - clients are automatically created
 
 # Option 2: Use an existing OpenAI client instance
@@ -17,14 +17,18 @@ pandas_ext.use(client)
 # Option 3: Use an existing Azure OpenAI client instance
 azure_client = AzureOpenAI(
     api_key="your-azure-key",
-
-    api_version="
+    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+    api_version="preview"
 )
 pandas_ext.use(azure_client)
 
-# Option 4: Use async
-
-
+# Option 4: Use async Azure OpenAI client instance
+async_azure_client = AsyncAzureOpenAI(
+    api_key="your-azure-key",
+    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
+    api_version="preview"
+)
+pandas_ext.use_async(async_azure_client)
 
 # Set up model names (optional, defaults shown)
 pandas_ext.responses_model("gpt-4.1-mini")
@@ -46,12 +50,12 @@ import tiktoken
 from openai import AsyncOpenAI, OpenAI
 from pydantic import BaseModel
 
-from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
-from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
-from .provider import CONTAINER
-from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from .responses import AsyncBatchResponses, BatchResponses
-from .task.table import FillNaResponse, fillna
+from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
+from openaivec.provider import CONTAINER, _check_azure_v1_api_url
+from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec.responses import AsyncBatchResponses, BatchResponses
+from openaivec.task.table import FillNaResponse, fillna
 
 __all__ = [
     "use",
````
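Option 1 in the updated docstring relies on the three Azure variables now spelled out in the comment. A sketch of that path with placeholder values; the resource-URL shape is copied from Option 3 above:

```python
# Environment-variable configuration (Option 1); values are placeholders.
import os

os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-key"
os.environ["AZURE_OPENAI_BASE_URL"] = "https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/"
os.environ["AZURE_OPENAI_API_VERSION"] = "preview"

from openaivec import pandas_ext  # clients are created automatically
```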
````diff
@@ -74,6 +78,10 @@ def use(client: OpenAI) -> None:
         `openai.AzureOpenAI` instance.
     The same instance is reused by every helper in this module.
     """
+    # Check Azure v1 API URL if using AzureOpenAI client
+    if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
+
     CONTAINER.register(OpenAI, lambda: client)
 
 
@@ -85,6 +93,10 @@ def use_async(client: AsyncOpenAI) -> None:
         `openai.AsyncAzureOpenAI` instance.
     The same instance is reused by every helper in this module.
     """
+    # Check Azure v1 API URL if using AsyncAzureOpenAI client
+    if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
+        _check_azure_v1_api_url(str(client.base_url))
+
     CONTAINER.register(AsyncOpenAI, lambda: client)
 
 
````
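Both setters now route Azure clients through `_check_azure_v1_api_url` before registration. The `provider.py` hunks are not shown here, so whether a non-v1 URL warns or raises is unknown; what the guard inspects is the client's `base_url`. A sketch of the path that passes the check, reusing Option 3 from the module docstring:

```python
# An Azure client built on the /openai/v1/ base URL, as the new guard expects.
from openai import AzureOpenAI
from openaivec import pandas_ext

azure_client = AzureOpenAI(
    api_key="your-azure-key",
    base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
    api_version="preview",
)
pandas_ext.use(azure_client)  # base_url is validated before registration
```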
````diff
@@ -92,7 +104,7 @@ def responses_model(name: str) -> None:
     """Override the model used for text responses.
 
     Args:
-        name (str):
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
             (for example, ``gpt-4.1-mini``).
     """
     CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
@@ -102,7 +114,8 @@ def embeddings_model(name: str) -> None:
     """Override the model used for text embeddings.
 
     Args:
-        name (str):
+        name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name,
+            e.g. ``text-embedding-3-small``.
     """
     CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
 
````
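The two setters take the same kind of argument; a combined sketch using the example names documented above (on Azure OpenAI these would be deployment names):

```python
from openaivec import pandas_ext

pandas_ext.responses_model("gpt-4.1-mini")
pandas_ext.embeddings_model("text-embedding-3-small")
```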
````diff
@@ -143,7 +156,7 @@ class OpenAIVecSeriesAccessor:
         instructions: str,
         cache: BatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         client: BatchResponses = BatchResponses(
@@ -205,15 +218,25 @@ class OpenAIVecSeriesAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Call an LLM once for every Series element.
 
         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.responses("translate to French")
+
+            # With progress bar in Jupyter notebooks
+            large_series = pd.Series(["data"] * 1000)
+            large_series.ai.responses(
+                "analyze this data",
+                batch_size=32,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
````
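The `temperature` annotation widens from `float` to `float | None` throughout this file. The diff does not show how `None` is consumed downstream, so this is an inference: passing `None` presumably omits the sampling parameter from the request, which some models require. A sketch under that assumption:

```python
import pandas as pd
from openaivec import pandas_ext  # registers the .ai accessor

animals = pd.Series(["cat", "dog", "elephant"])
# Assumption: temperature=None means "send no temperature parameter".
translations = animals.ai.responses("translate to French", temperature=None)
```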
````diff
@@ -228,13 +251,14 @@ class OpenAIVecSeriesAccessor:
                 request. Defaults to ``128``.
             temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
         """
         return self.responses_with_cache(
             instructions=instructions,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
@@ -287,7 +311,7 @@ class OpenAIVecSeriesAccessor:
         )
         return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
 
-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+    def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Execute a prepared task on every Series element.
 
         This method applies a pre-configured task to each element in the Series,
````
````diff
@@ -302,7 +326,16 @@ class OpenAIVecSeriesAccessor:
             sentiment_task = PreparedTask(...)
 
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+            # Basic usage
             results = reviews.ai.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = large_reviews.ai.task(
+                sentiment_task,
+                batch_size=50,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding input element, following the task's defined structure.
@@ -312,6 +345,7 @@ class OpenAIVecSeriesAccessor:
             response format, and other parameters for processing the inputs.
             batch_size (int, optional): Number of prompts grouped into a single
                 request to optimize API usage. Defaults to 128.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
````
````diff
@@ -319,16 +353,24 @@ class OpenAIVecSeriesAccessor:
         """
         return self.task_with_cache(
             task=task,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
         )
 
-    def embeddings(self, batch_size: int = 128) -> pd.Series:
+    def embeddings(self, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Compute OpenAI embeddings for every Series element.
 
         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = large_texts.ai.embeddings(
+                batch_size=100,
+                show_progress=True
+            )
             ```
         This method returns a Series of numpy arrays, each containing the
         embedding vector for the corresponding input.
````
````diff
@@ -338,13 +380,14 @@ class OpenAIVecSeriesAccessor:
         Args:
             batch_size (int, optional): Number of inputs grouped into a
                 single request. Defaults to ``128``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects
                 (dtype ``float32``).
         """
         return self.embeddings_with_cache(
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
         )
 
     def count_tokens(self) -> pd.Series:
````
````diff
@@ -438,7 +481,7 @@ class OpenAIVecDataFrameAccessor:
         instructions: str,
         cache: BatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON using a provided cache.
@@ -496,8 +539,9 @@ class OpenAIVecDataFrameAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON.
 
@@ -508,7 +552,16 @@ class OpenAIVecDataFrameAccessor:
                 {"name": "dog", "legs": 4},
                 {"name": "elephant", "legs": 4},
             ])
+            # Basic usage
             df.ai.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            large_df.ai.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
````
````diff
@@ -524,19 +577,20 @@ class OpenAIVecDataFrameAccessor:
                 Defaults to ``128``.
             temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Responses aligned with the DataFrame's original index.
         """
         return self.responses_with_cache(
             instructions=instructions,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )
 
-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+    def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Execute a prepared task on each DataFrame row after serialising it to JSON.
 
         This method applies a pre-configured task to each row in the DataFrame,
@@ -566,6 +620,7 @@ class OpenAIVecDataFrameAccessor:
             response format, and other parameters for processing the inputs.
             batch_size (int, optional): Number of requests sent in one batch
                 to optimize API usage. Defaults to 128.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
@@ -575,7 +630,7 @@ class OpenAIVecDataFrameAccessor:
             lambda df: (
                 df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
                 .map(lambda x: json.dumps(x, ensure_ascii=False))
-                .ai.task(task=task, batch_size=batch_size)
+                .ai.task(task=task, batch_size=batch_size, show_progress=show_progress)
             )
         )
 
````
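The pipeline in the last hunk shows exactly what a row-level task sees: each row becomes a dict, is JSON-encoded, and only then is handed to the Series-level `task`. Replaying just the serialisation step makes that concrete:

```python
# The row -> JSON-string transformation performed before .ai.task runs.
import json
import pandas as pd

df = pd.DataFrame([{"name": "cat", "legs": 4}, {"name": "dog", "legs": 4}])
records = pd.Series(df.to_dict(orient="records"), index=df.index, name="record")
payloads = records.map(lambda x: json.dumps(x, ensure_ascii=False))
print(payloads.iloc[0])  # {"name": "cat", "legs": 4}
```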
````diff
@@ -681,7 +736,7 @@ class AsyncOpenAIVecSeriesAccessor:
         instructions: str,
         cache: AsyncBatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Call an LLM once for every Series element using a provided cache (asynchronously).
@@ -848,9 +903,10 @@ class AsyncOpenAIVecSeriesAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Call an LLM once for every Series element (asynchronously).
 
@@ -859,6 +915,15 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             results = await animals.aio.responses("translate to French")
+
+            # With progress bar for large datasets
+            large_series = pd.Series(["data"] * 1000)
+            results = await large_series.aio.responses(
+                "analyze this data",
+                batch_size=32,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
@@ -875,6 +940,7 @@ class AsyncOpenAIVecSeriesAccessor:
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
````
````diff
@@ -884,13 +950,17 @@ class AsyncOpenAIVecSeriesAccessor:
         """
         return await self.responses_with_cache(
             instructions=instructions,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )
 
-    async def embeddings(
+    async def embeddings(
+        self, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Compute OpenAI embeddings for every Series element (asynchronously).
 
         Example:
@@ -898,6 +968,14 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             embeddings = await animals.aio.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = await large_texts.aio.embeddings(
+                batch_size=100,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of numpy arrays, each containing the
         embedding vector for the corresponding input.
@@ -909,6 +987,7 @@ class AsyncOpenAIVecSeriesAccessor:
                 single request. Defaults to ``128``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects
````
````diff
@@ -918,10 +997,14 @@ class AsyncOpenAIVecSeriesAccessor:
         This is an asynchronous method and must be awaited.
         """
         return await self.embeddings_with_cache(
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
         )
 
-    async def task(
+    async def task(
+        self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Execute a prepared task on every Series element (asynchronously).
 
         This method applies a pre-configured task to each element in the Series,
@@ -938,6 +1021,15 @@ class AsyncOpenAIVecSeriesAccessor:
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
             # Must be awaited
             results = await reviews.aio.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = await large_reviews.aio.task(
+                sentiment_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding input element, following the task's defined structure.
@@ -949,6 +1041,7 @@ class AsyncOpenAIVecSeriesAccessor:
                 request to optimize API usage. Defaults to 128.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
@@ -959,7 +1052,9 @@ class AsyncOpenAIVecSeriesAccessor:
         """
         return await self.task_with_cache(
             task=task,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
         )
 
 
````
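All `.aio` accessor methods above return coroutines; the docstring examples assume a running event loop, as in Jupyter. In a plain script they would be driven with `asyncio.run`, sketched here around the docstring's own call:

```python
# Driving the async accessor outside a notebook; the call is from the docstring.
import asyncio
import pandas as pd
from openaivec import pandas_ext  # registers the .aio accessor

async def main() -> None:
    animals = pd.Series(["cat", "dog", "elephant"])
    results = await animals.aio.responses("translate to French")
    print(results)

asyncio.run(main())
```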
````diff
@@ -975,7 +1070,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         instructions: str,
         cache: AsyncBatchingMapProxy[str, ResponseFormat],
         response_format: Type[ResponseFormat] = str,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
@@ -1040,9 +1135,10 @@ class AsyncOpenAIVecDataFrameAccessor:
         instructions: str,
         response_format: Type[ResponseFormat] = str,
         batch_size: int = 128,
-        temperature: float = 0.0,
+        temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON (asynchronously).
 
@@ -1055,6 +1151,15 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.
@@ -1072,6 +1177,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Responses aligned with the DataFrame's original index.
````
````diff
@@ -1081,13 +1187,17 @@ class AsyncOpenAIVecDataFrameAccessor:
         """
         return await self.responses_with_cache(
             instructions=instructions,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )
 
-    async def task(
+    async def task(
+        self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
 
         This method applies a pre-configured task to each row in the DataFrame,
@@ -1109,6 +1219,15 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.task(analysis_task)
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.task(
+                analysis_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding row, following the task's defined structure.
@@ -1120,6 +1239,7 @@ class AsyncOpenAIVecDataFrameAccessor:
                 to optimize API usage. Defaults to 128.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
             pandas.Series: Series whose values are instances of the task's
````
````diff
@@ -1140,6 +1260,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             task=task,
             batch_size=batch_size,
             max_concurrency=max_concurrency,
+            show_progress=show_progress,
         )
 
     async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
````