openaivec 0.14.13__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/pandas_ext.py +67 -36
- openaivec/spark.py +66 -17
- {openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/METADATA +8 -6
- {openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/RECORD +6 -6
- {openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/WHEEL +0 -0
- {openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/licenses/LICENSE +0 -0
openaivec/pandas_ext.py
CHANGED

@@ -10,29 +10,32 @@ from openaivec import pandas_ext
     # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
     # No explicit setup needed - clients are automatically created

-    # Option 2:
+    # Option 2: Register an existing OpenAI client instance
     client = OpenAI(api_key="your-api-key")
-    pandas_ext.use(client)
+    pandas_ext.set_client(client)

-    # Option 3:
+    # Option 3: Register an Azure OpenAI client instance
     azure_client = AzureOpenAI(
         api_key="your-azure-key",
         base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
         api_version="preview"
     )
-    pandas_ext.use(azure_client)
+    pandas_ext.set_client(azure_client)

-    # Option 4:
+    # Option 4: Register an async Azure OpenAI client instance
     async_azure_client = AsyncAzureOpenAI(
         api_key="your-azure-key",
         base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
         api_version="preview"
     )
-    pandas_ext.use_async(async_azure_client)
+    pandas_ext.set_async_client(async_azure_client)

     # Set up model names (optional, defaults shown)
-    pandas_ext.responses_model("gpt-4.1-mini")
-    pandas_ext.embeddings_model("text-embedding-3-small")
+    pandas_ext.set_responses_model("gpt-4.1-mini")
+    pandas_ext.set_embeddings_model("text-embedding-3-small")
+
+    # Inspect current configuration
+    configured_model = pandas_ext.get_responses_model()
     ```

 This module provides `.ai` and `.aio` accessors for pandas Series and DataFrames

@@ -49,15 +52,6 @@ import numpy as np
 import pandas as pd
 import tiktoken
 from openai import AsyncOpenAI, OpenAI
-
-from openaivec._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
-
-__all__ = [
-    "embeddings_model",
-    "responses_model",
-    "use",
-    "use_async",
-]
 from pydantic import BaseModel

 from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings

@@ -65,13 +59,18 @@ from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat,
 from openaivec._provider import CONTAINER, _check_azure_v1_api_url
 from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
 from openaivec._responses import AsyncBatchResponses, BatchResponses
+from openaivec._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
 from openaivec.task.table import FillNaResponse, fillna

 __all__ = [
-    "embeddings_model",
-    "responses_model",
-    "use",
-    "use_async",
+    "get_async_client",
+    "get_client",
+    "get_embeddings_model",
+    "get_responses_model",
+    "set_async_client",
+    "set_client",
+    "set_embeddings_model",
+    "set_responses_model",
 ]

 _LOGGER = logging.getLogger(__name__)

@@ -95,37 +94,51 @@ def _df_rows_to_json_series(df: pd.DataFrame) -> pd.Series:
 T = TypeVar("T")  # For pipe function return type


-def use(client: OpenAI) -> None:
-    """Register a custom OpenAI
+def set_client(client: OpenAI) -> None:
+    """Register a custom OpenAI-compatible client for pandas helpers.

     Args:
-        client (OpenAI): A pre-configured `openai.OpenAI` or
-            `openai.AzureOpenAI` instance.
-            The same instance is reused by every helper in this module.
+        client (OpenAI): A pre-configured `openai.OpenAI` or
+            `openai.AzureOpenAI` instance reused by every helper in this module.
     """
-    # Check Azure v1 API URL if using AzureOpenAI client
     if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
         _check_azure_v1_api_url(str(client.base_url))

     CONTAINER.register(OpenAI, lambda: client)


-def use_async(client: AsyncOpenAI) -> None:
-    """
+def get_client() -> OpenAI:
+    """Get the currently registered OpenAI-compatible client.
+
+    Returns:
+        OpenAI: The registered `openai.OpenAI` or `openai.AzureOpenAI` instance.
+    """
+    return CONTAINER.resolve(OpenAI)
+
+
+def set_async_client(client: AsyncOpenAI) -> None:
+    """Register a custom asynchronous OpenAI-compatible client.

     Args:
-        client (AsyncOpenAI): A pre-configured `openai.AsyncOpenAI` or
-            `openai.AsyncAzureOpenAI` instance.
-            The same instance is reused by every helper in this module.
+        client (AsyncOpenAI): A pre-configured `openai.AsyncOpenAI` or
+            `openai.AsyncAzureOpenAI` instance reused by every helper in this module.
     """
-    # Check Azure v1 API URL if using AsyncAzureOpenAI client
     if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
         _check_azure_v1_api_url(str(client.base_url))

     CONTAINER.register(AsyncOpenAI, lambda: client)


-def responses_model(name: str) -> None:
+def get_async_client() -> AsyncOpenAI:
+    """Get the currently registered asynchronous OpenAI-compatible client.
+
+    Returns:
+        AsyncOpenAI: The registered `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` instance.
+    """
+    return CONTAINER.resolve(AsyncOpenAI)
+
+
+def set_responses_model(name: str) -> None:
     """Override the model used for text responses.

     Args:

@@ -135,7 +148,16 @@ def responses_model(name: str) -> None:
     CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))


-def embeddings_model(name: str) -> None:
+def get_responses_model() -> str:
+    """Get the currently registered model name for text responses.
+
+    Returns:
+        str: The model name (for example, ``gpt-4.1-mini``).
+    """
+    return CONTAINER.resolve(ResponsesModelName).value
+
+
+def set_embeddings_model(name: str) -> None:
     """Override the model used for text embeddings.

     Args:

@@ -145,6 +167,15 @@ def embeddings_model(name: str) -> None:
     CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))


+def get_embeddings_model() -> str:
+    """Get the currently registered model name for text embeddings.
+
+    Returns:
+        str: The model name (for example, ``text-embedding-3-small``).
+    """
+    return CONTAINER.resolve(EmbeddingsModelName).value
+
+
 def _extract_value(x, series_name):
     """Return a homogeneous ``dict`` representation of any Series value.

@@ -639,7 +670,7 @@ class OpenAIVecSeriesAccessor:
            animals.ai.count_tokens()
            ```
        This method uses the `tiktoken` library to count tokens based on the
-       model name
+       model name configured via `set_responses_model`.

        Returns:
            pandas.Series: Token counts for each element.
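Taken together, the pandas_ext changes rename the configuration API: the old `use`, `use_async`, `responses_model`, and `embeddings_model` entry points become `set_client`, `set_async_client`, `set_responses_model`, and `set_embeddings_model`, each with a new `get_*` counterpart. A minimal migration sketch using only the functions visible in this diff; the API keys are placeholders:

```python
from openai import AsyncOpenAI, OpenAI

from openaivec import pandas_ext

client = OpenAI(api_key="your-api-key")
async_client = AsyncOpenAI(api_key="your-api-key")

pandas_ext.set_client(client)              # 0.14.x: pandas_ext.use(client)
pandas_ext.set_async_client(async_client)  # 0.14.x: pandas_ext.use_async(async_client)
pandas_ext.set_responses_model("gpt-4.1-mini")             # 0.14.x: pandas_ext.responses_model(...)
pandas_ext.set_embeddings_model("text-embedding-3-small")  # 0.14.x: pandas_ext.embeddings_model(...)

# New in 0.15.0: read the registered configuration back out.
assert pandas_ext.get_responses_model() == "gpt-4.1-mini"
assert pandas_ext.get_embeddings_model() == "text-embedding-3-small"
assert pandas_ext.get_client() is client
assert pandas_ext.get_async_client() is async_client
```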
openaivec/spark.py
CHANGED

@@ -193,8 +193,6 @@ def setup(
         CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(responses_model_name))

     if embeddings_model_name:
-        from openaivec._model import EmbeddingsModelName
-
         CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(embeddings_model_name))

     CONTAINER.clear_singletons()

@@ -244,6 +242,50 @@ def setup_azure(
     CONTAINER.clear_singletons()


+def set_responses_model(model_name: str):
+    """Set the default model name for response generation in the DI container.
+
+    Args:
+        model_name (str): The model name to set as default for responses.
+    """
+    CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(model_name))
+    CONTAINER.clear_singletons()
+
+
+def get_responses_model() -> str | None:
+    """Get the default model name for response generation from the DI container.
+
+    Returns:
+        str | None: The default model name for responses, or None if not set.
+    """
+    try:
+        return CONTAINER.resolve(ResponsesModelName).value
+    except Exception:
+        return None
+
+
+def set_embeddings_model(model_name: str):
+    """Set the default model name for embeddings in the DI container.
+
+    Args:
+        model_name (str): The model name to set as default for embeddings.
+    """
+    CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(model_name))
+    CONTAINER.clear_singletons()
+
+
+def get_embeddings_model() -> str | None:
+    """Get the default model name for embeddings from the DI container.
+
+    Returns:
+        str | None: The default model name for embeddings, or None if not set.
+    """
+    try:
+        return CONTAINER.resolve(EmbeddingsModelName).value
+    except Exception:
+        return None
+
+
 def _python_type_to_spark(python_type):
     origin = get_origin(python_type)

@@ -322,7 +364,7 @@ def _safe_dump(x: BaseModel | None) -> dict:
 def responses_udf(
     instructions: str,
     response_format: type[ResponseFormat] = str,
-    model_name: str =
+    model_name: str | None = None,
     batch_size: int | None = None,
     max_concurrency: int = 8,
     **api_kwargs,

@@ -351,8 +393,9 @@
         instructions (str): The system prompt or instructions for the model.
         response_format (type[ResponseFormat]): The desired output format. Either `str` for plain text
             or a Pydantic `BaseModel` for structured JSON output. Defaults to `str`.
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
-            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
+            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+            via ResponsesModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically

@@ -382,13 +425,15 @@
         - Consider your OpenAI tier limits: total_requests = max_concurrency × executors
         - Use Spark UI to optimize partition sizes relative to batch_size
     """
+    _model_name = model_name or CONTAINER.resolve(ResponsesModelName).value
+
     if issubclass(response_format, BaseModel):
         spark_schema = _pydantic_to_spark_schema(response_format)
         json_schema_string = serialize_base_model(response_format)

         @pandas_udf(returnType=spark_schema)  # type: ignore[call-overload]
         def structure_udf(col: Iterator[pd.Series]) -> Iterator[pd.DataFrame]:
-            pandas_ext.responses_model(model_name)
+            pandas_ext.set_responses_model(_model_name)
             response_format = deserialize_base_model(json_schema_string)
             cache = AsyncBatchingMapProxy[str, response_format](
                 batch_size=batch_size,

@@ -415,7 +460,7 @@

         @pandas_udf(returnType=StringType())  # type: ignore[call-overload]
         def string_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
-            pandas_ext.responses_model(model_name)
+            pandas_ext.set_responses_model(_model_name)
             cache = AsyncBatchingMapProxy[str, str](
                 batch_size=batch_size,
                 max_concurrency=max_concurrency,

@@ -443,7 +488,7 @@

 def task_udf(
     task: PreparedTask[ResponseFormat],
-    model_name: str =
+    model_name: str | None = None,
     batch_size: int | None = None,
     max_concurrency: int = 8,
     **api_kwargs,

@@ -459,8 +504,9 @@
     Args:
         task (PreparedTask): A predefined task configuration containing instructions,
             response format, and API parameters.
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
-            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
+            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+            via ResponsesModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically

@@ -550,7 +596,7 @@ def parse_udf(
     example_table_name: str | None = None,
     example_field_name: str | None = None,
     max_examples: int = 100,
-    model_name: str =
+    model_name: str | None = None,
     batch_size: int | None = None,
     max_concurrency: int = 8,
     **api_kwargs,

@@ -574,8 +620,9 @@
             If provided, `example_table_name` must also be specified.
         max_examples (int): Maximum number of examples to retrieve for schema inference.
             Defaults to 100.
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
-            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
+            For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
+            via ResponsesModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically

@@ -622,7 +669,7 @@ def parse_udf(


 def embeddings_udf(
-    model_name: str =
+    model_name: str | None = None,
     batch_size: int | None = None,
     max_concurrency: int = 8,
     **api_kwargs,

@@ -648,9 +695,9 @@
         sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"

     Args:
-        model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-embedding-deployment").
+        model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-embedding-deployment").
             For OpenAI, use the model name (e.g., "text-embedding-3-small").
-            Defaults to configured model in DI container.
+            Defaults to configured model in DI container via EmbeddingsModelName if not provided.
         batch_size (int | None): Number of rows per async batch request within each partition.
             Larger values reduce API call overhead but increase memory usage.
             Defaults to None (automatic batch size optimization that dynamically

@@ -678,9 +725,11 @@
         - Use larger batch_size for embeddings compared to response generation
     """

+    _model_name = model_name or CONTAINER.resolve(EmbeddingsModelName).value
+
     @pandas_udf(returnType=ArrayType(FloatType()))  # type: ignore[call-overload,misc]
     def _embeddings_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
-        pandas_ext.embeddings_model(model_name)
+        pandas_ext.set_embeddings_model(_model_name)
         cache = AsyncBatchingMapProxy[str, np.ndarray](
             batch_size=batch_size,
             max_concurrency=max_concurrency,
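The spark module gains driver-side versions of the same setters and getters, and every UDF factory (`responses_udf`, `task_udf`, `parse_udf`, `embeddings_udf`) now takes `model_name: str | None = None`, resolving the DI-container default at construction time. A usage sketch; the SparkSession wiring, column names, and prior credential setup (for example via `setup` or the environment variables shown in the docstrings) are assumptions:

```python
from pyspark.sql import SparkSession

from openaivec import spark as oai_spark  # module alias for brevity

spark = SparkSession.builder.getOrCreate()

# Register defaults once on the driver; each UDF captures the resolved
# name in its closure and re-registers it on the executors.
oai_spark.set_responses_model("gpt-4.1-mini")
oai_spark.set_embeddings_model("text-embedding-3-small")
print(oai_spark.get_responses_model())   # "gpt-4.1-mini"
print(oai_spark.get_embeddings_model())  # "text-embedding-3-small"

# model_name can now be omitted; it falls back to the registered default.
translate_udf = oai_spark.responses_udf("Translate the text into French.")
embed_udf = oai_spark.embeddings_udf()

df = spark.createDataFrame([("hello",), ("good morning",)], ["text"])
df.select(translate_udf("text").alias("fr"), embed_udf("text").alias("vec")).show()
```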
{openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openaivec
-Version: 0.14.13
+Version: 0.15.0
 Summary: Generative mutation for tabular calculation
 Project-URL: Homepage, https://microsoft.github.io/openaivec/
 Project-URL: Repository, https://github.com/microsoft/openaivec

@@ -26,6 +26,8 @@ Description-Content-Type: text/markdown

 # openaivec

+[Contributor guidelines](AGENTS.md)
+
 **Transform your data analysis with AI-powered text processing at scale.**

 **openaivec** enables data analysts to seamlessly integrate OpenAI's language models into their pandas and Spark workflows. Process thousands of text records with natural language instructions, turning unstructured data into actionable insights with just a few lines of code.

@@ -187,13 +189,13 @@ os.environ["OPENAI_API_KEY"] = "your-api-key-here"

 # Authentication Option 2: Custom client (optional)
 # from openai import OpenAI, AsyncOpenAI
-# pandas_ext.use(OpenAI())
+# pandas_ext.set_client(OpenAI())
 # For async operations:
-# pandas_ext.use_async(AsyncOpenAI())
+# pandas_ext.set_async_client(AsyncOpenAI())

 # Configure model (optional - defaults to gpt-4.1-mini)
 # For Azure OpenAI: use your deployment name, for OpenAI: use model name
-pandas_ext.responses_model("gpt-4.1-mini")
+pandas_ext.set_responses_model("gpt-4.1-mini")

 # Create your data
 df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})

@@ -220,7 +222,7 @@ When using reasoning models (o1-preview, o1-mini, o3-mini, etc.), you must set `temperature=None`

 ```python
 # For reasoning models like o1-preview, o1-mini, o3-mini
-pandas_ext.responses_model("o1-mini")  # Set your reasoning model
+pandas_ext.set_responses_model("o1-mini")  # Set your reasoning model

 # MUST use temperature=None with reasoning models
 result = df.assign(

@@ -291,7 +293,7 @@ import pandas as pd
 from openaivec import pandas_ext

 # Setup (same as synchronous version)
-pandas_ext.responses_model("gpt-4.1-mini")
+pandas_ext.set_responses_model("gpt-4.1-mini")

 df = pd.DataFrame({"text": [
     "This product is amazing!",
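The reasoning-model hunk above renames the setter but is cut off at `result = df.assign(`. A hedged reconstruction of the pattern the README describes; the `.ai.responses` call shape and the `description` column are illustrative assumptions, while the `temperature=None` requirement comes from the README text itself:

```python
import pandas as pd

from openaivec import pandas_ext

pandas_ext.set_responses_model("o1-mini")  # reasoning model, 0.15.0 setter name

df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})
result = df.assign(
    # MUST pass temperature=None with reasoning models, per the README.
    description=lambda d: d["name"].ai.responses(
        "Describe this animal in one sentence.",
        temperature=None,
    )
)
```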
{openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/RECORD
CHANGED

@@ -12,8 +12,8 @@ openaivec/_responses.py,sha256=qBrYv4qblDIs5dRvj9t96r8UfAJmy4ZvtAe6csNZ7oM,20412
 openaivec/_schema.py,sha256=iOeR5J_ihZRDZtzmqvOK1ZtInKcx4OnoR38DB3VmmQw,15666
 openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
 openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
-openaivec/pandas_ext.py,sha256=
-openaivec/spark.py,sha256=
+openaivec/pandas_ext.py,sha256=1euz52rwKpUBvWRCKvkDjwCFf_zNYnf60wF5OXHiCqw,86727
+openaivec/spark.py,sha256=8-Hap36D0kcyV8RMA-PyFjZxfAnMfgtcp9gKASRnUwU,34032
 openaivec/task/__init__.py,sha256=RkYIKrcE83M_9Um9cSMkeGzL9kPRAovajfRvr31YxLE,6178
 openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
 openaivec/task/customer_support/customer_sentiment.py,sha256=d8spZUtImjePK0xWGvIW98ghbdyOZ0KEZmaUpG8QB7M,7532

@@ -31,7 +31,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=u-zpqAaQYcr7I3mqMv_CTJXkfxtoLft3
 openaivec/task/nlp/translation.py,sha256=kgWj2oN8pUId3vuHTJNx636gB49AGEKXWICA_XJgE_0,6628
 openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
 openaivec/task/table/fillna.py,sha256=zL6m5hGD4kamV7qHETnn__B59wIY540Ks0EzNgUJgdI,6888
-openaivec-0.14.13.dist-info/METADATA,sha256=
-openaivec-0.14.13.dist-info/WHEEL,sha256=
-openaivec-0.14.13.dist-info/licenses/LICENSE,sha256=
-openaivec-0.14.13.dist-info/RECORD,,
+openaivec-0.15.0.dist-info/METADATA,sha256=cVTYsT6TOMij_vagDgsIbo886U24Ys5dkah7ZvdEkdw,28278
+openaivec-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+openaivec-0.15.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
+openaivec-0.15.0.dist-info/RECORD,,

{openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/WHEEL
File without changes

{openaivec-0.14.13.dist-info → openaivec-0.15.0.dist-info}/licenses/LICENSE
File without changes