openaivec 0.14.14__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/pandas_ext.py CHANGED
@@ -10,29 +10,32 @@ from openaivec import pandas_ext
10
10
  # (AZURE_OPENAI_API_KEY, AZURE_OPENAI_BASE_URL, AZURE_OPENAI_API_VERSION)
11
11
  # No explicit setup needed - clients are automatically created
12
12
 
13
- # Option 2: Use an existing OpenAI client instance
13
+ # Option 2: Register an existing OpenAI client instance
14
14
  client = OpenAI(api_key="your-api-key")
15
- pandas_ext.use(client)
15
+ pandas_ext.set_client(client)
16
16
 
17
- # Option 3: Use an existing Azure OpenAI client instance
17
+ # Option 3: Register an Azure OpenAI client instance
18
18
  azure_client = AzureOpenAI(
19
19
  api_key="your-azure-key",
20
20
  base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
21
21
  api_version="preview"
22
22
  )
23
- pandas_ext.use(azure_client)
23
+ pandas_ext.set_client(azure_client)
24
24
 
25
- # Option 4: Use async Azure OpenAI client instance
25
+ # Option 4: Register an async Azure OpenAI client instance
26
26
  async_azure_client = AsyncAzureOpenAI(
27
27
  api_key="your-azure-key",
28
28
  base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
29
29
  api_version="preview"
30
30
  )
31
- pandas_ext.use_async(async_azure_client)
31
+ pandas_ext.set_async_client(async_azure_client)
32
32
 
33
33
  # Set up model names (optional, defaults shown)
34
- pandas_ext.responses_model("gpt-4.1-mini")
35
- pandas_ext.embeddings_model("text-embedding-3-small")
34
+ pandas_ext.set_responses_model("gpt-4.1-mini")
35
+ pandas_ext.set_embeddings_model("text-embedding-3-small")
36
+
37
+ # Inspect current configuration
38
+ configured_model = pandas_ext.get_responses_model()
36
39
  ```
37
40
 
38
41
  This module provides `.ai` and `.aio` accessors for pandas Series and DataFrames
@@ -49,15 +52,6 @@ import numpy as np
49
52
  import pandas as pd
50
53
  import tiktoken
51
54
  from openai import AsyncOpenAI, OpenAI
52
-
53
- from openaivec._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
54
-
55
- __all__ = [
56
- "embeddings_model",
57
- "responses_model",
58
- "use",
59
- "use_async",
60
- ]
61
55
  from pydantic import BaseModel
62
56
 
63
57
  from openaivec._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
@@ -65,13 +59,18 @@ from openaivec._model import EmbeddingsModelName, PreparedTask, ResponseFormat,
65
59
  from openaivec._provider import CONTAINER, _check_azure_v1_api_url
66
60
  from openaivec._proxy import AsyncBatchingMapProxy, BatchingMapProxy
67
61
  from openaivec._responses import AsyncBatchResponses, BatchResponses
62
+ from openaivec._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
68
63
  from openaivec.task.table import FillNaResponse, fillna
69
64
 
70
65
  __all__ = [
71
- "use",
72
- "use_async",
73
- "responses_model",
74
- "embeddings_model",
66
+ "get_async_client",
67
+ "get_client",
68
+ "get_embeddings_model",
69
+ "get_responses_model",
70
+ "set_async_client",
71
+ "set_client",
72
+ "set_embeddings_model",
73
+ "set_responses_model",
75
74
  ]
76
75
 
77
76
  _LOGGER = logging.getLogger(__name__)
@@ -95,37 +94,51 @@ def _df_rows_to_json_series(df: pd.DataFrame) -> pd.Series:
95
94
  T = TypeVar("T") # For pipe function return type
96
95
 
97
96
 
98
- def use(client: OpenAI) -> None:
99
- """Register a custom OpenAI‑compatible client.
97
+ def set_client(client: OpenAI) -> None:
98
+ """Register a custom OpenAI-compatible client for pandas helpers.
100
99
 
101
100
  Args:
102
- client (OpenAI): A preconfigured `openai.OpenAI` or
103
- `openai.AzureOpenAI` instance.
104
- The same instance is reused by every helper in this module.
101
+ client (OpenAI): A pre-configured `openai.OpenAI` or
102
+ `openai.AzureOpenAI` instance reused by every helper in this module.
105
103
  """
106
- # Check Azure v1 API URL if using AzureOpenAI client
107
104
  if client.__class__.__name__ == "AzureOpenAI" and hasattr(client, "base_url"):
108
105
  _check_azure_v1_api_url(str(client.base_url))
109
106
 
110
107
  CONTAINER.register(OpenAI, lambda: client)
111
108
 
112
109
 
113
- def use_async(client: AsyncOpenAI) -> None:
114
- """Register a custom asynchronous OpenAI‑compatible client.
110
+ def get_client() -> OpenAI:
111
+ """Get the currently registered OpenAI-compatible client.
112
+
113
+ Returns:
114
+ OpenAI: The registered `openai.OpenAI` or `openai.AzureOpenAI` instance.
115
+ """
116
+ return CONTAINER.resolve(OpenAI)
117
+
118
+
119
+ def set_async_client(client: AsyncOpenAI) -> None:
120
+ """Register a custom asynchronous OpenAI-compatible client.
115
121
 
116
122
  Args:
117
- client (AsyncOpenAI): A preconfigured `openai.AsyncOpenAI` or
118
- `openai.AsyncAzureOpenAI` instance.
119
- The same instance is reused by every helper in this module.
123
+ client (AsyncOpenAI): A pre-configured `openai.AsyncOpenAI` or
124
+ `openai.AsyncAzureOpenAI` instance reused by every helper in this module.
120
125
  """
121
- # Check Azure v1 API URL if using AsyncAzureOpenAI client
122
126
  if client.__class__.__name__ == "AsyncAzureOpenAI" and hasattr(client, "base_url"):
123
127
  _check_azure_v1_api_url(str(client.base_url))
124
128
 
125
129
  CONTAINER.register(AsyncOpenAI, lambda: client)
126
130
 
127
131
 
128
- def responses_model(name: str) -> None:
132
+ def get_async_client() -> AsyncOpenAI:
133
+ """Get the currently registered asynchronous OpenAI-compatible client.
134
+
135
+ Returns:
136
+ AsyncOpenAI: The registered `openai.AsyncOpenAI` or `openai.AsyncAzureOpenAI` instance.
137
+ """
138
+ return CONTAINER.resolve(AsyncOpenAI)
139
+
140
+
141
+ def set_responses_model(name: str) -> None:
129
142
  """Override the model used for text responses.
130
143
 
131
144
  Args:
@@ -135,7 +148,16 @@ def responses_model(name: str) -> None:
135
148
  CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(name))
136
149
 
137
150
 
138
- def embeddings_model(name: str) -> None:
151
+ def get_responses_model() -> str:
152
+ """Get the currently registered model name for text responses.
153
+
154
+ Returns:
155
+ str: The model name (for example, ``gpt-4.1-mini``).
156
+ """
157
+ return CONTAINER.resolve(ResponsesModelName).value
158
+
159
+
160
+ def set_embeddings_model(name: str) -> None:
139
161
  """Override the model used for text embeddings.
140
162
 
141
163
  Args:
@@ -145,6 +167,15 @@ def embeddings_model(name: str) -> None:
145
167
  CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(name))
146
168
 
147
169
 
170
+ def get_embeddings_model() -> str:
171
+ """Get the currently registered model name for text embeddings.
172
+
173
+ Returns:
174
+ str: The model name (for example, ``text-embedding-3-small``).
175
+ """
176
+ return CONTAINER.resolve(EmbeddingsModelName).value
177
+
178
+
148
179
  def _extract_value(x, series_name):
149
180
  """Return a homogeneous ``dict`` representation of any Series value.
150
181
 
@@ -639,7 +670,7 @@ class OpenAIVecSeriesAccessor:
639
670
  animals.ai.count_tokens()
640
671
  ```
641
672
  This method uses the `tiktoken` library to count tokens based on the
642
- model name set by `responses_model`.
673
+ model name configured via `set_responses_model`.
643
674
 
644
675
  Returns:
645
676
  pandas.Series: Token counts for each element.
openaivec/spark.py CHANGED
@@ -433,7 +433,7 @@ def responses_udf(
433
433
 
434
434
  @pandas_udf(returnType=spark_schema) # type: ignore[call-overload]
435
435
  def structure_udf(col: Iterator[pd.Series]) -> Iterator[pd.DataFrame]:
436
- pandas_ext.responses_model(_model_name)
436
+ pandas_ext.set_responses_model(_model_name)
437
437
  response_format = deserialize_base_model(json_schema_string)
438
438
  cache = AsyncBatchingMapProxy[str, response_format](
439
439
  batch_size=batch_size,
@@ -460,7 +460,7 @@ def responses_udf(
460
460
 
461
461
  @pandas_udf(returnType=StringType()) # type: ignore[call-overload]
462
462
  def string_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
463
- pandas_ext.responses_model(_model_name)
463
+ pandas_ext.set_responses_model(_model_name)
464
464
  cache = AsyncBatchingMapProxy[str, str](
465
465
  batch_size=batch_size,
466
466
  max_concurrency=max_concurrency,
@@ -729,7 +729,7 @@ def embeddings_udf(
729
729
 
730
730
  @pandas_udf(returnType=ArrayType(FloatType())) # type: ignore[call-overload,misc]
731
731
  def _embeddings_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
732
- pandas_ext.embeddings_model(_model_name)
732
+ pandas_ext.set_embeddings_model(_model_name)
733
733
  cache = AsyncBatchingMapProxy[str, np.ndarray](
734
734
  batch_size=batch_size,
735
735
  max_concurrency=max_concurrency,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.14.14
3
+ Version: 0.15.0
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -26,6 +26,8 @@ Description-Content-Type: text/markdown
26
26
 
27
27
  # openaivec
28
28
 
29
+ [Contributor guidelines](AGENTS.md)
30
+
29
31
  **Transform your data analysis with AI-powered text processing at scale.**
30
32
 
31
33
  **openaivec** enables data analysts to seamlessly integrate OpenAI's language models into their pandas and Spark workflows. Process thousands of text records with natural language instructions, turning unstructured data into actionable insights with just a few lines of code.
@@ -187,13 +189,13 @@ os.environ["OPENAI_API_KEY"] = "your-api-key-here"
187
189
 
188
190
  # Authentication Option 2: Custom client (optional)
189
191
  # from openai import OpenAI, AsyncOpenAI
190
- # pandas_ext.use(OpenAI())
192
+ # pandas_ext.set_client(OpenAI())
191
193
  # For async operations:
192
- # pandas_ext.use_async(AsyncOpenAI())
194
+ # pandas_ext.set_async_client(AsyncOpenAI())
193
195
 
194
196
  # Configure model (optional - defaults to gpt-4.1-mini)
195
197
  # For Azure OpenAI: use your deployment name, for OpenAI: use model name
196
- pandas_ext.responses_model("gpt-4.1-mini")
198
+ pandas_ext.set_responses_model("gpt-4.1-mini")
197
199
 
198
200
  # Create your data
199
201
  df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})
@@ -220,7 +222,7 @@ When using reasoning models (o1-preview, o1-mini, o3-mini, etc.), you must set `
220
222
 
221
223
  ```python
222
224
  # For reasoning models like o1-preview, o1-mini, o3-mini
223
- pandas_ext.responses_model("o1-mini") # Set your reasoning model
225
+ pandas_ext.set_responses_model("o1-mini") # Set your reasoning model
224
226
 
225
227
  # MUST use temperature=None with reasoning models
226
228
  result = df.assign(
@@ -291,7 +293,7 @@ import pandas as pd
291
293
  from openaivec import pandas_ext
292
294
 
293
295
  # Setup (same as synchronous version)
294
- pandas_ext.responses_model("gpt-4.1-mini")
296
+ pandas_ext.set_responses_model("gpt-4.1-mini")
295
297
 
296
298
  df = pd.DataFrame({"text": [
297
299
  "This product is amazing!",
@@ -12,8 +12,8 @@ openaivec/_responses.py,sha256=qBrYv4qblDIs5dRvj9t96r8UfAJmy4ZvtAe6csNZ7oM,20412
12
12
  openaivec/_schema.py,sha256=iOeR5J_ihZRDZtzmqvOK1ZtInKcx4OnoR38DB3VmmQw,15666
13
13
  openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
14
14
  openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
15
- openaivec/pandas_ext.py,sha256=r2jpFqDnWcQYK3pMv5hCtOStOMltccDyLkpprLmIOls,85715
16
- openaivec/spark.py,sha256=5-89uy2K-23Z_j1aRa84Gvl8DV0lusnkRI1zxuFeOEA,34020
15
+ openaivec/pandas_ext.py,sha256=1euz52rwKpUBvWRCKvkDjwCFf_zNYnf60wF5OXHiCqw,86727
16
+ openaivec/spark.py,sha256=8-Hap36D0kcyV8RMA-PyFjZxfAnMfgtcp9gKASRnUwU,34032
17
17
  openaivec/task/__init__.py,sha256=RkYIKrcE83M_9Um9cSMkeGzL9kPRAovajfRvr31YxLE,6178
18
18
  openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
19
19
  openaivec/task/customer_support/customer_sentiment.py,sha256=d8spZUtImjePK0xWGvIW98ghbdyOZ0KEZmaUpG8QB7M,7532
@@ -31,7 +31,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=u-zpqAaQYcr7I3mqMv_CTJXkfxtoLft3
31
31
  openaivec/task/nlp/translation.py,sha256=kgWj2oN8pUId3vuHTJNx636gB49AGEKXWICA_XJgE_0,6628
32
32
  openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
33
33
  openaivec/task/table/fillna.py,sha256=zL6m5hGD4kamV7qHETnn__B59wIY540Ks0EzNgUJgdI,6888
34
- openaivec-0.14.14.dist-info/METADATA,sha256=SlUl_cvN1l-4ZxO5-g8jXxCupez29wDeRICq0c6qH3k,28216
35
- openaivec-0.14.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
- openaivec-0.14.14.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
- openaivec-0.14.14.dist-info/RECORD,,
34
+ openaivec-0.15.0.dist-info/METADATA,sha256=cVTYsT6TOMij_vagDgsIbo886U24Ys5dkah7ZvdEkdw,28278
35
+ openaivec-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
+ openaivec-0.15.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
+ openaivec-0.15.0.dist-info/RECORD,,