openaivec-0.14.2-py3-none-any.whl → openaivec-0.14.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/_proxy.py +24 -2
- openaivec/_responses.py +77 -25
- openaivec/_schema.py +454 -0
- openaivec/pandas_ext.py +559 -423
- openaivec/spark.py +21 -1
- {openaivec-0.14.2.dist-info → openaivec-0.14.4.dist-info}/METADATA +1 -1
- {openaivec-0.14.2.dist-info → openaivec-0.14.4.dist-info}/RECORD +9 -8
- {openaivec-0.14.2.dist-info → openaivec-0.14.4.dist-info}/WHEEL +0 -0
- {openaivec-0.14.2.dist-info → openaivec-0.14.4.dist-info}/licenses/LICENSE +0 -0
openaivec/spark.py
CHANGED
|
@@ -237,6 +237,7 @@ def responses_udf(
|
|
|
237
237
|
temperature: float | None = 0.0,
|
|
238
238
|
top_p: float = 1.0,
|
|
239
239
|
max_concurrency: int = 8,
|
|
240
|
+
**api_kwargs,
|
|
240
241
|
) -> UserDefinedFunction:
|
|
241
242
|
"""Create an asynchronous Spark pandas UDF for generating responses.
|
|
242
243
|
|
|
@@ -276,6 +277,11 @@ def responses_udf(
|
|
|
276
277
|
Higher values increase throughput but may hit OpenAI rate limits.
|
|
277
278
|
Recommended: 4-12 per executor. Defaults to 8.
|
|
278
279
|
|
|
280
|
+
Additional Keyword Args:
|
|
281
|
+
Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
|
|
282
|
+
``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
|
|
283
|
+
These parameters are applied to all API requests made by the UDF.
|
|
284
|
+
|
|
279
285
|
Returns:
|
|
280
286
|
UserDefinedFunction: A Spark pandas UDF configured to generate responses asynchronously.
|
|
281
287
|
Output schema is `StringType` or a struct derived from `response_format`.
|
|
@@ -313,6 +319,7 @@ def responses_udf(
|
|
|
313
319
|
temperature=temperature,
|
|
314
320
|
top_p=top_p,
|
|
315
321
|
cache=cache,
|
|
322
|
+
**api_kwargs,
|
|
316
323
|
)
|
|
317
324
|
)
|
|
318
325
|
yield pd.DataFrame(predictions.map(_safe_dump).tolist())
|
|
@@ -340,6 +347,7 @@ def responses_udf(
|
|
|
340
347
|
temperature=temperature,
|
|
341
348
|
top_p=top_p,
|
|
342
349
|
cache=cache,
|
|
350
|
+
**api_kwargs,
|
|
343
351
|
)
|
|
344
352
|
)
|
|
345
353
|
yield predictions.map(_safe_cast_str)
|
|
@@ -357,6 +365,7 @@ def task_udf(
|
|
|
357
365
|
model_name: str = "gpt-4.1-mini",
|
|
358
366
|
batch_size: int | None = None,
|
|
359
367
|
max_concurrency: int = 8,
|
|
368
|
+
**api_kwargs,
|
|
360
369
|
) -> UserDefinedFunction:
|
|
361
370
|
"""Create an asynchronous Spark pandas UDF from a predefined task.
|
|
362
371
|
|
|
@@ -381,6 +390,12 @@ def task_udf(
|
|
|
381
390
|
Higher values increase throughput but may hit OpenAI rate limits.
|
|
382
391
|
Recommended: 4-12 per executor. Defaults to 8.
|
|
383
392
|
|
|
393
|
+
Additional Keyword Args:
|
|
394
|
+
Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
|
|
395
|
+
``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
|
|
396
|
+
These parameters are applied to all API requests made by the UDF and override any
|
|
397
|
+
parameters set in the task configuration.
|
|
398
|
+
|
|
384
399
|
Returns:
|
|
385
400
|
UserDefinedFunction: A Spark pandas UDF configured to execute the specified task
|
|
386
401
|
asynchronously with automatic caching for duplicate inputs within each partition.
|
|
@@ -429,6 +444,7 @@ def task_udf(
|
|
|
429
444
|
temperature=task_temperature,
|
|
430
445
|
top_p=task_top_p,
|
|
431
446
|
cache=cache,
|
|
447
|
+
**api_kwargs,
|
|
432
448
|
)
|
|
433
449
|
)
|
|
434
450
|
yield pd.DataFrame(predictions.map(_safe_dump).tolist())
|
|
@@ -456,6 +472,7 @@ def task_udf(
|
|
|
456
472
|
temperature=task_temperature,
|
|
457
473
|
top_p=task_top_p,
|
|
458
474
|
cache=cache,
|
|
475
|
+
**api_kwargs,
|
|
459
476
|
)
|
|
460
477
|
)
|
|
461
478
|
yield predictions.map(_safe_cast_str)
|
|
@@ -594,7 +611,10 @@ def similarity_udf() -> UserDefinedFunction:
|
|
|
594
611
|
Cosine similarity between the two vectors.
|
|
595
612
|
"""
|
|
596
613
|
# Import pandas_ext to ensure .ai accessor is available in Spark workers
|
|
597
|
-
from openaivec import pandas_ext
|
|
614
|
+
from openaivec import pandas_ext
|
|
615
|
+
|
|
616
|
+
# Explicitly reference pandas_ext to satisfy linters
|
|
617
|
+
assert pandas_ext is not None
|
|
598
618
|
|
|
599
619
|
return pd.DataFrame({"a": a, "b": b}).ai.similarity("a", "b")
|
|
600
620
|
|
|
@@ -6,12 +6,13 @@ openaivec/_model.py,sha256=xg3s9Ljqb2xK1t_a5bwWxGJfFSIuaNrFGMgQq4nQKrM,3351
|
|
|
6
6
|
openaivec/_optimize.py,sha256=-mKjD5YV_d1Z2nqfGfAcmx6mTKn6AODjFTrIKJPbAXQ,3851
|
|
7
7
|
openaivec/_prompt.py,sha256=KoJbFK4gTEDRtu9OMweJq_jQLkSPFy2Kcvao30qKhAQ,20844
|
|
8
8
|
openaivec/_provider.py,sha256=dNr9Y2C97GK-pkY81odurKoDup59dLK31V3EGT2HOwE,6711
|
|
9
|
-
openaivec/_proxy.py,sha256=
|
|
10
|
-
openaivec/_responses.py,sha256=
|
|
9
|
+
openaivec/_proxy.py,sha256=J0qGDcZqSab26ScA8OXxzornfwuXtrVycqup-JPq464,29719
|
|
10
|
+
openaivec/_responses.py,sha256=xtkiOn01RkauHq2FAKRAcjPglH8rmbaSz0-VE0ClTe8,24026
|
|
11
|
+
openaivec/_schema.py,sha256=9enwqE2idLLUKbQxjiNn09uhdKz14kihEwUXglRqxx0,20543
|
|
11
12
|
openaivec/_serialize.py,sha256=NLCKl4opc1WS24_duwpI2UGBepQ8SBh4YRxBlLwzDLw,8403
|
|
12
13
|
openaivec/_util.py,sha256=dFWwjouJyvF-tqNPs2933OAt5Fw9I2Q2BvmGIfGH5k4,6423
|
|
13
|
-
openaivec/pandas_ext.py,sha256=
|
|
14
|
-
openaivec/spark.py,sha256=
|
|
14
|
+
openaivec/pandas_ext.py,sha256=m4H6mrE__Jmr5R6hl6d8yc2JhVT0-wdf5GOKWIITeLU,63366
|
|
15
|
+
openaivec/spark.py,sha256=lI-noacLvuxu6gBztKdcYd9vfK3eNI3aCGwJylkzv7E,25367
|
|
15
16
|
openaivec/task/__init__.py,sha256=lrgoc9UIox7XnxZ96dQRl88a-8QfuZRFBHshxctpMB8,6178
|
|
16
17
|
openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
|
|
17
18
|
openaivec/task/customer_support/customer_sentiment.py,sha256=r_NJEz11zdMCw6x8S2jqEhcFZSJDn0Plgf0ED8JlvxQ,7618
|
|
@@ -29,7 +30,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=BNwWtNT-MNA76eIJbb31641upukmRwM9
|
|
|
29
30
|
openaivec/task/nlp/translation.py,sha256=XTZM11JFjbgTK9wHnxFgVDabXZ5bqbabXK_bq2nEkyQ,6627
|
|
30
31
|
openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
|
|
31
32
|
openaivec/task/table/fillna.py,sha256=ZVcOpuh7ULVhrt1VsWy5fPhk53XNaiD7kXGCPhh83M8,6636
|
|
32
|
-
openaivec-0.14.
|
|
33
|
-
openaivec-0.14.
|
|
34
|
-
openaivec-0.14.
|
|
35
|
-
openaivec-0.14.
|
|
33
|
+
openaivec-0.14.4.dist-info/METADATA,sha256=RF6rZDL5B4qYCqXIbC0jexv-IzHv48WBDV-MZtNHcvY,27566
|
|
34
|
+
openaivec-0.14.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
35
|
+
openaivec-0.14.4.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
36
|
+
openaivec-0.14.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|