openaivec 0.14.13__py3-none-any.whl → 0.14.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/spark.py CHANGED
@@ -193,8 +193,6 @@ def setup(
193
193
  CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(responses_model_name))
194
194
 
195
195
  if embeddings_model_name:
196
- from openaivec._model import EmbeddingsModelName
197
-
198
196
  CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(embeddings_model_name))
199
197
 
200
198
  CONTAINER.clear_singletons()
@@ -244,6 +242,50 @@ def setup_azure(
244
242
  CONTAINER.clear_singletons()
245
243
 
246
244
 
245
+ def set_responses_model(model_name: str):
246
+ """Set the default model name for response generation in the DI container.
247
+
248
+ Args:
249
+ model_name (str): The model name to set as default for responses.
250
+ """
251
+ CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(model_name))
252
+ CONTAINER.clear_singletons()
253
+
254
+
255
+ def get_responses_model() -> str | None:
256
+ """Get the default model name for response generation from the DI container.
257
+
258
+ Returns:
259
+ str | None: The default model name for responses, or None if not set.
260
+ """
261
+ try:
262
+ return CONTAINER.resolve(ResponsesModelName).value
263
+ except Exception:
264
+ return None
265
+
266
+
267
+ def set_embeddings_model(model_name: str):
268
+ """Set the default model name for embeddings in the DI container.
269
+
270
+ Args:
271
+ model_name (str): The model name to set as default for embeddings.
272
+ """
273
+ CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(model_name))
274
+ CONTAINER.clear_singletons()
275
+
276
+
277
+ def get_embeddings_model() -> str | None:
278
+ """Get the default model name for embeddings from the DI container.
279
+
280
+ Returns:
281
+ str | None: The default model name for embeddings, or None if not set.
282
+ """
283
+ try:
284
+ return CONTAINER.resolve(EmbeddingsModelName).value
285
+ except Exception:
286
+ return None
287
+
288
+
247
289
  def _python_type_to_spark(python_type):
248
290
  origin = get_origin(python_type)
249
291
 
@@ -322,7 +364,7 @@ def _safe_dump(x: BaseModel | None) -> dict:
322
364
  def responses_udf(
323
365
  instructions: str,
324
366
  response_format: type[ResponseFormat] = str,
325
- model_name: str = CONTAINER.resolve(ResponsesModelName).value,
367
+ model_name: str | None = None,
326
368
  batch_size: int | None = None,
327
369
  max_concurrency: int = 8,
328
370
  **api_kwargs,
@@ -351,8 +393,9 @@ def responses_udf(
351
393
  instructions (str): The system prompt or instructions for the model.
352
394
  response_format (type[ResponseFormat]): The desired output format. Either `str` for plain text
353
395
  or a Pydantic `BaseModel` for structured JSON output. Defaults to `str`.
354
- model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
355
- For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
396
+ model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
397
+ For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
398
+ via ResponsesModelName if not provided.
356
399
  batch_size (int | None): Number of rows per async batch request within each partition.
357
400
  Larger values reduce API call overhead but increase memory usage.
358
401
  Defaults to None (automatic batch size optimization that dynamically
@@ -382,13 +425,15 @@ def responses_udf(
382
425
  - Consider your OpenAI tier limits: total_requests = max_concurrency × executors
383
426
  - Use Spark UI to optimize partition sizes relative to batch_size
384
427
  """
428
+ _model_name = model_name or CONTAINER.resolve(ResponsesModelName).value
429
+
385
430
  if issubclass(response_format, BaseModel):
386
431
  spark_schema = _pydantic_to_spark_schema(response_format)
387
432
  json_schema_string = serialize_base_model(response_format)
388
433
 
389
434
  @pandas_udf(returnType=spark_schema) # type: ignore[call-overload]
390
435
  def structure_udf(col: Iterator[pd.Series]) -> Iterator[pd.DataFrame]:
391
- pandas_ext.responses_model(model_name)
436
+ pandas_ext.responses_model(_model_name)
392
437
  response_format = deserialize_base_model(json_schema_string)
393
438
  cache = AsyncBatchingMapProxy[str, response_format](
394
439
  batch_size=batch_size,
@@ -415,7 +460,7 @@ def responses_udf(
415
460
 
416
461
  @pandas_udf(returnType=StringType()) # type: ignore[call-overload]
417
462
  def string_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
418
- pandas_ext.responses_model(model_name)
463
+ pandas_ext.responses_model(_model_name)
419
464
  cache = AsyncBatchingMapProxy[str, str](
420
465
  batch_size=batch_size,
421
466
  max_concurrency=max_concurrency,
@@ -443,7 +488,7 @@ def responses_udf(
443
488
 
444
489
  def task_udf(
445
490
  task: PreparedTask[ResponseFormat],
446
- model_name: str = CONTAINER.resolve(ResponsesModelName).value,
491
+ model_name: str | None = None,
447
492
  batch_size: int | None = None,
448
493
  max_concurrency: int = 8,
449
494
  **api_kwargs,
@@ -459,8 +504,9 @@ def task_udf(
459
504
  Args:
460
505
  task (PreparedTask): A predefined task configuration containing instructions,
461
506
  response format, and API parameters.
462
- model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
463
- For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
507
+ model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
508
+ For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
509
+ via ResponsesModelName if not provided.
464
510
  batch_size (int | None): Number of rows per async batch request within each partition.
465
511
  Larger values reduce API call overhead but increase memory usage.
466
512
  Defaults to None (automatic batch size optimization that dynamically
@@ -550,7 +596,7 @@ def parse_udf(
550
596
  example_table_name: str | None = None,
551
597
  example_field_name: str | None = None,
552
598
  max_examples: int = 100,
553
- model_name: str = CONTAINER.resolve(ResponsesModelName).value,
599
+ model_name: str | None = None,
554
600
  batch_size: int | None = None,
555
601
  max_concurrency: int = 8,
556
602
  **api_kwargs,
@@ -574,8 +620,9 @@ def parse_udf(
574
620
  If provided, `example_table_name` must also be specified.
575
621
  max_examples (int): Maximum number of examples to retrieve for schema inference.
576
622
  Defaults to 100.
577
- model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
578
- For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
623
+ model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
624
+ For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container
625
+ via ResponsesModelName if not provided.
579
626
  batch_size (int | None): Number of rows per async batch request within each partition.
580
627
  Larger values reduce API call overhead but increase memory usage.
581
628
  Defaults to None (automatic batch size optimization that dynamically
@@ -622,7 +669,7 @@ def parse_udf(
622
669
 
623
670
 
624
671
  def embeddings_udf(
625
- model_name: str = CONTAINER.resolve(EmbeddingsModelName).value,
672
+ model_name: str | None = None,
626
673
  batch_size: int | None = None,
627
674
  max_concurrency: int = 8,
628
675
  **api_kwargs,
@@ -648,9 +695,9 @@ def embeddings_udf(
648
695
  sc.environment["AZURE_OPENAI_API_VERSION"] = "preview"
649
696
 
650
697
  Args:
651
- model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-embedding-deployment").
698
+ model_name (str | None): For Azure OpenAI, use your deployment name (e.g., "my-embedding-deployment").
652
699
  For OpenAI, use the model name (e.g., "text-embedding-3-small").
653
- Defaults to configured model in DI container.
700
+ Defaults to configured model in DI container via EmbeddingsModelName if not provided.
654
701
  batch_size (int | None): Number of rows per async batch request within each partition.
655
702
  Larger values reduce API call overhead but increase memory usage.
656
703
  Defaults to None (automatic batch size optimization that dynamically
@@ -678,9 +725,11 @@ def embeddings_udf(
678
725
  - Use larger batch_size for embeddings compared to response generation
679
726
  """
680
727
 
728
+ _model_name = model_name or CONTAINER.resolve(EmbeddingsModelName).value
729
+
681
730
  @pandas_udf(returnType=ArrayType(FloatType())) # type: ignore[call-overload,misc]
682
731
  def _embeddings_udf(col: Iterator[pd.Series]) -> Iterator[pd.Series]:
683
- pandas_ext.embeddings_model(model_name)
732
+ pandas_ext.embeddings_model(_model_name)
684
733
  cache = AsyncBatchingMapProxy[str, np.ndarray](
685
734
  batch_size=batch_size,
686
735
  max_concurrency=max_concurrency,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.14.13
3
+ Version: 0.14.14
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -13,7 +13,7 @@ openaivec/_schema.py,sha256=iOeR5J_ihZRDZtzmqvOK1ZtInKcx4OnoR38DB3VmmQw,15666
13
13
  openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
14
14
  openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
15
15
  openaivec/pandas_ext.py,sha256=r2jpFqDnWcQYK3pMv5hCtOStOMltccDyLkpprLmIOls,85715
16
- openaivec/spark.py,sha256=zaEivVOe3ukG8coa9JEUyISQ1YcMqCvAbhaarvn2SOM,32507
16
+ openaivec/spark.py,sha256=5-89uy2K-23Z_j1aRa84Gvl8DV0lusnkRI1zxuFeOEA,34020
17
17
  openaivec/task/__init__.py,sha256=RkYIKrcE83M_9Um9cSMkeGzL9kPRAovajfRvr31YxLE,6178
18
18
  openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
19
19
  openaivec/task/customer_support/customer_sentiment.py,sha256=d8spZUtImjePK0xWGvIW98ghbdyOZ0KEZmaUpG8QB7M,7532
@@ -31,7 +31,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=u-zpqAaQYcr7I3mqMv_CTJXkfxtoLft3
31
31
  openaivec/task/nlp/translation.py,sha256=kgWj2oN8pUId3vuHTJNx636gB49AGEKXWICA_XJgE_0,6628
32
32
  openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
33
33
  openaivec/task/table/fillna.py,sha256=zL6m5hGD4kamV7qHETnn__B59wIY540Ks0EzNgUJgdI,6888
34
- openaivec-0.14.13.dist-info/METADATA,sha256=rB_WJhIVX11WUoA-r2Ryn57QIuTWj0q0JhjPlz6wXv4,28216
35
- openaivec-0.14.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
- openaivec-0.14.13.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
- openaivec-0.14.13.dist-info/RECORD,,
34
+ openaivec-0.14.14.dist-info/METADATA,sha256=SlUl_cvN1l-4ZxO5-g8jXxCupez29wDeRICq0c6qH3k,28216
35
+ openaivec-0.14.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
+ openaivec-0.14.14.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
+ openaivec-0.14.14.dist-info/RECORD,,