openaivec 0.15.0__py3-none-any.whl → 0.99.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
openaivec/__init__.py CHANGED
@@ -1,13 +1,18 @@
1
1
  from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
2
2
  from ._model import PreparedTask
3
- from ._prompt import FewShotPromptBuilder
3
+ from ._prompt import FewShotPrompt, FewShotPromptBuilder
4
4
  from ._responses import AsyncBatchResponses, BatchResponses
5
+ from ._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
5
6
 
6
7
  __all__ = [
7
8
  "AsyncBatchEmbeddings",
8
9
  "AsyncBatchResponses",
9
10
  "BatchEmbeddings",
10
11
  "BatchResponses",
12
+ "FewShotPrompt",
11
13
  "FewShotPromptBuilder",
14
+ "InferredSchema",
12
15
  "PreparedTask",
16
+ "SchemaInferenceInput",
17
+ "SchemaInferer",
13
18
  ]
openaivec/_proxy.py CHANGED
@@ -199,7 +199,7 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
199
199
  # - If positive integer: Fixed batch size
200
200
  # - If <= 0: Process all items at once
201
201
  batch_size: int | None = None
202
- show_progress: bool = False
202
+ show_progress: bool = True
203
203
  suggester: BatchSizeSuggester = field(default_factory=BatchSizeSuggester, repr=False)
204
204
 
205
205
  # internals
@@ -509,7 +509,7 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
509
509
  # - If <= 0: Process all items at once
510
510
  batch_size: int | None = None
511
511
  max_concurrency: int = 8
512
- show_progress: bool = False
512
+ show_progress: bool = True
513
513
  suggester: BatchSizeSuggester = field(default_factory=BatchSizeSuggester, repr=False)
514
514
 
515
515
  # internals
openaivec/pandas_ext.py CHANGED
@@ -249,7 +249,7 @@ class OpenAIVecSeriesAccessor:
249
249
  instructions: str,
250
250
  response_format: type[ResponseFormat] = str,
251
251
  batch_size: int | None = None,
252
- show_progress: bool = False,
252
+ show_progress: bool = True,
253
253
  **api_kwargs,
254
254
  ) -> pd.Series:
255
255
  """Call an LLM once for every Series element.
@@ -282,7 +282,7 @@ class OpenAIVecSeriesAccessor:
282
282
  batch_size (int | None, optional): Number of prompts grouped into a single
283
283
  request. Defaults to ``None`` (automatic batch size optimization
284
284
  based on execution time). Set to a positive integer for fixed batch size.
285
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
285
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
286
286
  **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
287
287
 
288
288
  Returns:
@@ -341,7 +341,7 @@ class OpenAIVecSeriesAccessor:
341
341
  name=self._obj.name,
342
342
  )
343
343
 
344
- def embeddings(self, batch_size: int | None = None, show_progress: bool = False, **api_kwargs) -> pd.Series:
344
+ def embeddings(self, batch_size: int | None = None, show_progress: bool = True, **api_kwargs) -> pd.Series:
345
345
  """Compute OpenAI embeddings for every Series element.
346
346
 
347
347
  Example:
@@ -362,7 +362,7 @@ class OpenAIVecSeriesAccessor:
362
362
  batch_size (int | None, optional): Number of inputs grouped into a
363
363
  single request. Defaults to ``None`` (automatic batch size optimization
364
364
  based on execution time). Set to a positive integer for fixed batch size.
365
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
365
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
366
366
  **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
367
367
 
368
368
  Returns:
@@ -417,7 +417,7 @@ class OpenAIVecSeriesAccessor:
417
417
  self,
418
418
  task: PreparedTask,
419
419
  batch_size: int | None = None,
420
- show_progress: bool = False,
420
+ show_progress: bool = True,
421
421
  ) -> pd.Series:
422
422
  """Execute a prepared task on every Series element.
423
423
 
@@ -447,7 +447,7 @@ class OpenAIVecSeriesAccessor:
447
447
  batch_size (int | None, optional): Number of prompts grouped into a single
448
448
  request to optimize API usage. Defaults to ``None`` (automatic batch size
449
449
  optimization based on execution time). Set to a positive integer for fixed batch size.
450
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
450
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
451
451
 
452
452
  Note:
453
453
  The task's stored API parameters are used. Core batching / routing keys
@@ -520,7 +520,7 @@ class OpenAIVecSeriesAccessor:
520
520
  response_format: type[ResponseFormat] | None = None,
521
521
  max_examples: int = 100,
522
522
  batch_size: int | None = None,
523
- show_progress: bool = False,
523
+ show_progress: bool = True,
524
524
  **api_kwargs,
525
525
  ) -> pd.Series:
526
526
  """Parse Series values into structured data using an LLM.
@@ -544,7 +544,7 @@ class OpenAIVecSeriesAccessor:
544
544
  batch_size (int | None, optional): Number of requests to process
545
545
  per batch. None enables automatic optimization. Defaults to None.
546
546
  show_progress (bool, optional): Display progress bar in Jupyter
547
- notebooks. Defaults to False.
547
+ notebooks. Defaults to True.
548
548
  **api_kwargs: Additional OpenAI API parameters (temperature, top_p,
549
549
  frequency_penalty, presence_penalty, seed, etc.).
550
550
 
@@ -770,7 +770,7 @@ class OpenAIVecDataFrameAccessor:
770
770
  instructions: str,
771
771
  response_format: type[ResponseFormat] = str,
772
772
  batch_size: int | None = None,
773
- show_progress: bool = False,
773
+ show_progress: bool = True,
774
774
  **api_kwargs,
775
775
  ) -> pd.Series:
776
776
  """Generate a response for each row after serializing it to JSON.
@@ -801,7 +801,7 @@ class OpenAIVecDataFrameAccessor:
801
801
  batch_size (int | None, optional): Number of requests sent in one batch.
802
802
  Defaults to ``None`` (automatic batch size optimization
803
803
  based on execution time). Set to a positive integer for fixed batch size.
804
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
804
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
805
805
  **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
806
806
 
807
807
  Returns:
@@ -840,7 +840,7 @@ class OpenAIVecDataFrameAccessor:
840
840
  self,
841
841
  task: PreparedTask,
842
842
  batch_size: int | None = None,
843
- show_progress: bool = False,
843
+ show_progress: bool = True,
844
844
  ) -> pd.Series:
845
845
  """Execute a prepared task on each DataFrame row after serializing it to JSON.
846
846
 
@@ -874,7 +874,7 @@ class OpenAIVecDataFrameAccessor:
874
874
  batch_size (int | None, optional): Number of requests sent in one batch
875
875
  to optimize API usage. Defaults to ``None`` (automatic batch size
876
876
  optimization based on execution time). Set to a positive integer for fixed batch size.
877
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
877
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
878
878
 
879
879
  Note:
880
880
  The task's stored API parameters are used. Core batching / routing
@@ -939,7 +939,7 @@ class OpenAIVecDataFrameAccessor:
939
939
  response_format: type[ResponseFormat] | None = None,
940
940
  max_examples: int = 100,
941
941
  batch_size: int | None = None,
942
- show_progress: bool = False,
942
+ show_progress: bool = True,
943
943
  **api_kwargs,
944
944
  ) -> pd.Series:
945
945
  """Parse DataFrame rows into structured data using an LLM.
@@ -960,7 +960,7 @@ class OpenAIVecDataFrameAccessor:
960
960
  batch_size (int | None, optional): Rows per API batch. None
961
961
  enables automatic optimization. Defaults to None.
962
962
  show_progress (bool, optional): Show progress bar in Jupyter
963
- notebooks. Defaults to False.
963
+ notebooks. Defaults to True.
964
964
  **api_kwargs: Additional OpenAI API parameters.
965
965
 
966
966
  Returns:
@@ -1084,7 +1084,7 @@ class OpenAIVecDataFrameAccessor:
1084
1084
  target_column_name: str,
1085
1085
  max_examples: int = 500,
1086
1086
  batch_size: int | None = None,
1087
- show_progress: bool = False,
1087
+ show_progress: bool = True,
1088
1088
  ) -> pd.DataFrame:
1089
1089
  """Fill missing values in a DataFrame column using AI-powered inference.
1090
1090
 
@@ -1102,7 +1102,7 @@ class OpenAIVecDataFrameAccessor:
1102
1102
  batch_size (int | None, optional): Number of requests sent in one batch
1103
1103
  to optimize API usage. Defaults to ``None`` (automatic batch size
1104
1104
  optimization based on execution time). Set to a positive integer for fixed batch size.
1105
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1105
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
1106
1106
 
1107
1107
  Returns:
1108
1108
  pandas.DataFrame: A new DataFrame with missing values filled in the target
@@ -1251,7 +1251,7 @@ class AsyncOpenAIVecSeriesAccessor:
1251
1251
  response_format: type[ResponseFormat] = str,
1252
1252
  batch_size: int | None = None,
1253
1253
  max_concurrency: int = 8,
1254
- show_progress: bool = False,
1254
+ show_progress: bool = True,
1255
1255
  **api_kwargs,
1256
1256
  ) -> pd.Series:
1257
1257
  """Call an LLM once for every Series element (asynchronously).
@@ -1281,7 +1281,7 @@ class AsyncOpenAIVecSeriesAccessor:
1281
1281
  based on execution time). Set to a positive integer for fixed batch size.
1282
1282
  max_concurrency (int, optional): Maximum number of concurrent
1283
1283
  requests. Defaults to ``8``.
1284
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1284
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
1285
1285
  **api_kwargs: Additional keyword arguments forwarded verbatim to
1286
1286
  ``AsyncOpenAI.responses.parse`` (e.g. ``temperature``, ``top_p``,
1287
1287
  ``max_output_tokens``, penalties, future parameters). Core batching keys
@@ -1360,7 +1360,7 @@ class AsyncOpenAIVecSeriesAccessor:
1360
1360
  )
1361
1361
 
1362
1362
  async def embeddings(
1363
- self, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = False, **api_kwargs
1363
+ self, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = True, **api_kwargs
1364
1364
  ) -> pd.Series:
1365
1365
  """Compute OpenAI embeddings for every Series element (asynchronously).
1366
1366
 
@@ -1385,7 +1385,7 @@ class AsyncOpenAIVecSeriesAccessor:
1385
1385
  based on execution time). Set to a positive integer for fixed batch size.
1386
1386
  max_concurrency (int, optional): Maximum number of concurrent
1387
1387
  requests. Defaults to ``8``.
1388
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1388
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
1389
1389
  **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
1390
1390
 
1391
1391
  Returns:
@@ -1467,7 +1467,7 @@ class AsyncOpenAIVecSeriesAccessor:
1467
1467
  task: PreparedTask,
1468
1468
  batch_size: int | None = None,
1469
1469
  max_concurrency: int = 8,
1470
- show_progress: bool = False,
1470
+ show_progress: bool = True,
1471
1471
  ) -> pd.Series:
1472
1472
  """Execute a prepared task on every Series element (asynchronously).
1473
1473
 
@@ -1500,7 +1500,7 @@ class AsyncOpenAIVecSeriesAccessor:
1500
1500
  optimization based on execution time). Set to a positive integer for fixed batch size.
1501
1501
  max_concurrency (int, optional): Maximum number of concurrent
1502
1502
  requests. Defaults to 8.
1503
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1503
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
1504
1504
 
1505
1505
  Note:
1506
1506
  The task's stored API parameters are used. Core batching / routing
@@ -1575,7 +1575,7 @@ class AsyncOpenAIVecSeriesAccessor:
1575
1575
  max_examples: int = 100,
1576
1576
  batch_size: int | None = None,
1577
1577
  max_concurrency: int = 8,
1578
- show_progress: bool = False,
1578
+ show_progress: bool = True,
1579
1579
  **api_kwargs,
1580
1580
  ) -> pd.Series:
1581
1581
  """Parse Series values into structured data using an LLM (asynchronously).
@@ -1594,7 +1594,7 @@ class AsyncOpenAIVecSeriesAccessor:
1594
1594
  automatic optimization. Defaults to None.
1595
1595
  max_concurrency (int, optional): Maximum concurrent API requests.
1596
1596
  Defaults to 8.
1597
- show_progress (bool, optional): Show progress bar. Defaults to False.
1597
+ show_progress (bool, optional): Show progress bar. Defaults to True.
1598
1598
  **api_kwargs: Additional OpenAI API parameters.
1599
1599
 
1600
1600
  Returns:
@@ -1696,7 +1696,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1696
1696
  response_format: type[ResponseFormat] = str,
1697
1697
  batch_size: int | None = None,
1698
1698
  max_concurrency: int = 8,
1699
- show_progress: bool = False,
1699
+ show_progress: bool = True,
1700
1700
  **api_kwargs,
1701
1701
  ) -> pd.Series:
1702
1702
  """Generate a response for each row after serializing it to JSON (asynchronously).
@@ -1731,7 +1731,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1731
1731
  **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
1732
1732
  max_concurrency (int, optional): Maximum number of concurrent
1733
1733
  requests. Defaults to ``8``.
1734
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1734
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
1735
1735
 
1736
1736
  Returns:
1737
1737
  pandas.Series: Responses aligned with the DataFrame's original index.
@@ -1780,7 +1780,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1780
1780
  task: PreparedTask,
1781
1781
  batch_size: int | None = None,
1782
1782
  max_concurrency: int = 8,
1783
- show_progress: bool = False,
1783
+ show_progress: bool = True,
1784
1784
  ) -> pd.Series:
1785
1785
  """Execute a prepared task on each DataFrame row after serializing it to JSON (asynchronously).
1786
1786
 
@@ -1817,7 +1817,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1817
1817
  optimization based on execution time). Set to a positive integer for fixed batch size.
1818
1818
  max_concurrency (int, optional): Maximum number of concurrent
1819
1819
  requests. Defaults to 8.
1820
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1820
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
1821
1821
 
1822
1822
  Note:
1823
1823
  The task's stored API parameters are used. Core batching / routing
@@ -1884,7 +1884,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1884
1884
  max_examples: int = 100,
1885
1885
  batch_size: int | None = None,
1886
1886
  max_concurrency: int = 8,
1887
- show_progress: bool = False,
1887
+ show_progress: bool = True,
1888
1888
  **api_kwargs,
1889
1889
  ) -> pd.Series:
1890
1890
  """Parse DataFrame rows into structured data using an LLM (asynchronously).
@@ -1903,7 +1903,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1903
1903
  automatic optimization. Defaults to None.
1904
1904
  max_concurrency (int, optional): Maximum concurrent requests.
1905
1905
  Defaults to 8.
1906
- show_progress (bool, optional): Show progress bar. Defaults to False.
1906
+ show_progress (bool, optional): Show progress bar. Defaults to True.
1907
1907
  **api_kwargs: Additional OpenAI API parameters.
1908
1908
 
1909
1909
  Returns:
@@ -2036,7 +2036,7 @@ class AsyncOpenAIVecDataFrameAccessor:
2036
2036
  max_examples: int = 500,
2037
2037
  batch_size: int | None = None,
2038
2038
  max_concurrency: int = 8,
2039
- show_progress: bool = False,
2039
+ show_progress: bool = True,
2040
2040
  ) -> pd.DataFrame:
2041
2041
  """Fill missing values in a DataFrame column using AI-powered inference (asynchronously).
2042
2042
 
@@ -2056,7 +2056,7 @@ class AsyncOpenAIVecDataFrameAccessor:
2056
2056
  optimization based on execution time). Set to a positive integer for fixed batch size.
2057
2057
  max_concurrency (int, optional): Maximum number of concurrent
2058
2058
  requests. Defaults to 8.
2059
- show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
2059
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``True``.
2060
2060
 
2061
2061
  Returns:
2062
2062
  pandas.DataFrame: A new DataFrame with missing values filled in the target
openaivec/spark.py CHANGED
@@ -150,6 +150,8 @@ from openaivec._serialize import deserialize_base_model, serialize_base_model
150
150
  from openaivec._util import TextChunker
151
151
 
152
152
  __all__ = [
153
+ "setup",
154
+ "setup_azure",
153
155
  "responses_udf",
154
156
  "task_udf",
155
157
  "embeddings_udf",
@@ -242,50 +244,6 @@ def setup_azure(
242
244
  CONTAINER.clear_singletons()
243
245
 
244
246
 
245
- def set_responses_model(model_name: str):
246
- """Set the default model name for response generation in the DI container.
247
-
248
- Args:
249
- model_name (str): The model name to set as default for responses.
250
- """
251
- CONTAINER.register(ResponsesModelName, lambda: ResponsesModelName(model_name))
252
- CONTAINER.clear_singletons()
253
-
254
-
255
- def get_responses_model() -> str | None:
256
- """Get the default model name for response generation from the DI container.
257
-
258
- Returns:
259
- str | None: The default model name for responses, or None if not set.
260
- """
261
- try:
262
- return CONTAINER.resolve(ResponsesModelName).value
263
- except Exception:
264
- return None
265
-
266
-
267
- def set_embeddings_model(model_name: str):
268
- """Set the default model name for embeddings in the DI container.
269
-
270
- Args:
271
- model_name (str): The model name to set as default for embeddings.
272
- """
273
- CONTAINER.register(EmbeddingsModelName, lambda: EmbeddingsModelName(model_name))
274
- CONTAINER.clear_singletons()
275
-
276
-
277
- def get_embeddings_model() -> str | None:
278
- """Get the default model name for embeddings from the DI container.
279
-
280
- Returns:
281
- str | None: The default model name for embeddings, or None if not set.
282
- """
283
- try:
284
- return CONTAINER.resolve(EmbeddingsModelName).value
285
- except Exception:
286
- return None
287
-
288
-
289
247
  def _python_type_to_spark(python_type):
290
248
  origin = get_origin(python_type)
291
249
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.15.0
3
+ Version: 0.99.0
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -32,6 +32,27 @@ Description-Content-Type: text/markdown
32
32
 
33
33
  **openaivec** enables data analysts to seamlessly integrate OpenAI's language models into their pandas and Spark workflows. Process thousands of text records with natural language instructions, turning unstructured data into actionable insights with just a few lines of code.
34
34
 
35
+ ## Contents
36
+ - [Why openaivec?](#why-openaivec)
37
+ - [Quick Start](#-quick-start-from-text-to-insights-in-seconds)
38
+ - [Real-World Impact](#-real-world-impact)
39
+ - [Overview](#overview)
40
+ - [Core Workflows](#core-workflows)
41
+ - [Using with Apache Spark UDFs](#using-with-apache-spark-udfs)
42
+ - [Building Prompts](#building-prompts)
43
+ - [Using with Microsoft Fabric](#using-with-microsoft-fabric)
44
+ - [Contributing](#contributing)
45
+ - [Additional Resources](#additional-resources)
46
+ - [Community](#community)
47
+
48
+ ## Why openaivec?
49
+ - Drop-in `.ai` and `.aio` DataFrame accessors keep pandas analysts in their favorite tools.
50
+ - Smart batching (`BatchingMapProxy`) deduplicates prompts, enforces ordered outputs, and shortens runtimes without manual tuning.
51
+ - Built-in caches, retry logic, and reasoning model safeguards cut noisy boilerplate from production pipelines.
52
+ - Ready-made Spark UDF builders and Microsoft Fabric guides take AI workloads from notebooks into enterprise-scale ETL.
53
+ - Pre-configured task library and `FewShotPromptBuilder` ship curated prompts and structured outputs validated by Pydantic.
54
+ - Supports OpenAI and Azure OpenAI clients interchangeably, including async workloads and embeddings.
55
+
35
56
  ## 🚀 Quick Start: From Text to Insights in Seconds
36
57
 
37
58
  Imagine analyzing 10,000 customer reviews. Instead of manual work, just write:
@@ -111,21 +132,28 @@ This approach helps reduce latency and simplifies your code.
111
132
  Additionally, it integrates effortlessly with Pandas DataFrames and Apache Spark UDFs, making it easy to incorporate
112
133
  into your data processing pipelines.
113
134
 
114
- ## Features
135
+ Behind the scenes, `BatchingMapProxy` and `AsyncBatchingMapProxy` deduplicate repeated inputs, guarantee response order,
136
+ and unblock waiters even when upstream APIs fail. Caches created via helpers such as `responses_with_cache` plug into
137
+ this batching layer so expensive prompts are reused across pandas, Spark, and async flows. Progress bars surface
138
+ automatically in notebook environments when `show_progress=True`.
139
+
140
+ ## Core Capabilities
115
141
 
116
- - Vectorized API requests for processing multiple inputs at once.
117
- - Seamless integration with Pandas DataFrames.
118
- - A UDF builder for Apache Spark.
119
- - Compatibility with multiple OpenAI clients, including Azure OpenAI.
142
+ - Vectorized request batching with automatic deduplication, retries, and cache hooks for any OpenAI-compatible client.
143
+ - pandas `.ai` and `.aio` accessors for synchronous and asynchronous DataFrame pipelines, including `ai.extract` helpers.
144
+ - Task library with Pydantic-backed schemas for consistent structured outputs across pandas and Spark jobs.
145
+ - Spark UDF builders (`responses_udf`, `embeddings_udf`, `parse_udf`, `task_udf`, etc.) for large-scale ETL and BI.
146
+ - Embeddings, token counting, and similarity utilities for search and retrieval use cases.
147
+ - Prompt tooling (`FewShotPromptBuilder`, `improve`) to craft and iterate production-ready instructions.
120
148
 
121
149
  ## Key Benefits
122
150
 
123
- - **🚀 Performance**: Vectorized processing handles thousands of records in minutes, not hours
124
- - **💰 Cost Efficiency**: Automatic deduplication significantly reduces API costs on typical datasets
125
- - **🔗 Integration**: Works within existing pandas/Spark workflows without architectural changes
126
- - **📈 Scalability**: Same API scales from exploratory analysis (100s of records) to production systems (millions of records)
127
- - **🎯 Pre-configured Tasks**: Ready-to-use task library with optimized prompts for common use cases
128
- - **🏢 Enterprise Ready**: Microsoft Fabric integration, Apache Spark UDFs, Azure OpenAI compatibility
151
+ - **🚀 Throughput**: Smart batching and concurrency tuning process thousands of records in minutes, not hours.
152
+ - **💰 Cost Efficiency**: Input deduplication and optional caches cut redundant token usage on real-world datasets.
153
+ - **🛡️ Reliability**: Guardrails for reasoning models, informative errors, and automatic waiter release keep pipelines healthy.
154
+ - **🔗 Integration**: pandas, Spark, async, and Fabric workflows share the same API surface—no bespoke adapters required.
155
+ - **🎯 Consistency**: Pre-configured tasks and extractors deliver structured outputs validated with Pydantic models.
156
+ - **🏢 Enterprise Ready**: Azure OpenAI parity, Microsoft Fabric walkthroughs, and Spark UDFs shorten the path to production.
129
157
 
130
158
  ## Requirements
131
159
 
@@ -145,7 +173,7 @@ If you want to uninstall the package, you can do so with:
145
173
  pip uninstall openaivec
146
174
  ```
147
175
 
148
- ## Basic Usage
176
+ ## Core Workflows
149
177
 
150
178
  ### Direct API Usage
151
179
 
@@ -1,4 +1,4 @@
1
- openaivec/__init__.py,sha256=mXCGNNTjYbmE4CAXGvAs78soxUsoy_mxxnvaCk_CL6Y,361
1
+ openaivec/__init__.py,sha256=qHgiPPAPijdZHx0KjifOkdo7U6nl_lMQLiCe4SHjqiE,541
2
2
  openaivec/_di.py,sha256=Cl1ZoNBlQsJL1bpzoMDl08uT9pZFVSlqOdLbS3_MwPE,11462
3
3
  openaivec/_dynamic.py,sha256=7ZaC59w2Edemnao57XeZVO4qmSOA-Kus6TchZC3Dd5o,14821
4
4
  openaivec/_embeddings.py,sha256=nirLqOu69fTB7aSCYhbbRbwAA6ggwEYJiQoPDsHqAqQ,8200
@@ -7,13 +7,13 @@ openaivec/_model.py,sha256=71oiENUKwpY58ilj1LE7fDOAhs7PUSiZRiUHKUIuu7Y,3235
7
7
  openaivec/_optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
8
8
  openaivec/_prompt.py,sha256=NWE7jZKYphkD856haynJLmRadPugJ68emT42pd7Ciso,20633
9
9
  openaivec/_provider.py,sha256=8z8gPYY5-Z7rzDlj_NC6hR__DUqVAH7VLHJn6LalzRg,6158
10
- openaivec/_proxy.py,sha256=AiGuC1MCFjZCRXCac-pHUI3Np3nf1HIpWY6nC9ZVCFY,29671
10
+ openaivec/_proxy.py,sha256=AvTM2ESEJnScP7vxN-ISLE_HPUnMGsDGwYs9YILeDIY,29669
11
11
  openaivec/_responses.py,sha256=qBrYv4qblDIs5dRvj9t96r8UfAJmy4ZvtAe6csNZ7oM,20412
12
12
  openaivec/_schema.py,sha256=iOeR5J_ihZRDZtzmqvOK1ZtInKcx4OnoR38DB3VmmQw,15666
13
13
  openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
14
14
  openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
15
- openaivec/pandas_ext.py,sha256=1euz52rwKpUBvWRCKvkDjwCFf_zNYnf60wF5OXHiCqw,86727
16
- openaivec/spark.py,sha256=8-Hap36D0kcyV8RMA-PyFjZxfAnMfgtcp9gKASRnUwU,34032
15
+ openaivec/pandas_ext.py,sha256=W-n2dlcouJHVAyyEnDrJ3zUFUCWFcnIYlJweuy5x4zs,86695
16
+ openaivec/spark.py,sha256=ooRyeS75WDoh_3ePvThWZbmF_DzEprAJurLTXZrvFQo,32743
17
17
  openaivec/task/__init__.py,sha256=RkYIKrcE83M_9Um9cSMkeGzL9kPRAovajfRvr31YxLE,6178
18
18
  openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
19
19
  openaivec/task/customer_support/customer_sentiment.py,sha256=d8spZUtImjePK0xWGvIW98ghbdyOZ0KEZmaUpG8QB7M,7532
@@ -31,7 +31,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=u-zpqAaQYcr7I3mqMv_CTJXkfxtoLft3
31
31
  openaivec/task/nlp/translation.py,sha256=kgWj2oN8pUId3vuHTJNx636gB49AGEKXWICA_XJgE_0,6628
32
32
  openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
33
33
  openaivec/task/table/fillna.py,sha256=zL6m5hGD4kamV7qHETnn__B59wIY540Ks0EzNgUJgdI,6888
34
- openaivec-0.15.0.dist-info/METADATA,sha256=cVTYsT6TOMij_vagDgsIbo886U24Ys5dkah7ZvdEkdw,28278
35
- openaivec-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
- openaivec-0.15.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
- openaivec-0.15.0.dist-info/RECORD,,
34
+ openaivec-0.99.0.dist-info/METADATA,sha256=f8fEKrs-vz8bXbC23vHXeSLJRH-MjoYjsQDGwgvRso4,30443
35
+ openaivec-0.99.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
+ openaivec-0.99.0.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
+ openaivec-0.99.0.dist-info/RECORD,,