openaivec 0.13.2__tar.gz → 0.13.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openaivec-0.13.2 → openaivec-0.13.3}/PKG-INFO +4 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/README.md +1 -1
- {openaivec-0.13.2 → openaivec-0.13.3}/pyproject.toml +22 -0
- openaivec-0.13.3/src/openaivec/__init__.py +9 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/di.py +3 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/embeddings.py +5 -4
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/pandas_ext.py +129 -21
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/prompt.py +34 -13
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/provider.py +3 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/proxy.py +166 -28
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/responses.py +6 -5
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/serialize.py +1 -1
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/spark.py +8 -7
- openaivec-0.13.3/src/openaivec/task/customer_support/__init__.py +26 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/customer_sentiment.py +12 -4
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/inquiry_classification.py +11 -4
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/inquiry_summary.py +8 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/intent_analysis.py +10 -4
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/response_suggestion.py +10 -4
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/urgency_analysis.py +8 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/dependency_parsing.py +4 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/keyword_extraction.py +3 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/morphological_analysis.py +4 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/named_entity_recognition.py +4 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/sentiment_analysis.py +7 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/translation.py +1 -1
- openaivec-0.13.3/src/openaivec/task/table/__init__.py +3 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/table/fillna.py +4 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/util.py +0 -1
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_pandas_ext.py +4 -2
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_prompt.py +44 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_provider.py +1 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_proxy.py +250 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_util.py +2 -1
- {openaivec-0.13.2 → openaivec-0.13.3}/uv.lock +1258 -1207
- openaivec-0.13.2/src/openaivec/__init__.py +0 -9
- openaivec-0.13.2/src/openaivec/task/customer_support/__init__.py +0 -32
- openaivec-0.13.2/src/openaivec/task/table/__init__.py +0 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/.env.example +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-mkdocs.yml +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-package.yml +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-test.yml +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-update.yml +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/.gitignore +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/CODE_OF_CONDUCT.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/LICENSE +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/SECURITY.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/SUPPORT.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/di.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/embeddings.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/pandas_ext.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/prompt.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/proxy.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/responses.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/spark.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/task.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/translation.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/util.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/index.md +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/docs/robots.txt +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/mkdocs.yml +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/log.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/model.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/__init__.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/__init__.py +3 -3
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/__init__.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_di.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_embeddings.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_responses.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_serialize.py +0 -0
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_spark.py +4 -4
- {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_task.py +0 -0
{openaivec-0.13.2 → openaivec-0.13.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openaivec
-Version: 0.13.2
+Version: 0.13.3
 Summary: Generative mutation for tabular calculation
 Project-URL: Homepage, https://microsoft.github.io/openaivec/
 Project-URL: Repository, https://github.com/microsoft/openaivec

@@ -15,9 +15,11 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Python: >=3.10
+Requires-Dist: ipywidgets>=8.1.7
 Requires-Dist: openai>=1.74.0
 Requires-Dist: pandas>=2.2.3
 Requires-Dist: tiktoken>=0.9.0
+Requires-Dist: tqdm>=4.67.1
 Provides-Extra: spark
 Requires-Dist: pyspark>=3.5.5; extra == 'spark'
 Description-Content-Type: text/markdown

@@ -590,7 +592,7 @@ improved_prompt: str = (
     .example("Apple", "Color")
     .example("Apple", "Animal")
     # improve the prompt with OpenAI's API
-    .improve(
+    .improve()
     .build()
 )
 print(improved_prompt)

{openaivec-0.13.2 → openaivec-0.13.3}/pyproject.toml

@@ -26,9 +26,11 @@ classifiers = [

 requires-python = ">=3.10"
 dependencies = [
+    "ipywidgets>=8.1.7",
     "openai>=1.74.0",
     "pandas>=2.2.3",
     "tiktoken>=0.9.0",
+    "tqdm>=4.67.1",
 ]

 [dependency-groups]

@@ -62,6 +64,26 @@ spark = [
 line-length = 120
 target-version = "py310"

+[tool.ruff.lint]
+select = [
+    "E",    # pycodestyle errors
+    "W",    # pycodestyle warnings
+    "F",    # pyflakes
+    "I",    # isort
+    "TID",  # flake8-tidy-imports
+]
+# ignore = []  # E501 stays enabled globally
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Enforce absolute imports - ban relative imports (except in __init__.py files)
+ban-relative-imports = "all"
+
+[tool.ruff.lint.per-file-ignores]
+# Allow relative imports in __init__.py files
+"**/__init__.py" = ["TID252"]
+# Test files contain long test data - ignore line length
+"tests/**/*.py" = ["E501"]
+
 [project.urls]
 Homepage = "https://microsoft.github.io/openaivec/"
 Repository = "https://github.com/microsoft/openaivec"

{openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/di.py

@@ -11,14 +11,14 @@ are created once and reused across multiple resolve calls.
 Example:
     ```python
     from openaivec.di import Container
-
+
     class DatabaseService:
         def __init__(self):
             self.connection = "database://localhost"
-
+
     container = Container()
     container.register(DatabaseService, lambda: DatabaseService())
-
+
     db1 = container.resolve(DatabaseService)
     db2 = container.resolve(DatabaseService)
     print(db1 is db2)  # True - same instance

{openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/embeddings.py

@@ -6,9 +6,9 @@ import numpy as np
 from numpy.typing import NDArray
 from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError

-from .log import observe
-from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from .util import backoff, backoff_async
+from openaivec.log import observe
+from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec.util import backoff, backoff_async

 __all__ = [
     "BatchEmbeddings",

@@ -24,7 +24,8 @@ class BatchEmbeddings:

     Attributes:
         client (OpenAI): Configured OpenAI client.
-        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
+        model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
+            (e.g., ``"text-embedding-3-small"``).
         cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
     """

{openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/pandas_ext.py

@@ -50,12 +50,12 @@ import tiktoken
 from openai import AsyncOpenAI, OpenAI
 from pydantic import BaseModel

-from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
-from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
-from .provider import CONTAINER, _check_azure_v1_api_url
-from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
-from .responses import AsyncBatchResponses, BatchResponses
-from .task.table import FillNaResponse, fillna
+from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
+from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
+from openaivec.provider import CONTAINER, _check_azure_v1_api_url
+from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
+from openaivec.responses import AsyncBatchResponses, BatchResponses
+from openaivec.task.table import FillNaResponse, fillna

 __all__ = [
     "use",

@@ -220,13 +220,23 @@ class OpenAIVecSeriesAccessor:
         batch_size: int = 128,
         temperature: float | None = 0.0,
         top_p: float = 1.0,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Call an LLM once for every Series element.

         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.responses("translate to French")
+
+            # With progress bar in Jupyter notebooks
+            large_series = pd.Series(["data"] * 1000)
+            large_series.ai.responses(
+                "analyze this data",
+                batch_size=32,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.

@@ -241,13 +251,14 @@ class OpenAIVecSeriesAccessor:
                 request. Defaults to ``128``.
             temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.
         """
         return self.responses_with_cache(
             instructions=instructions,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,

@@ -300,7 +311,7 @@ class OpenAIVecSeriesAccessor:
         )
         return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)

-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+    def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Execute a prepared task on every Series element.

         This method applies a pre-configured task to each element in the Series,

@@ -315,7 +326,16 @@ class OpenAIVecSeriesAccessor:
             sentiment_task = PreparedTask(...)

             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+            # Basic usage
             results = reviews.ai.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = large_reviews.ai.task(
+                sentiment_task,
+                batch_size=50,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding input element, following the task's defined structure.

@@ -325,6 +345,7 @@ class OpenAIVecSeriesAccessor:
                 response format, and other parameters for processing the inputs.
             batch_size (int, optional): Number of prompts grouped into a single
                 request to optimize API usage. Defaults to 128.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are instances of the task's

@@ -332,16 +353,24 @@ class OpenAIVecSeriesAccessor:
         """
         return self.task_with_cache(
             task=task,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
         )

-    def embeddings(self, batch_size: int = 128) -> pd.Series:
+    def embeddings(self, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Compute OpenAI embeddings for every Series element.

         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
             animals.ai.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = large_texts.ai.embeddings(
+                batch_size=100,
+                show_progress=True
+            )
             ```
         This method returns a Series of numpy arrays, each containing the
         embedding vector for the corresponding input.

@@ -351,13 +380,14 @@ class OpenAIVecSeriesAccessor:
         Args:
             batch_size (int, optional): Number of inputs grouped into a
                 single request. Defaults to ``128``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects
                 (dtype ``float32``).
         """
         return self.embeddings_with_cache(
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
         )

     def count_tokens(self) -> pd.Series:

@@ -511,6 +541,7 @@ class OpenAIVecDataFrameAccessor:
         batch_size: int = 128,
         temperature: float | None = 0.0,
         top_p: float = 1.0,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON.


@@ -521,7 +552,16 @@ class OpenAIVecDataFrameAccessor:
                 {"name": "dog", "legs": 4},
                 {"name": "elephant", "legs": 4},
             ])
+            # Basic usage
             df.ai.responses("what is the animal's name?")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            large_df.ai.responses(
+                "generate a name for this ID",
+                batch_size=20,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.

@@ -537,19 +577,20 @@ class OpenAIVecDataFrameAccessor:
                 Defaults to ``128``.
             temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Responses aligned with the DataFrame's original index.
         """
         return self.responses_with_cache(
             instructions=instructions,
-            cache=BatchingMapProxy(batch_size=batch_size),
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )

-    def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
+    def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
         """Execute a prepared task on each DataFrame row after serialising it to JSON.

         This method applies a pre-configured task to each row in the DataFrame,

@@ -579,6 +620,7 @@ class OpenAIVecDataFrameAccessor:
                 response format, and other parameters for processing the inputs.
             batch_size (int, optional): Number of requests sent in one batch
                 to optimize API usage. Defaults to 128.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are instances of the task's

@@ -588,7 +630,7 @@ class OpenAIVecDataFrameAccessor:
             lambda df: (
                 df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
                 .map(lambda x: json.dumps(x, ensure_ascii=False))
-                .ai.task(task=task, batch_size=batch_size)
+                .ai.task(task=task, batch_size=batch_size, show_progress=show_progress)
             )
         )

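The hunks above thread a new `show_progress` flag from the synchronous `.ai` accessors into `BatchingMapProxy`, which is what the new `ipywidgets`/`tqdm` dependencies support. A minimal sketch of the combined usage based on the signatures shown in this diff; the input data, instruction text, and `Sentiment` model are illustrative, and it assumes the accessor is registered by importing `openaivec.pandas_ext` as in the project README:

```python
import pandas as pd
from pydantic import BaseModel

from openaivec import pandas_ext  # noqa: F401  (importing registers the .ai accessor)


class Sentiment(BaseModel):
    label: str
    confidence: float


# Hypothetical input data.
reviews = pd.Series(["Great product!", "Arrived broken", "Works as described"] * 200)

# 32 prompts per request; show_progress=True renders a progress bar
# (tqdm/ipywidgets) while batches complete in a notebook.
labels = reviews.ai.responses(
    "Classify the sentiment of this review.",
    response_format=Sentiment,
    batch_size=32,
    show_progress=True,
)
```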
@@ -864,6 +906,7 @@ class AsyncOpenAIVecSeriesAccessor:
         temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Call an LLM once for every Series element (asynchronously).


@@ -872,6 +915,15 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             results = await animals.aio.responses("translate to French")
+
+            # With progress bar for large datasets
+            large_series = pd.Series(["data"] * 1000)
+            results = await large_series.aio.responses(
+                "analyze this data",
+                batch_size=32,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.

@@ -888,6 +940,7 @@ class AsyncOpenAIVecSeriesAccessor:
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are instances of ``response_format``.

@@ -897,13 +950,17 @@ class AsyncOpenAIVecSeriesAccessor:
         """
         return await self.responses_with_cache(
             instructions=instructions,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )

-    async def embeddings(
+    async def embeddings(
+        self, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Compute OpenAI embeddings for every Series element (asynchronously).

         Example:

@@ -911,6 +968,14 @@ class AsyncOpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             # Must be awaited
             embeddings = await animals.aio.embeddings()
+
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = await large_texts.aio.embeddings(
+                batch_size=100,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of numpy arrays, each containing the
         embedding vector for the corresponding input.

@@ -922,6 +987,7 @@ class AsyncOpenAIVecSeriesAccessor:
                 single request. Defaults to ``128``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are ``np.ndarray`` objects

@@ -931,10 +997,14 @@ class AsyncOpenAIVecSeriesAccessor:
         This is an asynchronous method and must be awaited.
         """
         return await self.embeddings_with_cache(
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
         )

-    async def task(
+    async def task(
+        self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Execute a prepared task on every Series element (asynchronously).

         This method applies a pre-configured task to each element in the Series,

@@ -951,6 +1021,15 @@ class AsyncOpenAIVecSeriesAccessor:
             reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
             # Must be awaited
             results = await reviews.aio.task(sentiment_task)
+
+            # With progress bar for large datasets
+            large_reviews = pd.Series(["review text"] * 2000)
+            results = await large_reviews.aio.task(
+                sentiment_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding input element, following the task's defined structure.

@@ -962,6 +1041,7 @@ class AsyncOpenAIVecSeriesAccessor:
                 request to optimize API usage. Defaults to 128.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are instances of the task's

@@ -972,7 +1052,9 @@ class AsyncOpenAIVecSeriesAccessor:
         """
         return await self.task_with_cache(
             task=task,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
         )


@@ -1056,6 +1138,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         temperature: float | None = 0.0,
         top_p: float = 1.0,
         max_concurrency: int = 8,
+        show_progress: bool = False,
     ) -> pd.Series:
         """Generate a response for each row after serialising it to JSON (asynchronously).


@@ -1068,6 +1151,15 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.responses(\"what is the animal\'s name?\")
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({\"id\": list(range(1000))})
+            results = await large_df.aio.responses(
+                \"generate a name for this ID\",
+                batch_size=20,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series of strings, each containing the
         assistant's response to the corresponding input.

@@ -1085,6 +1177,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Responses aligned with the DataFrame's original index.

@@ -1094,13 +1187,17 @@ class AsyncOpenAIVecDataFrameAccessor:
         """
         return await self.responses_with_cache(
             instructions=instructions,
-            cache=AsyncBatchingMapProxy(
+            cache=AsyncBatchingMapProxy(
+                batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
+            ),
             response_format=response_format,
             temperature=temperature,
             top_p=top_p,
         )

-    async def task(
+    async def task(
+        self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
+    ) -> pd.Series:
         """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).

         This method applies a pre-configured task to each row in the DataFrame,

@@ -1122,6 +1219,15 @@ class AsyncOpenAIVecDataFrameAccessor:
             ])
             # Must be awaited
             results = await df.aio.task(analysis_task)
+
+            # With progress bar for large datasets
+            large_df = pd.DataFrame({"id": list(range(1000))})
+            results = await large_df.aio.task(
+                analysis_task,
+                batch_size=50,
+                max_concurrency=4,
+                show_progress=True
+            )
             ```
         This method returns a Series containing the task results for each
         corresponding row, following the task's defined structure.

@@ -1133,6 +1239,7 @@ class AsyncOpenAIVecDataFrameAccessor:
                 to optimize API usage. Defaults to 128.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to 8.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.

         Returns:
             pandas.Series: Series whose values are instances of the task's

@@ -1153,6 +1260,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             task=task,
             batch_size=batch_size,
             max_concurrency=max_concurrency,
+            show_progress=show_progress,
         )

     async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:

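The asynchronous `.aio` accessors gain the same flag alongside `max_concurrency`, with batching handled by `AsyncBatchingMapProxy`. A rough sketch of awaiting the embeddings variant; the texts and parameter values are illustrative, and in a notebook the coroutine would simply be awaited instead of wrapped in `asyncio.run`:

```python
import asyncio

import pandas as pd

from openaivec import pandas_ext  # noqa: F401  (importing registers the .aio accessor)

# Hypothetical input data.
texts = pd.Series(["alpha", "beta", "gamma"] * 500)


async def main() -> None:
    # Up to 4 concurrent requests of 100 inputs each, with a progress bar while running.
    vectors = await texts.aio.embeddings(
        batch_size=100,
        max_concurrency=4,
        show_progress=True,
    )
    print(vectors.iloc[0].shape)  # e.g. (1536,) for text-embedding-3-small


asyncio.run(main())
```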
{openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/prompt.py

@@ -51,6 +51,9 @@ from openai import OpenAI
 from openai.types.responses import ParsedResponse
 from pydantic import BaseModel

+from openaivec.model import ResponsesModelName
+from openaivec.provider import CONTAINER
+
 __all__ = [
     "FewShotPrompt",
     "FewShotPromptBuilder",

@@ -203,7 +206,9 @@ _PROMPT: str = """
     "iterations": [
         {
             "id": 1,
-            "analysis": "The original purpose was vague and did not explicitly state the main objective.
+            "analysis": "The original purpose was vague and did not explicitly state the main objective.
+            This ambiguity could lead to confusion about the task. In this iteration, we refined the purpose to
+            clearly specify that the goal is to determine the correct category for a given word based on its context.",
             "prompt": {
                 "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
                 "cautions": [

@@ -225,7 +230,10 @@ _PROMPT: str = """
         },
         {
             "id": 2,
-            "analysis": "Next, we focused solely on the cautions section. The original cautions were generic and
+            "analysis": "Next, we focused solely on the cautions section. The original cautions were generic and
+            did not mention potential pitfalls like homonyms or polysemy. Failing to address these could result in
+            misclassification. Therefore, we added a specific caution regarding homonyms while keeping the purpose
+            and examples unchanged.",
             "prompt": {
                 "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
                 "cautions": [

@@ -248,7 +256,10 @@ _PROMPT: str = """
         },
         {
             "id": 3,
-            "analysis": "In this step, we improved the examples section to cover a broader range of scenarios and
+            "analysis": "In this step, we improved the examples section to cover a broader range of scenarios and
+            address potential ambiguities. By adding examples that include words with multiple interpretations
+            (such as 'Mercury' for both a planet and an element), we enhance clarity and ensure better coverage.
+            This iteration only modifies the examples section, leaving purpose and cautions intact.",
             "prompt": {
                 "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
                 "cautions": [

@@ -409,28 +420,34 @@ class FewShotPromptBuilder:

     def improve(
         self,
-        client: OpenAI,
-        model_name: str,
-        temperature: float =
-        top_p: float =
+        client: OpenAI | None = None,
+        model_name: str | None = None,
+        temperature: float | None = None,
+        top_p: float | None = None,
     ) -> "FewShotPromptBuilder":
         """Iteratively refine the prompt using an LLM.

         The method calls a single LLM request that returns multiple
         editing steps and stores each step for inspection.

+        When client is None, automatically creates a client using environment variables:
+        - For OpenAI: ``OPENAI_API_KEY``
+        - For Azure OpenAI: ``AZURE_OPENAI_API_KEY``, ``AZURE_OPENAI_BASE_URL``, ``AZURE_OPENAI_API_VERSION``
+
         Args:
-            client (
-            model_name (str): Model identifier
-            temperature (float
-            top_p (float
+            client (OpenAI | None): Configured OpenAI client. If None, uses DI container with environment variables.
+            model_name (str | None): Model identifier. If None, uses default ``gpt-4.1-mini``.
+            temperature (float | None): Sampling temperature. If None, uses model default.
+            top_p (float | None): Nucleus sampling parameter. If None, uses model default.

         Returns:
             FewShotPromptBuilder: The current builder instance containing the refined prompt and iteration history.
         """
+        _client = client or CONTAINER.resolve(OpenAI)
+        _model_name = model_name or CONTAINER.resolve(ResponsesModelName).value

-        response: ParsedResponse[Response] =
-            model=
+        response: ParsedResponse[Response] = _client.responses.parse(
+            model=_model_name,
             instructions=_PROMPT,
             input=Request(prompt=self._prompt).model_dump_json(),
             temperature=temperature,

@@ -456,6 +473,10 @@ class FewShotPromptBuilder:
         Returns:
             FewShotPromptBuilder: The current builder instance.
         """
+        if not hasattr(self, "_steps") or not self._steps:
+            print("No improvement steps available. Call improve() first.")
+            return self
+
         for previous, current in zip(self._steps, self._steps[1:]):
             print(f"=== Iteration {current.id} ===\n")
             print(f"Instruction: {current.analysis}")

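Because `improve()` now resolves a client and a default model from the DI container when called with no arguments, the README snippet shown earlier in this diff collapses to an argument-free call. A sketch under the assumption that `OPENAI_API_KEY` (or the `AZURE_OPENAI_*` variables) is set and that the builder exposes `purpose()`/`example()` as in the package README; the prompt content here is made up:

```python
from openaivec.prompt import FewShotPromptBuilder

improved_prompt: str = (
    FewShotPromptBuilder()
    .purpose("Return the category of the given word")  # assumed builder method from the README
    .example("Apple", "Fruit")
    .example("Apple", "Color")
    # improve the prompt with OpenAI's API; client/model_name are optional in 0.13.3
    .improve()
    .build()
)
print(improved_prompt)
```

The iteration-history printer patched in the last hunk above now guards against being called before `improve()`, so inspecting the steps on a fresh builder prints a notice instead of raising.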
{openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/provider.py

@@ -4,8 +4,8 @@ import warnings
 import tiktoken
 from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI

-from
-from .model import (
+from openaivec import di
+from openaivec.model import (
     AzureOpenAIAPIKey,
     AzureOpenAIAPIVersion,
     AzureOpenAIBaseURL,

@@ -13,7 +13,7 @@ from .model import (
     OpenAIAPIKey,
     ResponsesModelName,
 )
-from .util import TextChunker
+from openaivec.util import TextChunker

 CONTAINER = di.Container()
