openaivec 0.13.2__tar.gz → 0.13.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. {openaivec-0.13.2 → openaivec-0.13.3}/PKG-INFO +4 -2
  2. {openaivec-0.13.2 → openaivec-0.13.3}/README.md +1 -1
  3. {openaivec-0.13.2 → openaivec-0.13.3}/pyproject.toml +22 -0
  4. openaivec-0.13.3/src/openaivec/__init__.py +9 -0
  5. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/di.py +3 -3
  6. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/embeddings.py +5 -4
  7. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/pandas_ext.py +129 -21
  8. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/prompt.py +34 -13
  9. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/provider.py +3 -3
  10. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/proxy.py +166 -28
  11. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/responses.py +6 -5
  12. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/serialize.py +1 -1
  13. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/spark.py +8 -7
  14. openaivec-0.13.3/src/openaivec/task/customer_support/__init__.py +26 -0
  15. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/customer_sentiment.py +12 -4
  16. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/inquiry_classification.py +11 -4
  17. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/inquiry_summary.py +8 -3
  18. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/intent_analysis.py +10 -4
  19. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/response_suggestion.py +10 -4
  20. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/customer_support/urgency_analysis.py +8 -3
  21. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/dependency_parsing.py +4 -2
  22. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/keyword_extraction.py +3 -2
  23. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/morphological_analysis.py +4 -2
  24. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/named_entity_recognition.py +4 -2
  25. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/sentiment_analysis.py +7 -2
  26. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/translation.py +1 -1
  27. openaivec-0.13.3/src/openaivec/task/table/__init__.py +3 -0
  28. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/table/fillna.py +4 -3
  29. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/util.py +0 -1
  30. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_pandas_ext.py +4 -2
  31. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_prompt.py +44 -0
  32. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_provider.py +1 -0
  33. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_proxy.py +250 -0
  34. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_util.py +2 -1
  35. {openaivec-0.13.2 → openaivec-0.13.3}/uv.lock +1258 -1207
  36. openaivec-0.13.2/src/openaivec/__init__.py +0 -9
  37. openaivec-0.13.2/src/openaivec/task/customer_support/__init__.py +0 -32
  38. openaivec-0.13.2/src/openaivec/task/table/__init__.py +0 -3
  39. {openaivec-0.13.2 → openaivec-0.13.3}/.env.example +0 -0
  40. {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-mkdocs.yml +0 -0
  41. {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-package.yml +0 -0
  42. {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-test.yml +0 -0
  43. {openaivec-0.13.2 → openaivec-0.13.3}/.github/workflows/python-update.yml +0 -0
  44. {openaivec-0.13.2 → openaivec-0.13.3}/.gitignore +0 -0
  45. {openaivec-0.13.2 → openaivec-0.13.3}/CODE_OF_CONDUCT.md +0 -0
  46. {openaivec-0.13.2 → openaivec-0.13.3}/LICENSE +0 -0
  47. {openaivec-0.13.2 → openaivec-0.13.3}/SECURITY.md +0 -0
  48. {openaivec-0.13.2 → openaivec-0.13.3}/SUPPORT.md +0 -0
  49. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/di.md +0 -0
  50. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/embeddings.md +0 -0
  51. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/pandas_ext.md +0 -0
  52. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/prompt.md +0 -0
  53. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/proxy.md +0 -0
  54. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/responses.md +0 -0
  55. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/spark.md +0 -0
  56. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/task.md +0 -0
  57. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  58. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  59. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  60. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  61. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  62. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  63. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  64. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  65. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  66. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  67. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  68. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/tasks/nlp/translation.md +0 -0
  69. {openaivec-0.13.2 → openaivec-0.13.3}/docs/api/util.md +0 -0
  70. {openaivec-0.13.2 → openaivec-0.13.3}/docs/index.md +0 -0
  71. {openaivec-0.13.2 → openaivec-0.13.3}/docs/robots.txt +0 -0
  72. {openaivec-0.13.2 → openaivec-0.13.3}/mkdocs.yml +0 -0
  73. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/log.py +0 -0
  74. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/model.py +0 -0
  75. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/__init__.py +0 -0
  76. {openaivec-0.13.2 → openaivec-0.13.3}/src/openaivec/task/nlp/__init__.py +3 -3
  77. {openaivec-0.13.2 → openaivec-0.13.3}/tests/__init__.py +0 -0
  78. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_di.py +0 -0
  79. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_embeddings.py +0 -0
  80. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_responses.py +0 -0
  81. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_serialize.py +0 -0
  82. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_spark.py +4 -4
  83. {openaivec-0.13.2 → openaivec-0.13.3}/tests/test_task.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.13.2
3
+ Version: 0.13.3
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -15,9 +15,11 @@ Classifier: Programming Language :: Python :: 3.10
15
15
  Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
17
  Requires-Python: >=3.10
18
+ Requires-Dist: ipywidgets>=8.1.7
18
19
  Requires-Dist: openai>=1.74.0
19
20
  Requires-Dist: pandas>=2.2.3
20
21
  Requires-Dist: tiktoken>=0.9.0
22
+ Requires-Dist: tqdm>=4.67.1
21
23
  Provides-Extra: spark
22
24
  Requires-Dist: pyspark>=3.5.5; extra == 'spark'
23
25
  Description-Content-Type: text/markdown
@@ -590,7 +592,7 @@ improved_prompt: str = (
590
592
  .example("Apple", "Color")
591
593
  .example("Apple", "Animal")
592
594
  # improve the prompt with OpenAI's API
593
- .improve(client, model_name)
595
+ .improve()
594
596
  .build()
595
597
  )
596
598
  print(improved_prompt)
@@ -566,7 +566,7 @@ improved_prompt: str = (
566
566
  .example("Apple", "Color")
567
567
  .example("Apple", "Animal")
568
568
  # improve the prompt with OpenAI's API
569
- .improve(client, model_name)
569
+ .improve()
570
570
  .build()
571
571
  )
572
572
  print(improved_prompt)
@@ -26,9 +26,11 @@ classifiers = [
26
26
 
27
27
  requires-python = ">=3.10"
28
28
  dependencies = [
29
+ "ipywidgets>=8.1.7",
29
30
  "openai>=1.74.0",
30
31
  "pandas>=2.2.3",
31
32
  "tiktoken>=0.9.0",
33
+ "tqdm>=4.67.1",
32
34
  ]
33
35
 
34
36
  [dependency-groups]
@@ -62,6 +64,26 @@ spark = [
62
64
  line-length = 120
63
65
  target-version = "py310"
64
66
 
67
+ [tool.ruff.lint]
68
+ select = [
69
+ "E", # pycodestyle errors
70
+ "W", # pycodestyle warnings
71
+ "F", # pyflakes
72
+ "I", # isort
73
+ "TID", # flake8-tidy-imports
74
+ ]
75
+ # ignore = [] # E501 is enabled globally
76
+
77
+ [tool.ruff.lint.flake8-tidy-imports]
78
+ # Enforce absolute imports - ban relative imports (except in __init__.py files)
79
+ ban-relative-imports = "all"
80
+
81
+ [tool.ruff.lint.per-file-ignores]
82
+ # Allow relative imports in __init__.py files
83
+ "**/__init__.py" = ["TID252"]
84
+ # Test files contain long test data - ignore line length
85
+ "tests/**/*.py" = ["E501"]
86
+
65
87
  [project.urls]
66
88
  Homepage = "https://microsoft.github.io/openaivec/"
67
89
  Repository = "https://github.com/microsoft/openaivec"
@@ -0,0 +1,9 @@
1
+ from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
2
+ from .responses import AsyncBatchResponses, BatchResponses
3
+
4
+ __all__ = [
5
+ "BatchResponses",
6
+ "AsyncBatchResponses",
7
+ "BatchEmbeddings",
8
+ "AsyncBatchEmbeddings",
9
+ ]
@@ -11,14 +11,14 @@ are created once and reused across multiple resolve calls.
11
11
  Example:
12
12
  ```python
13
13
  from openaivec.di import Container
14
-
14
+
15
15
  class DatabaseService:
16
16
  def __init__(self):
17
17
  self.connection = "database://localhost"
18
-
18
+
19
19
  container = Container()
20
20
  container.register(DatabaseService, lambda: DatabaseService())
21
-
21
+
22
22
  db1 = container.resolve(DatabaseService)
23
23
  db2 = container.resolve(DatabaseService)
24
24
  print(db1 is db2) # True - same instance
@@ -6,9 +6,9 @@ import numpy as np
6
6
  from numpy.typing import NDArray
7
7
  from openai import AsyncOpenAI, InternalServerError, OpenAI, RateLimitError
8
8
 
9
- from .log import observe
10
- from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
11
- from .util import backoff, backoff_async
9
+ from openaivec.log import observe
10
+ from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
11
+ from openaivec.util import backoff, backoff_async
12
12
 
13
13
  __all__ = [
14
14
  "BatchEmbeddings",
@@ -24,7 +24,8 @@ class BatchEmbeddings:
24
24
 
25
25
  Attributes:
26
26
  client (OpenAI): Configured OpenAI client.
27
- model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name (e.g., ``"text-embedding-3-small"``).
27
+ model_name (str): For Azure OpenAI, use your deployment name. For OpenAI, use the model name
28
+ (e.g., ``"text-embedding-3-small"``).
28
29
  cache (BatchingMapProxy[str, NDArray[np.float32]]): Batching proxy for ordered, cached mapping.
29
30
  """
30
31
 
@@ -50,12 +50,12 @@ import tiktoken
50
50
  from openai import AsyncOpenAI, OpenAI
51
51
  from pydantic import BaseModel
52
52
 
53
- from .embeddings import AsyncBatchEmbeddings, BatchEmbeddings
54
- from .model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
55
- from .provider import CONTAINER, _check_azure_v1_api_url
56
- from .proxy import AsyncBatchingMapProxy, BatchingMapProxy
57
- from .responses import AsyncBatchResponses, BatchResponses
58
- from .task.table import FillNaResponse, fillna
53
+ from openaivec.embeddings import AsyncBatchEmbeddings, BatchEmbeddings
54
+ from openaivec.model import EmbeddingsModelName, PreparedTask, ResponseFormat, ResponsesModelName
55
+ from openaivec.provider import CONTAINER, _check_azure_v1_api_url
56
+ from openaivec.proxy import AsyncBatchingMapProxy, BatchingMapProxy
57
+ from openaivec.responses import AsyncBatchResponses, BatchResponses
58
+ from openaivec.task.table import FillNaResponse, fillna
59
59
 
60
60
  __all__ = [
61
61
  "use",
@@ -220,13 +220,23 @@ class OpenAIVecSeriesAccessor:
220
220
  batch_size: int = 128,
221
221
  temperature: float | None = 0.0,
222
222
  top_p: float = 1.0,
223
+ show_progress: bool = False,
223
224
  ) -> pd.Series:
224
225
  """Call an LLM once for every Series element.
225
226
 
226
227
  Example:
227
228
  ```python
228
229
  animals = pd.Series(["cat", "dog", "elephant"])
230
+ # Basic usage
229
231
  animals.ai.responses("translate to French")
232
+
233
+ # With progress bar in Jupyter notebooks
234
+ large_series = pd.Series(["data"] * 1000)
235
+ large_series.ai.responses(
236
+ "analyze this data",
237
+ batch_size=32,
238
+ show_progress=True
239
+ )
230
240
  ```
231
241
  This method returns a Series of strings, each containing the
232
242
  assistant's response to the corresponding input.
@@ -241,13 +251,14 @@ class OpenAIVecSeriesAccessor:
241
251
  request. Defaults to ``128``.
242
252
  temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
243
253
  top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
254
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
244
255
 
245
256
  Returns:
246
257
  pandas.Series: Series whose values are instances of ``response_format``.
247
258
  """
248
259
  return self.responses_with_cache(
249
260
  instructions=instructions,
250
- cache=BatchingMapProxy(batch_size=batch_size),
261
+ cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
251
262
  response_format=response_format,
252
263
  temperature=temperature,
253
264
  top_p=top_p,
@@ -300,7 +311,7 @@ class OpenAIVecSeriesAccessor:
300
311
  )
301
312
  return pd.Series(client.parse(self._obj.tolist()), index=self._obj.index, name=self._obj.name)
302
313
 
303
- def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
314
+ def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
304
315
  """Execute a prepared task on every Series element.
305
316
 
306
317
  This method applies a pre-configured task to each element in the Series,
@@ -315,7 +326,16 @@ class OpenAIVecSeriesAccessor:
315
326
  sentiment_task = PreparedTask(...)
316
327
 
317
328
  reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
329
+ # Basic usage
318
330
  results = reviews.ai.task(sentiment_task)
331
+
332
+ # With progress bar for large datasets
333
+ large_reviews = pd.Series(["review text"] * 2000)
334
+ results = large_reviews.ai.task(
335
+ sentiment_task,
336
+ batch_size=50,
337
+ show_progress=True
338
+ )
319
339
  ```
320
340
  This method returns a Series containing the task results for each
321
341
  corresponding input element, following the task's defined structure.
@@ -325,6 +345,7 @@ class OpenAIVecSeriesAccessor:
325
345
  response format, and other parameters for processing the inputs.
326
346
  batch_size (int, optional): Number of prompts grouped into a single
327
347
  request to optimize API usage. Defaults to 128.
348
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
328
349
 
329
350
  Returns:
330
351
  pandas.Series: Series whose values are instances of the task's
@@ -332,16 +353,24 @@ class OpenAIVecSeriesAccessor:
332
353
  """
333
354
  return self.task_with_cache(
334
355
  task=task,
335
- cache=BatchingMapProxy(batch_size=batch_size),
356
+ cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
336
357
  )
337
358
 
338
- def embeddings(self, batch_size: int = 128) -> pd.Series:
359
+ def embeddings(self, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
339
360
  """Compute OpenAI embeddings for every Series element.
340
361
 
341
362
  Example:
342
363
  ```python
343
364
  animals = pd.Series(["cat", "dog", "elephant"])
365
+ # Basic usage
344
366
  animals.ai.embeddings()
367
+
368
+ # With progress bar for large datasets
369
+ large_texts = pd.Series(["text"] * 5000)
370
+ embeddings = large_texts.ai.embeddings(
371
+ batch_size=100,
372
+ show_progress=True
373
+ )
345
374
  ```
346
375
  This method returns a Series of numpy arrays, each containing the
347
376
  embedding vector for the corresponding input.
@@ -351,13 +380,14 @@ class OpenAIVecSeriesAccessor:
351
380
  Args:
352
381
  batch_size (int, optional): Number of inputs grouped into a
353
382
  single request. Defaults to ``128``.
383
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
354
384
 
355
385
  Returns:
356
386
  pandas.Series: Series whose values are ``np.ndarray`` objects
357
387
  (dtype ``float32``).
358
388
  """
359
389
  return self.embeddings_with_cache(
360
- cache=BatchingMapProxy(batch_size=batch_size),
390
+ cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
361
391
  )
362
392
 
363
393
  def count_tokens(self) -> pd.Series:
@@ -511,6 +541,7 @@ class OpenAIVecDataFrameAccessor:
511
541
  batch_size: int = 128,
512
542
  temperature: float | None = 0.0,
513
543
  top_p: float = 1.0,
544
+ show_progress: bool = False,
514
545
  ) -> pd.Series:
515
546
  """Generate a response for each row after serialising it to JSON.
516
547
 
@@ -521,7 +552,16 @@ class OpenAIVecDataFrameAccessor:
521
552
  {"name": "dog", "legs": 4},
522
553
  {"name": "elephant", "legs": 4},
523
554
  ])
555
+ # Basic usage
524
556
  df.ai.responses("what is the animal's name?")
557
+
558
+ # With progress bar for large datasets
559
+ large_df = pd.DataFrame({"id": list(range(1000))})
560
+ large_df.ai.responses(
561
+ "generate a name for this ID",
562
+ batch_size=20,
563
+ show_progress=True
564
+ )
525
565
  ```
526
566
  This method returns a Series of strings, each containing the
527
567
  assistant's response to the corresponding input.
@@ -537,19 +577,20 @@ class OpenAIVecDataFrameAccessor:
537
577
  Defaults to ``128``.
538
578
  temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
539
579
  top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
580
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
540
581
 
541
582
  Returns:
542
583
  pandas.Series: Responses aligned with the DataFrame's original index.
543
584
  """
544
585
  return self.responses_with_cache(
545
586
  instructions=instructions,
546
- cache=BatchingMapProxy(batch_size=batch_size),
587
+ cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
547
588
  response_format=response_format,
548
589
  temperature=temperature,
549
590
  top_p=top_p,
550
591
  )
551
592
 
552
- def task(self, task: PreparedTask, batch_size: int = 128) -> pd.Series:
593
+ def task(self, task: PreparedTask, batch_size: int = 128, show_progress: bool = False) -> pd.Series:
553
594
  """Execute a prepared task on each DataFrame row after serialising it to JSON.
554
595
 
555
596
  This method applies a pre-configured task to each row in the DataFrame,
@@ -579,6 +620,7 @@ class OpenAIVecDataFrameAccessor:
579
620
  response format, and other parameters for processing the inputs.
580
621
  batch_size (int, optional): Number of requests sent in one batch
581
622
  to optimize API usage. Defaults to 128.
623
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
582
624
 
583
625
  Returns:
584
626
  pandas.Series: Series whose values are instances of the task's
@@ -588,7 +630,7 @@ class OpenAIVecDataFrameAccessor:
588
630
  lambda df: (
589
631
  df.pipe(lambda df: pd.Series(df.to_dict(orient="records"), index=df.index, name="record"))
590
632
  .map(lambda x: json.dumps(x, ensure_ascii=False))
591
- .ai.task(task=task, batch_size=batch_size)
633
+ .ai.task(task=task, batch_size=batch_size, show_progress=show_progress)
592
634
  )
593
635
  )
594
636
 
@@ -864,6 +906,7 @@ class AsyncOpenAIVecSeriesAccessor:
864
906
  temperature: float | None = 0.0,
865
907
  top_p: float = 1.0,
866
908
  max_concurrency: int = 8,
909
+ show_progress: bool = False,
867
910
  ) -> pd.Series:
868
911
  """Call an LLM once for every Series element (asynchronously).
869
912
 
@@ -872,6 +915,15 @@ class AsyncOpenAIVecSeriesAccessor:
872
915
  animals = pd.Series(["cat", "dog", "elephant"])
873
916
  # Must be awaited
874
917
  results = await animals.aio.responses("translate to French")
918
+
919
+ # With progress bar for large datasets
920
+ large_series = pd.Series(["data"] * 1000)
921
+ results = await large_series.aio.responses(
922
+ "analyze this data",
923
+ batch_size=32,
924
+ max_concurrency=4,
925
+ show_progress=True
926
+ )
875
927
  ```
876
928
  This method returns a Series of strings, each containing the
877
929
  assistant's response to the corresponding input.
@@ -888,6 +940,7 @@ class AsyncOpenAIVecSeriesAccessor:
888
940
  top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
889
941
  max_concurrency (int, optional): Maximum number of concurrent
890
942
  requests. Defaults to ``8``.
943
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
891
944
 
892
945
  Returns:
893
946
  pandas.Series: Series whose values are instances of ``response_format``.
@@ -897,13 +950,17 @@ class AsyncOpenAIVecSeriesAccessor:
897
950
  """
898
951
  return await self.responses_with_cache(
899
952
  instructions=instructions,
900
- cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
953
+ cache=AsyncBatchingMapProxy(
954
+ batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
955
+ ),
901
956
  response_format=response_format,
902
957
  temperature=temperature,
903
958
  top_p=top_p,
904
959
  )
905
960
 
906
- async def embeddings(self, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
961
+ async def embeddings(
962
+ self, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
963
+ ) -> pd.Series:
907
964
  """Compute OpenAI embeddings for every Series element (asynchronously).
908
965
 
909
966
  Example:
@@ -911,6 +968,14 @@ class AsyncOpenAIVecSeriesAccessor:
911
968
  animals = pd.Series(["cat", "dog", "elephant"])
912
969
  # Must be awaited
913
970
  embeddings = await animals.aio.embeddings()
971
+
972
+ # With progress bar for large datasets
973
+ large_texts = pd.Series(["text"] * 5000)
974
+ embeddings = await large_texts.aio.embeddings(
975
+ batch_size=100,
976
+ max_concurrency=4,
977
+ show_progress=True
978
+ )
914
979
  ```
915
980
  This method returns a Series of numpy arrays, each containing the
916
981
  embedding vector for the corresponding input.
@@ -922,6 +987,7 @@ class AsyncOpenAIVecSeriesAccessor:
922
987
  single request. Defaults to ``128``.
923
988
  max_concurrency (int, optional): Maximum number of concurrent
924
989
  requests. Defaults to ``8``.
990
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
925
991
 
926
992
  Returns:
927
993
  pandas.Series: Series whose values are ``np.ndarray`` objects
@@ -931,10 +997,14 @@ class AsyncOpenAIVecSeriesAccessor:
931
997
  This is an asynchronous method and must be awaited.
932
998
  """
933
999
  return await self.embeddings_with_cache(
934
- cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
1000
+ cache=AsyncBatchingMapProxy(
1001
+ batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
1002
+ ),
935
1003
  )
936
1004
 
937
- async def task(self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
1005
+ async def task(
1006
+ self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
1007
+ ) -> pd.Series:
938
1008
  """Execute a prepared task on every Series element (asynchronously).
939
1009
 
940
1010
  This method applies a pre-configured task to each element in the Series,
@@ -951,6 +1021,15 @@ class AsyncOpenAIVecSeriesAccessor:
951
1021
  reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
952
1022
  # Must be awaited
953
1023
  results = await reviews.aio.task(sentiment_task)
1024
+
1025
+ # With progress bar for large datasets
1026
+ large_reviews = pd.Series(["review text"] * 2000)
1027
+ results = await large_reviews.aio.task(
1028
+ sentiment_task,
1029
+ batch_size=50,
1030
+ max_concurrency=4,
1031
+ show_progress=True
1032
+ )
954
1033
  ```
955
1034
  This method returns a Series containing the task results for each
956
1035
  corresponding input element, following the task's defined structure.
@@ -962,6 +1041,7 @@ class AsyncOpenAIVecSeriesAccessor:
962
1041
  request to optimize API usage. Defaults to 128.
963
1042
  max_concurrency (int, optional): Maximum number of concurrent
964
1043
  requests. Defaults to 8.
1044
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
965
1045
 
966
1046
  Returns:
967
1047
  pandas.Series: Series whose values are instances of the task's
@@ -972,7 +1052,9 @@ class AsyncOpenAIVecSeriesAccessor:
972
1052
  """
973
1053
  return await self.task_with_cache(
974
1054
  task=task,
975
- cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
1055
+ cache=AsyncBatchingMapProxy(
1056
+ batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
1057
+ ),
976
1058
  )
977
1059
 
978
1060
 
@@ -1056,6 +1138,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1056
1138
  temperature: float | None = 0.0,
1057
1139
  top_p: float = 1.0,
1058
1140
  max_concurrency: int = 8,
1141
+ show_progress: bool = False,
1059
1142
  ) -> pd.Series:
1060
1143
  """Generate a response for each row after serialising it to JSON (asynchronously).
1061
1144
 
@@ -1068,6 +1151,15 @@ class AsyncOpenAIVecDataFrameAccessor:
1068
1151
  ])
1069
1152
  # Must be awaited
1070
1153
  results = await df.aio.responses("what is the animal's name?")
1154
+
1155
+ # With progress bar for large datasets
1156
+ large_df = pd.DataFrame({"id": list(range(1000))})
1157
+ results = await large_df.aio.responses(
1158
+ "generate a name for this ID",
1159
+ batch_size=20,
1160
+ max_concurrency=4,
1161
+ show_progress=True
1162
+ )
1071
1163
  ```
1072
1164
  This method returns a Series of strings, each containing the
1073
1165
  assistant's response to the corresponding input.
@@ -1085,6 +1177,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1085
1177
  top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
1086
1178
  max_concurrency (int, optional): Maximum number of concurrent
1087
1179
  requests. Defaults to ``8``.
1180
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1088
1181
 
1089
1182
  Returns:
1090
1183
  pandas.Series: Responses aligned with the DataFrame's original index.
@@ -1094,13 +1187,17 @@ class AsyncOpenAIVecDataFrameAccessor:
1094
1187
  """
1095
1188
  return await self.responses_with_cache(
1096
1189
  instructions=instructions,
1097
- cache=AsyncBatchingMapProxy(batch_size=batch_size, max_concurrency=max_concurrency),
1190
+ cache=AsyncBatchingMapProxy(
1191
+ batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress
1192
+ ),
1098
1193
  response_format=response_format,
1099
1194
  temperature=temperature,
1100
1195
  top_p=top_p,
1101
1196
  )
1102
1197
 
1103
- async def task(self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8) -> pd.Series:
1198
+ async def task(
1199
+ self, task: PreparedTask, batch_size: int = 128, max_concurrency: int = 8, show_progress: bool = False
1200
+ ) -> pd.Series:
1104
1201
  """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
1105
1202
 
1106
1203
  This method applies a pre-configured task to each row in the DataFrame,
@@ -1122,6 +1219,15 @@ class AsyncOpenAIVecDataFrameAccessor:
1122
1219
  ])
1123
1220
  # Must be awaited
1124
1221
  results = await df.aio.task(analysis_task)
1222
+
1223
+ # With progress bar for large datasets
1224
+ large_df = pd.DataFrame({"id": list(range(1000))})
1225
+ results = await large_df.aio.task(
1226
+ analysis_task,
1227
+ batch_size=50,
1228
+ max_concurrency=4,
1229
+ show_progress=True
1230
+ )
1125
1231
  ```
1126
1232
  This method returns a Series containing the task results for each
1127
1233
  corresponding row, following the task's defined structure.
@@ -1133,6 +1239,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1133
1239
  to optimize API usage. Defaults to 128.
1134
1240
  max_concurrency (int, optional): Maximum number of concurrent
1135
1241
  requests. Defaults to 8.
1242
+ show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
1136
1243
 
1137
1244
  Returns:
1138
1245
  pandas.Series: Series whose values are instances of the task's
@@ -1153,6 +1260,7 @@ class AsyncOpenAIVecDataFrameAccessor:
1153
1260
  task=task,
1154
1261
  batch_size=batch_size,
1155
1262
  max_concurrency=max_concurrency,
1263
+ show_progress=show_progress,
1156
1264
  )
1157
1265
 
1158
1266
  async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
@@ -51,6 +51,9 @@ from openai import OpenAI
51
51
  from openai.types.responses import ParsedResponse
52
52
  from pydantic import BaseModel
53
53
 
54
+ from openaivec.model import ResponsesModelName
55
+ from openaivec.provider import CONTAINER
56
+
54
57
  __all__ = [
55
58
  "FewShotPrompt",
56
59
  "FewShotPromptBuilder",
@@ -203,7 +206,9 @@ _PROMPT: str = """
203
206
  "iterations": [
204
207
  {
205
208
  "id": 1,
206
- "analysis": "The original purpose was vague and did not explicitly state the main objective. This ambiguity could lead to confusion about the task. In this iteration, we refined the purpose to clearly specify that the goal is to determine the correct category for a given word based on its context.",
209
+ "analysis": "The original purpose was vague and did not explicitly state the main objective.
210
+ This ambiguity could lead to confusion about the task. In this iteration, we refined the purpose to
211
+ clearly specify that the goal is to determine the correct category for a given word based on its context.",
207
212
  "prompt": {
208
213
  "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
209
214
  "cautions": [
@@ -225,7 +230,10 @@ _PROMPT: str = """
225
230
  },
226
231
  {
227
232
  "id": 2,
228
- "analysis": "Next, we focused solely on the cautions section. The original cautions were generic and did not mention potential pitfalls like homonyms or polysemy. Failing to address these could result in misclassification. Therefore, we added a specific caution regarding homonyms while keeping the purpose and examples unchanged.",
233
+ "analysis": "Next, we focused solely on the cautions section. The original cautions were generic and
234
+ did not mention potential pitfalls like homonyms or polysemy. Failing to address these could result in
235
+ misclassification. Therefore, we added a specific caution regarding homonyms while keeping the purpose
236
+ and examples unchanged.",
229
237
  "prompt": {
230
238
  "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
231
239
  "cautions": [
@@ -248,7 +256,10 @@ _PROMPT: str = """
248
256
  },
249
257
  {
250
258
  "id": 3,
251
- "analysis": "In this step, we improved the examples section to cover a broader range of scenarios and address potential ambiguities. By adding examples that include words with multiple interpretations (such as 'Mercury' for both a planet and an element), we enhance clarity and ensure better coverage. This iteration only modifies the examples section, leaving purpose and cautions intact.",
259
+ "analysis": "In this step, we improved the examples section to cover a broader range of scenarios and
260
+ address potential ambiguities. By adding examples that include words with multiple interpretations
261
+ (such as 'Mercury' for both a planet and an element), we enhance clarity and ensure better coverage.
262
+ This iteration only modifies the examples section, leaving purpose and cautions intact.",
252
263
  "prompt": {
253
264
  "purpose": "Determine the correct category for a given word by analyzing its context for clear meaning.",
254
265
  "cautions": [
@@ -409,28 +420,34 @@ class FewShotPromptBuilder:
409
420
 
410
421
  def improve(
411
422
  self,
412
- client: OpenAI,
413
- model_name: str,
414
- temperature: float = 0.0,
415
- top_p: float = 1.0,
423
+ client: OpenAI | None = None,
424
+ model_name: str | None = None,
425
+ temperature: float | None = None,
426
+ top_p: float | None = None,
416
427
  ) -> "FewShotPromptBuilder":
417
428
  """Iteratively refine the prompt using an LLM.
418
429
 
419
430
  The method calls a single LLM request that returns multiple
420
431
  editing steps and stores each step for inspection.
421
432
 
433
+ When client is None, automatically creates a client using environment variables:
434
+ - For OpenAI: ``OPENAI_API_KEY``
435
+ - For Azure OpenAI: ``AZURE_OPENAI_API_KEY``, ``AZURE_OPENAI_BASE_URL``, ``AZURE_OPENAI_API_VERSION``
436
+
422
437
  Args:
423
- client (openai.OpenAI): Configured OpenAI client.
424
- model_name (str): Model identifier (e.g. ``gpt-4.1-mini``).
425
- temperature (float, optional): Sampling temperature. Defaults to 0.0.
426
- top_p (float, optional): Nucleus sampling parameter. Defaults to 1.0.
438
+ client (OpenAI | None): Configured OpenAI client. If None, uses DI container with environment variables.
439
+ model_name (str | None): Model identifier. If None, uses default ``gpt-4.1-mini``.
440
+ temperature (float | None): Sampling temperature. If None, uses model default.
441
+ top_p (float | None): Nucleus sampling parameter. If None, uses model default.
427
442
 
428
443
  Returns:
429
444
  FewShotPromptBuilder: The current builder instance containing the refined prompt and iteration history.
430
445
  """
446
+ _client = client or CONTAINER.resolve(OpenAI)
447
+ _model_name = model_name or CONTAINER.resolve(ResponsesModelName).value
431
448
 
432
- response: ParsedResponse[Response] = client.responses.parse(
433
- model=model_name,
449
+ response: ParsedResponse[Response] = _client.responses.parse(
450
+ model=_model_name,
434
451
  instructions=_PROMPT,
435
452
  input=Request(prompt=self._prompt).model_dump_json(),
436
453
  temperature=temperature,
@@ -456,6 +473,10 @@ class FewShotPromptBuilder:
456
473
  Returns:
457
474
  FewShotPromptBuilder: The current builder instance.
458
475
  """
476
+ if not hasattr(self, "_steps") or not self._steps:
477
+ print("No improvement steps available. Call improve() first.")
478
+ return self
479
+
459
480
  for previous, current in zip(self._steps, self._steps[1:]):
460
481
  print(f"=== Iteration {current.id} ===\n")
461
482
  print(f"Instruction: {current.analysis}")
@@ -4,8 +4,8 @@ import warnings
4
4
  import tiktoken
5
5
  from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
6
6
 
7
- from . import di
8
- from .model import (
7
+ from openaivec import di
8
+ from openaivec.model import (
9
9
  AzureOpenAIAPIKey,
10
10
  AzureOpenAIAPIVersion,
11
11
  AzureOpenAIBaseURL,
@@ -13,7 +13,7 @@ from .model import (
13
13
  OpenAIAPIKey,
14
14
  ResponsesModelName,
15
15
  )
16
- from .util import TextChunker
16
+ from openaivec.util import TextChunker
17
17
 
18
18
  CONTAINER = di.Container()
19
19