openaivec 1.0.1__tar.gz → 1.0.2__tar.gz

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (90)
  1. {openaivec-1.0.1 → openaivec-1.0.2}/PKG-INFO +12 -12
  2. {openaivec-1.0.1 → openaivec-1.0.2}/README.md +11 -11
  3. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_cache/proxy.py +52 -15
  4. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/spark.py +74 -0
  5. {openaivec-1.0.1 → openaivec-1.0.2}/.env.example +0 -0
  6. {openaivec-1.0.1 → openaivec-1.0.2}/.github/copilot-instructions.md +0 -0
  7. {openaivec-1.0.1 → openaivec-1.0.2}/.github/dependabot.yml +0 -0
  8. {openaivec-1.0.1 → openaivec-1.0.2}/.github/workflows/docs.yml +0 -0
  9. {openaivec-1.0.1 → openaivec-1.0.2}/.github/workflows/publish.yml +0 -0
  10. {openaivec-1.0.1 → openaivec-1.0.2}/.github/workflows/test.yml +0 -0
  11. {openaivec-1.0.1 → openaivec-1.0.2}/.gitignore +0 -0
  12. {openaivec-1.0.1 → openaivec-1.0.2}/AGENTS.md +0 -0
  13. {openaivec-1.0.1 → openaivec-1.0.2}/CODE_OF_CONDUCT.md +0 -0
  14. {openaivec-1.0.1 → openaivec-1.0.2}/LICENSE +0 -0
  15. {openaivec-1.0.1 → openaivec-1.0.2}/SECURITY.md +0 -0
  16. {openaivec-1.0.1 → openaivec-1.0.2}/SUPPORT.md +0 -0
  17. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/main.md +0 -0
  18. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/pandas_ext.md +0 -0
  19. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/spark.md +0 -0
  20. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/task.md +0 -0
  21. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/customer_support/customer_sentiment.md +0 -0
  22. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/customer_support/inquiry_classification.md +0 -0
  23. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/customer_support/inquiry_summary.md +0 -0
  24. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/customer_support/intent_analysis.md +0 -0
  25. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/customer_support/response_suggestion.md +0 -0
  26. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/customer_support/urgency_analysis.md +0 -0
  27. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/nlp/dependency_parsing.md +0 -0
  28. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/nlp/keyword_extraction.md +0 -0
  29. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/nlp/morphological_analysis.md +0 -0
  30. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/nlp/named_entity_recognition.md +0 -0
  31. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/nlp/sentiment_analysis.md +0 -0
  32. {openaivec-1.0.1 → openaivec-1.0.2}/docs/api/tasks/nlp/translation.md +0 -0
  33. {openaivec-1.0.1 → openaivec-1.0.2}/docs/contributor-guide.md +0 -0
  34. {openaivec-1.0.1 → openaivec-1.0.2}/docs/index.md +0 -0
  35. {openaivec-1.0.1 → openaivec-1.0.2}/docs/robots.txt +0 -0
  36. {openaivec-1.0.1 → openaivec-1.0.2}/mkdocs.yml +0 -0
  37. {openaivec-1.0.1 → openaivec-1.0.2}/pyproject.toml +0 -0
  38. {openaivec-1.0.1 → openaivec-1.0.2}/pytest.ini +0 -0
  39. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/__init__.py +0 -0
  40. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_cache/__init__.py +0 -0
  41. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_cache/optimize.py +0 -0
  42. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_di.py +0 -0
  43. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_embeddings.py +0 -0
  44. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_log.py +0 -0
  45. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_model.py +0 -0
  46. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_prompt.py +0 -0
  47. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_provider.py +0 -0
  48. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_responses.py +0 -0
  49. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_schema/__init__.py +0 -0
  50. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_schema/infer.py +0 -0
  51. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_schema/spec.py +0 -0
  52. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_serialize.py +0 -0
  53. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/_util.py +0 -0
  54. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/pandas_ext.py +0 -0
  55. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/__init__.py +0 -0
  56. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/__init__.py +0 -0
  57. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/customer_sentiment.py +0 -0
  58. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/inquiry_classification.py +0 -0
  59. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/inquiry_summary.py +0 -0
  60. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/intent_analysis.py +0 -0
  61. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/response_suggestion.py +0 -0
  62. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/customer_support/urgency_analysis.py +0 -0
  63. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/__init__.py +0 -0
  64. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/dependency_parsing.py +0 -0
  65. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/keyword_extraction.py +0 -0
  66. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/morphological_analysis.py +0 -0
  67. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/named_entity_recognition.py +0 -0
  68. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/sentiment_analysis.py +0 -0
  69. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/nlp/translation.py +0 -0
  70. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/table/__init__.py +0 -0
  71. {openaivec-1.0.1 → openaivec-1.0.2}/src/openaivec/task/table/fillna.py +0 -0
  72. {openaivec-1.0.1 → openaivec-1.0.2}/tests/__init__.py +0 -0
  73. {openaivec-1.0.1 → openaivec-1.0.2}/tests/_cache/test_optimize.py +0 -0
  74. {openaivec-1.0.1 → openaivec-1.0.2}/tests/_cache/test_proxy.py +0 -0
  75. {openaivec-1.0.1 → openaivec-1.0.2}/tests/_cache/test_proxy_suggester.py +0 -0
  76. {openaivec-1.0.1 → openaivec-1.0.2}/tests/_schema/test_infer.py +0 -0
  77. {openaivec-1.0.1 → openaivec-1.0.2}/tests/_schema/test_spec.py +0 -0
  78. {openaivec-1.0.1 → openaivec-1.0.2}/tests/conftest.py +0 -0
  79. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_di.py +0 -0
  80. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_embeddings.py +0 -0
  81. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_pandas_ext.py +0 -0
  82. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_prompt.py +0 -0
  83. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_provider.py +0 -0
  84. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_responses.py +0 -0
  85. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_serialize.py +0 -0
  86. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_serialize_pydantic_v2_compliance.py +0 -0
  87. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_spark.py +0 -0
  88. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_task.py +0 -0
  89. {openaivec-1.0.1 → openaivec-1.0.2}/tests/test_util.py +0 -0
  90. {openaivec-1.0.1 → openaivec-1.0.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -57,7 +57,7 @@ reviews = pd.Series([
57
57
 
58
58
  sentiment = reviews.ai.responses(
59
59
  "Summarize sentiment in one short sentence.",
60
- reasoning={"effort": "medium"}, # Mirrors OpenAI SDK for reasoning models
60
+ reasoning={"effort": "none"}, # Mirrors OpenAI SDK for reasoning models
61
61
  )
62
62
  print(sentiment.tolist())
63
63
  ```
@@ -109,7 +109,7 @@ client = BatchResponses.of(
109
109
 
110
110
  result = client.parse(
111
111
  ["panda", "rabbit", "koala"],
112
- reasoning={"effort": "medium"}, # Required for gpt-5.1
112
+ reasoning={"effort": "none"},
113
113
  )
114
114
  print(result) # Expected output: ['bear family', 'rabbit family', 'koala family']
115
115
  ```
@@ -147,15 +147,15 @@ df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})
147
147
  result = df.assign(
148
148
  family=lambda df: df.name.ai.responses(
149
149
  "What animal family? Answer with 'X family'",
150
- reasoning={"effort": "medium"},
150
+ reasoning={"effort": "none"},
151
151
  ),
152
152
  habitat=lambda df: df.name.ai.responses(
153
153
  "Primary habitat in one word",
154
- reasoning={"effort": "medium"},
154
+ reasoning={"effort": "none"},
155
155
  ),
156
156
  fun_fact=lambda df: df.name.ai.responses(
157
157
  "One interesting fact in 10 words or less",
158
- reasoning={"effort": "medium"},
158
+ reasoning={"effort": "none"},
159
159
  ),
160
160
  )
161
161
  ```
@@ -178,7 +178,7 @@ pandas_ext.set_responses_model("o1-mini") # Set your reasoning model
178
178
  result = df.assign(
179
179
  analysis=lambda df: df.text.ai.responses(
180
180
  "Analyze this text step by step",
181
- reasoning={"effort": "medium"} # Optional: mirrors the OpenAI SDK argument
181
+ reasoning={"effort": "none"} # Optional: mirrors the OpenAI SDK argument
182
182
  )
183
183
  )
184
184
  ```
@@ -232,7 +232,7 @@ df = pd.DataFrame({"text": [
232
232
  async def process_data():
233
233
  return await df["text"].aio.responses(
234
234
  "Analyze sentiment and classify as positive/negative/neutral",
235
- reasoning={"effort": "medium"}, # Required for gpt-5.1
235
+ reasoning={"effort": "none"}, # Required for gpt-5.1
236
236
  max_concurrency=12 # Allow up to 12 concurrent requests
237
237
  )
238
238
 
@@ -284,7 +284,7 @@ spark.udf.register(
284
284
  "extract_brand",
285
285
  responses_udf(
286
286
  instructions="Extract the brand name from the product. Return only the brand name.",
287
- reasoning={"effort": "medium"}, # Recommended with gpt-5.1
287
+ reasoning={"effort": "none"}, # Recommended with gpt-5.1
288
288
  )
289
289
  )
290
290
 
@@ -298,7 +298,7 @@ spark.udf.register(
298
298
  responses_udf(
299
299
  instructions="Translate the text to English, French, and Japanese.",
300
300
  response_format=Translation,
301
- reasoning={"effort": "medium"}, # Recommended with gpt-5.1
301
+ reasoning={"effort": "none"}, # Recommended with gpt-5.1
302
302
  )
303
303
  )
304
304
 
@@ -336,7 +336,7 @@ prompt = (
336
336
 
337
337
  ## Using with Microsoft Fabric
338
338
 
339
- [Microsoft Fabric](https://www.microsoft.com/en-us/microsoft-fabric/) is a unified, cloud-based analytics platform. Add `openaivec` from PyPI in your Fabric environment, select it in your notebook, and use `openaivec.spark` like standard Spark. Detailed walkthrough: 📓 **[Fabric guide →](https://microsoft.github.io/openaivec/examples/fabric/)**.
339
+ [Microsoft Fabric](https://www.microsoft.com/en-us/microsoft-fabric/) is a unified, cloud-based analytics platform. Add `openaivec` from PyPI in your Fabric environment, select it in your notebook, and use `openaivec.spark` like standard Spark.
340
340
 
341
341
  ## Contributing
342
342
 
@@ -374,4 +374,4 @@ uv run pytest -m "not slow and not requires_api"
374
374
 
375
375
  ## Community
376
376
 
377
- Join our Discord community for support and announcements: https://discord.gg/vbb83Pgn
377
+ Join our Discord community for support and announcements: https://discord.gg/hXCS9J6Qek
@@ -31,7 +31,7 @@ reviews = pd.Series([
31
31
 
32
32
  sentiment = reviews.ai.responses(
33
33
  "Summarize sentiment in one short sentence.",
34
- reasoning={"effort": "medium"}, # Mirrors OpenAI SDK for reasoning models
34
+ reasoning={"effort": "none"}, # Mirrors OpenAI SDK for reasoning models
35
35
  )
36
36
  print(sentiment.tolist())
37
37
  ```
@@ -83,7 +83,7 @@ client = BatchResponses.of(
83
83
 
84
84
  result = client.parse(
85
85
  ["panda", "rabbit", "koala"],
86
- reasoning={"effort": "medium"}, # Required for gpt-5.1
86
+ reasoning={"effort": "none"},
87
87
  )
88
88
  print(result) # Expected output: ['bear family', 'rabbit family', 'koala family']
89
89
  ```
@@ -121,15 +121,15 @@ df = pd.DataFrame({"name": ["panda", "rabbit", "koala"]})
121
121
  result = df.assign(
122
122
  family=lambda df: df.name.ai.responses(
123
123
  "What animal family? Answer with 'X family'",
124
- reasoning={"effort": "medium"},
124
+ reasoning={"effort": "none"},
125
125
  ),
126
126
  habitat=lambda df: df.name.ai.responses(
127
127
  "Primary habitat in one word",
128
- reasoning={"effort": "medium"},
128
+ reasoning={"effort": "none"},
129
129
  ),
130
130
  fun_fact=lambda df: df.name.ai.responses(
131
131
  "One interesting fact in 10 words or less",
132
- reasoning={"effort": "medium"},
132
+ reasoning={"effort": "none"},
133
133
  ),
134
134
  )
135
135
  ```
@@ -152,7 +152,7 @@ pandas_ext.set_responses_model("o1-mini") # Set your reasoning model
152
152
  result = df.assign(
153
153
  analysis=lambda df: df.text.ai.responses(
154
154
  "Analyze this text step by step",
155
- reasoning={"effort": "medium"} # Optional: mirrors the OpenAI SDK argument
155
+ reasoning={"effort": "none"} # Optional: mirrors the OpenAI SDK argument
156
156
  )
157
157
  )
158
158
  ```
@@ -206,7 +206,7 @@ df = pd.DataFrame({"text": [
206
206
  async def process_data():
207
207
  return await df["text"].aio.responses(
208
208
  "Analyze sentiment and classify as positive/negative/neutral",
209
- reasoning={"effort": "medium"}, # Required for gpt-5.1
209
+ reasoning={"effort": "none"}, # Required for gpt-5.1
210
210
  max_concurrency=12 # Allow up to 12 concurrent requests
211
211
  )
212
212
 
@@ -258,7 +258,7 @@ spark.udf.register(
258
258
  "extract_brand",
259
259
  responses_udf(
260
260
  instructions="Extract the brand name from the product. Return only the brand name.",
261
- reasoning={"effort": "medium"}, # Recommended with gpt-5.1
261
+ reasoning={"effort": "none"}, # Recommended with gpt-5.1
262
262
  )
263
263
  )
264
264
 
@@ -272,7 +272,7 @@ spark.udf.register(
272
272
  responses_udf(
273
273
  instructions="Translate the text to English, French, and Japanese.",
274
274
  response_format=Translation,
275
- reasoning={"effort": "medium"}, # Recommended with gpt-5.1
275
+ reasoning={"effort": "none"}, # Recommended with gpt-5.1
276
276
  )
277
277
  )
278
278
 
@@ -310,7 +310,7 @@ prompt = (
310
310
 
311
311
  ## Using with Microsoft Fabric
312
312
 
313
- [Microsoft Fabric](https://www.microsoft.com/en-us/microsoft-fabric/) is a unified, cloud-based analytics platform. Add `openaivec` from PyPI in your Fabric environment, select it in your notebook, and use `openaivec.spark` like standard Spark. Detailed walkthrough: 📓 **[Fabric guide →](https://microsoft.github.io/openaivec/examples/fabric/)**.
313
+ [Microsoft Fabric](https://www.microsoft.com/en-us/microsoft-fabric/) is a unified, cloud-based analytics platform. Add `openaivec` from PyPI in your Fabric environment, select it in your notebook, and use `openaivec.spark` like standard Spark.
314
314
 
315
315
  ## Contributing
316
316
 
@@ -348,4 +348,4 @@ uv run pytest -m "not slow and not requires_api"
348
348
 
349
349
  ## Community
350
350
 
351
- Join our Discord community for support and announcements: https://discord.gg/vbb83Pgn
351
+ Join our Discord community for support and announcements: https://discord.gg/hXCS9J6Qek
@@ -186,11 +186,15 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
186
186
  performance (targeting 30-60 seconds per batch).
187
187
 
188
188
  Example:
189
- >>> p = BatchingMapProxy[int, str](batch_size=3)
190
- >>> def f(xs: list[int]) -> list[str]:
191
- ... return [f"v:{x}" for x in xs]
192
- >>> p.map([1, 2, 2, 3, 4], f)
193
- ['v:1', 'v:2', 'v:2', 'v:3', 'v:4']
189
+ ```python
190
+ p = BatchingMapProxy[int, str](batch_size=3)
191
+
192
+ def f(xs: list[int]) -> list[str]:
193
+ return [f"v:{x}" for x in xs]
194
+
195
+ p.map([1, 2, 2, 3, 4], f)
196
+ # ['v:1', 'v:2', 'v:2', 'v:3', 'v:4']
197
+ ```
194
198
  """
195
199
 
196
200
  # Number of items to process per call to map_func.
@@ -449,6 +453,21 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
449
453
 
450
454
  Raises:
451
455
  Exception: Propagates any exception raised by ``map_func``.
456
+
457
+ Example:
458
+ ```python
459
+ proxy: BatchingMapProxy[int, str] = BatchingMapProxy(batch_size=2)
460
+ calls: list[list[int]] = []
461
+
462
+ def mapper(chunk: list[int]) -> list[str]:
463
+ calls.append(chunk)
464
+ return [f"v:{x}" for x in chunk]
465
+
466
+ proxy.map([1, 2, 2, 3], mapper)
467
+ # ['v:1', 'v:2', 'v:2', 'v:3']
468
+ calls # duplicate ``2`` is only computed once
469
+ # [[1, 2], [3]]
470
+ ```
452
471
  """
453
472
  if self.__all_cached(items):
454
473
  return self.__values(items)
@@ -490,16 +509,21 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
490
509
  performance (targeting 30-60 seconds per batch).
491
510
 
492
511
  Example:
493
- >>> import asyncio
494
- >>> from typing import List
495
- >>> p = AsyncBatchingMapProxy[int, str](batch_size=2)
496
- >>> async def af(xs: list[int]) -> list[str]:
497
- ... await asyncio.sleep(0)
498
- ... return [f"v:{x}" for x in xs]
499
- >>> async def run():
500
- ... return await p.map([1, 2, 3], af)
501
- >>> asyncio.run(run())
502
- ['v:1', 'v:2', 'v:3']
512
+ ```python
513
+ import asyncio
514
+
515
+ p = AsyncBatchingMapProxy[int, str](batch_size=2)
516
+
517
+ async def af(xs: list[int]) -> list[str]:
518
+ await asyncio.sleep(0)
519
+ return [f"v:{x}" for x in xs]
520
+
521
+ async def run():
522
+ return await p.map([1, 2, 3], af)
523
+
524
+ asyncio.run(run())
525
+ # ['v:1', 'v:2', 'v:3']
526
+ ```
503
527
  """
504
528
 
505
529
  # Number of items to process per call to map_func.
@@ -747,6 +771,19 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
747
771
 
748
772
  Returns:
749
773
  list[T]: Mapped values corresponding to ``items`` in the same order.
774
+
775
+ Example:
776
+ ```python
777
+ import asyncio
778
+
779
+ async def mapper(chunk: list[int]) -> list[str]:
780
+ await asyncio.sleep(0)
781
+ return [f"v:{x}" for x in chunk]
782
+
783
+ proxy: AsyncBatchingMapProxy[int, str] = AsyncBatchingMapProxy(batch_size=2)
784
+ asyncio.run(proxy.map([1, 1, 2], mapper))
785
+ # ['v:1', 'v:1', 'v:2']
786
+ ```
750
787
  """
751
788
  if await self.__all_cached(items):
752
789
  return await self.__values(items)
@@ -181,6 +181,20 @@ def setup(
181
181
  If provided, registers `ResponsesModelName` in the DI container.
182
182
  embeddings_model_name (str | None): Default model name for embeddings.
183
183
  If provided, registers `EmbeddingsModelName` in the DI container.
184
+
185
+ Example:
186
+ ```python
187
+ from pyspark.sql import SparkSession
188
+ from openaivec.spark import setup
189
+
190
+ spark = SparkSession.builder.getOrCreate()
191
+ setup(
192
+ spark,
193
+ api_key="sk-***",
194
+ responses_model_name="gpt-4.1-mini",
195
+ embeddings_model_name="text-embedding-3-small",
196
+ )
197
+ ```
184
198
  """
185
199
 
186
200
  CONTAINER.register(SparkSession, lambda: spark)
@@ -221,6 +235,22 @@ def setup_azure(
221
235
  If provided, registers `ResponsesModelName` in the DI container.
222
236
  embeddings_model_name (str | None): Default model name for embeddings.
223
237
  If provided, registers `EmbeddingsModelName` in the DI container.
238
+
239
+ Example:
240
+ ```python
241
+ from pyspark.sql import SparkSession
242
+ from openaivec.spark import setup_azure
243
+
244
+ spark = SparkSession.builder.getOrCreate()
245
+ setup_azure(
246
+ spark,
247
+ api_key="azure-key",
248
+ base_url="https://YOUR-RESOURCE-NAME.services.ai.azure.com/openai/v1/",
249
+ api_version="preview",
250
+ responses_model_name="gpt4-deployment",
251
+ embeddings_model_name="embedding-deployment",
252
+ )
253
+ ```
224
254
  """
225
255
 
226
256
  CONTAINER.register(SparkSession, lambda: spark)
@@ -375,6 +405,19 @@ def responses_udf(
375
405
  Raises:
376
406
  ValueError: If `response_format` is not `str` or a Pydantic `BaseModel`.
377
407
 
408
+ Example:
409
+ ```python
410
+ from pyspark.sql import SparkSession
411
+ from openaivec.spark import responses_udf, setup
412
+
413
+ spark = SparkSession.builder.getOrCreate()
414
+ setup(spark, api_key="sk-***", responses_model_name="gpt-4.1-mini")
415
+ udf = responses_udf("Reply with one word.")
416
+ spark.udf.register("short_answer", udf)
417
+ df = spark.createDataFrame([("hello",), ("bye",)], ["text"])
418
+ df.selectExpr("short_answer(text) as reply").show()
419
+ ```
420
+
378
421
  Note:
379
422
  For optimal performance in distributed environments:
380
423
  - **Automatic Caching**: Duplicate inputs within each partition are cached,
@@ -533,6 +576,20 @@ def infer_schema(
533
576
 
534
577
  Returns:
535
578
  InferredSchema: An object containing the inferred schema and response format.
579
+
580
+ Example:
581
+ ```python
582
+ from pyspark.sql import SparkSession
583
+
584
+ spark = SparkSession.builder.getOrCreate()
585
+ spark.createDataFrame([("great product",), ("bad service",)], ["text"]).createOrReplaceTempView("examples")
586
+ infer_schema(
587
+ instructions="Classify sentiment as positive or negative.",
588
+ example_table_name="examples",
589
+ example_field_name="text",
590
+ max_examples=2,
591
+ )
592
+ ```
536
593
  """
537
594
 
538
595
  spark = CONTAINER.resolve(SparkSession)
@@ -595,6 +652,23 @@ def parse_udf(
595
652
  forwarded verbatim to the underlying API calls. These parameters are applied to
596
653
  all API requests made by the UDF and override any parameters set in the
597
654
  response_format or example data.
655
+ Example:
656
+ ```python
657
+ from pyspark.sql import SparkSession
658
+
659
+ spark = SparkSession.builder.getOrCreate()
660
+ spark.createDataFrame(
661
+ [("Order #123 delivered",), ("Order #456 delayed",)],
662
+ ["body"],
663
+ ).createOrReplaceTempView("messages")
664
+ udf = parse_udf(
665
+ instructions="Extract order id as `order_id` and status as `status`.",
666
+ example_table_name="messages",
667
+ example_field_name="body",
668
+ )
669
+ spark.udf.register("parse_ticket", udf)
670
+ spark.sql("SELECT parse_ticket(body) AS parsed FROM messages").show()
671
+ ```
598
672
  Returns:
599
673
  UserDefinedFunction: A Spark pandas UDF configured to parse responses asynchronously.
600
674
  Output schema is `StringType` for str response format or a struct derived from
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes