openaivec 0.14.3__py3-none-any.whl → 0.14.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
openaivec/_proxy.py CHANGED
@@ -460,7 +460,20 @@ class BatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         self.__process_owned(owned, map_func)
         self.__wait_for(wait_for, map_func)
 
-        return self.__values(items)
+        # Fetch results before purging None entries
+        results = self.__values(items)
+
+        # Remove None values from cache so they are recomputed on future calls
+        with self._lock:
+            if self._cache:  # micro-optimization
+                for k in set(items):
+                    try:
+                        if self._cache.get(k, object()) is None:
+                            del self._cache[k]
+                    except KeyError:
+                        pass
+
+        return results
 
 
 @dataclass
@@ -745,4 +758,13 @@ class AsyncBatchingMapProxy(ProxyBase[S, T], Generic[S, T]):
         await self.__process_owned(owned, map_func)
         await self.__wait_for(wait_for, map_func)
 
-        return await self.__values(items)
+        results = await self.__values(items)
+
+        # Remove None values from cache after retrieval to avoid persisting incomplete results
+        async with self._lock:
+            if self._cache:
+                for k in set(items):
+                    if self._cache.get(k, object()) is None:
+                        self._cache.pop(k, None)
+
+        return results
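Both hunks enforce the same cache contract: a `None` result is still returned to the caller, but it is never persisted, so a failed or incomplete computation is retried on a later call instead of being pinned in the cache forever. A minimal standalone sketch of that contract (illustrative only, not the library's internal API):

```python
from threading import Lock


class TinyNonePurgingCache:
    """Toy stand-in for the purge-None behavior added in this release."""

    def __init__(self) -> None:
        self._cache: dict[str, object] = {}
        self._lock = Lock()

    def map(self, items: list[str], map_func) -> list[object]:
        missing = [k for k in items if k not in self._cache]
        if missing:
            for k, v in zip(missing, map_func(missing)):
                self._cache[k] = v
        results = [self._cache.get(k) for k in items]
        # Same purge as the diff above: drop None entries so they are
        # recomputed on future calls instead of being served from cache.
        with self._lock:
            for k in set(items):
                if self._cache.get(k, object()) is None:
                    self._cache.pop(k, None)
        return results


calls: list[list[str]] = []


def flaky(batch: list[str]) -> list[None]:
    calls.append(list(batch))
    return [None for _ in batch]  # simulate a batch that produced no result


cache = TinyNonePurgingCache()
cache.map(["a"], flaky)
cache.map(["a"], flaky)
assert len(calls) == 2  # "a" was re-requested because its None was purged
```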
openaivec/_schema.py CHANGED
@@ -128,6 +128,12 @@ class InferredSchema(BaseModel):
             redundancy removed).
         examples_summary: Neutral description of structural / semantic patterns
             observed in the examples (domain, recurring signals, constraints).
+        examples_purpose_alignment: Analytical explanation of how the concrete
+            recurring patterns in the provided examples *justify*, *constrain*,
+            or *refine* the stated purpose. Should map purpose facets to
+            observed evidence (or explicitly note gaps) to discourage
+            hallucinated fields and anchor extraction scope. This is an
+            internal quality aid – downstream consumers typically ignore it.
         fields: Ordered list of ``FieldSpec`` objects comprising the schema's
             sole authoritative contract.
         inference_prompt: Self-contained extraction instructions enforcing an
@@ -147,6 +153,13 @@ class InferredSchema(BaseModel):
             "patterns, and notable constraints."
         )
     )
+    examples_purpose_alignment: str = Field(
+        description=(
+            "Explanation of how observable recurring patterns in the examples substantiate and bound the stated "
+            "purpose. Should reference purpose facets and cite supporting example evidence (or note any gaps) to "
+            "reduce hallucinated fields. Internal diagnostic / quality aid; not required for downstream extraction."
+        )
+    )
     fields: List[FieldSpec] = Field(
         description=(
             "Ordered list of proposed fields derived strictly from observable, repeatable signals in the "
@@ -234,7 +247,7 @@ class InferredSchema(BaseModel):
                 py_type = enum_cls
             else:
                 py_type = type_map[spec.type]
-            fields[spec.name] = (py_type, ...)
+            fields[spec.name] = (py_type, Field(description=spec.description))
 
         model = create_model("InferredSchema", **fields)  # type: ignore[call-arg]
         return model
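The switch from `(py_type, ...)` to `(py_type, Field(description=spec.description))` means each dynamically created field now carries its description into the model's JSON schema, where structured-output parsing can surface it as per-field guidance. A minimal sketch of the effect with plain pydantic (the field name and text here are invented):

```python
from pydantic import Field, create_model

# A required field with a description, mirroring the tuple used in the diff.
Dynamic = create_model(
    "Dynamic",
    sentiment=(str, Field(description="Overall sentiment of the text")),
)

schema = Dynamic.model_json_schema()
# The description is now part of the generated JSON schema...
assert schema["properties"]["sentiment"]["description"] == "Overall sentiment of the text"
# ...and the field stays required, exactly as with the old `(py_type, ...)` form.
assert "sentiment" in schema["required"]
```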
@@ -281,11 +294,15 @@ You are a schema inference engine.
 Task:
 1. Normalize the user's purpose (eliminate ambiguity, redundancy, contradictions).
 2. Objectively summarize observable patterns in the example texts.
-3. Propose a minimal flat set of scalar fields (no nesting / arrays) that are reliably extractable.
-4. Skip fields likely missing in a large share (>~20%) of realistic inputs.
-5. Provide enum_values ONLY when a small stable closed categorical set (2–24 lowercase tokens)
+3. Produce an "examples_purpose_alignment" explanation that explicitly maps purpose facets
+   to concrete recurring evidence in the examples (or flags gaps). Use concise bullet‑style
+   sentences (still a plain string) such as: "purpose facet -> supporting pattern / gap".
+   This MUST NOT introduce new domain facts beyond the examples & purpose.
+4. Propose a minimal flat set of scalar fields (no nesting / arrays) that are reliably extractable.
+5. Skip fields likely missing in a large share (>~20%) of realistic inputs.
+6. Provide enum_values ONLY when a small stable closed categorical set (2–24 lowercase tokens)
    is clearly evidenced; never invent.
-6. If the purpose indicates prediction (predict / probability / likelihood), output only
+7. If the purpose indicates prediction (predict / probability / likelihood), output only
    explanatory features (no target restatement).
 
 Rules:
@@ -305,6 +322,7 @@ Output contract:
 Return exactly an InferredSchema object with JSON keys:
 - purpose (string)
 - examples_summary (string)
+- examples_purpose_alignment (string)
 - fields (array of FieldSpec objects: name, type, description, enum_values?)
 - inference_prompt (string)
 """.strip()
@@ -359,10 +377,31 @@ class SchemaInferer:
             raise ValueError("max_retries must be >= 1")
 
         last_err: Exception | None = None
+        previous_errors: list[str] = []
         for attempt in range(max_retries):
+            if attempt == 0:
+                instructions = _INFER_INSTRUCTIONS
+            else:
+                # Provide structured feedback for correction. Keep concise and prohibit speculative expansion.
+                feedback_lines = [
+                    "--- PRIOR VALIDATION FEEDBACK ---",
+                ]
+                for i, err in enumerate(previous_errors[-5:], 1):  # include last up to 5 errors
+                    feedback_lines.append(f"{i}. {err}")
+                feedback_lines.extend(
+                    [
+                        "Adjust ONLY listed issues; avoid adding brand-new fields unless essential.",
+                        "Don't hallucinate or broaden enum_values unless enum rule caused failure.",
+                        "Duplicate names: minimally rename; keep semantics.",
+                        "Unsupported type: change to string|integer|float|boolean (no new facts).",
+                        "Bad enum length: drop enum or constrain to 2–24 evidenced tokens.",
+                    ]
+                )
+                instructions = _INFER_INSTRUCTIONS + "\n\n" + "\n".join(feedback_lines)
+
             response: ParsedResponse[InferredSchema] = self.client.responses.parse(
                 model=self.model_name,
-                instructions=_INFER_INSTRUCTIONS,
+                instructions=instructions,
                 input=data.model_dump_json(),
                 text_format=InferredSchema,
                 *args,
@@ -371,8 +410,10 @@ class SchemaInferer:
             parsed = response.output_parsed
             try:
                 _basic_field_list_validation(parsed)
+                parsed.build_model()  # ensure dynamic model creation succeeds
             except ValueError as e:
                 last_err = e
+                previous_errors.append(str(e))
                 if attempt == max_retries - 1:
                     raise
                 continue
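The loop above is a generic validate-and-retry pattern: each failed validation is appended to a rolling error list, and the last few errors are fed back into the instructions for the next attempt. A self-contained sketch of the same shape, with hypothetical `generate`/`validate` stand-ins for the actual OpenAI call and field validation:

```python
def infer_with_feedback(generate, validate, base_instructions: str, max_retries: int = 3):
    """Retry `generate` until `validate` passes, feeding prior errors back in."""
    previous_errors: list[str] = []
    for attempt in range(max_retries):
        instructions = base_instructions
        if previous_errors:
            feedback = "\n".join(
                f"{i}. {err}" for i, err in enumerate(previous_errors[-5:], 1)
            )
            instructions += "\n\n--- PRIOR VALIDATION FEEDBACK ---\n" + feedback
        result = generate(instructions)
        try:
            validate(result)
            return result
        except ValueError as e:
            previous_errors.append(str(e))
            if attempt == max_retries - 1:
                raise
    raise RuntimeError("unreachable")


attempts: list[str] = []


def generate(instructions: str) -> str:
    attempts.append(instructions)
    return "ok" if "FEEDBACK" in instructions else "bad"


def validate(result: str) -> None:
    if result == "bad":
        raise ValueError("duplicate field names")


assert infer_with_feedback(generate, validate, "base") == "ok"
assert "1. duplicate field names" in attempts[1]  # the retry saw the feedback
```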
openaivec/pandas_ext.py CHANGED
@@ -182,6 +182,27 @@ class OpenAIVecSeriesAccessor:
         top_p: float = 1.0,
         **api_kwargs,
     ) -> pd.Series:
+        """Call an LLM once for every Series element using a provided cache.
+
+        This is a lower-level method that allows explicit cache management for advanced
+        use cases. Most users should use the standard ``responses`` method instead.
+
+        Args:
+            instructions (str): System prompt prepended to every user message.
+            cache (BatchingMapProxy[str, ResponseFormat]): Explicit cache instance for
+                batching and deduplication control.
+            response_format (Type[ResponseFormat], optional): Pydantic model or built-in
+                type the assistant should return. Defaults to ``str``.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
+            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+
+        Additional Keyword Args:
+            Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
+            ``seed``, etc.) are forwarded verbatim to the underlying client.
+
+        Returns:
+            pandas.Series: Series whose values are instances of ``response_format``.
+        """
         client: BatchResponses = BatchResponses(
             client=CONTAINER.resolve(OpenAI),
             model_name=CONTAINER.resolve(ResponsesModelName).value,
@@ -195,6 +216,56 @@ class OpenAIVecSeriesAccessor:
         # Forward any extra kwargs to the underlying Responses API.
         return pd.Series(client.parse(self._obj.tolist(), **api_kwargs), index=self._obj.index, name=self._obj.name)
 
+    def responses(
+        self,
+        instructions: str,
+        response_format: Type[ResponseFormat] = str,
+        batch_size: int | None = None,
+        temperature: float | None = 0.0,
+        top_p: float = 1.0,
+        show_progress: bool = False,
+        **api_kwargs,
+    ) -> pd.Series:
+        """Call an LLM once for every Series element.
+
+        Example:
+            ```python
+            animals = pd.Series(["cat", "dog", "elephant"])
+            # Basic usage
+            animals.ai.responses("translate to French")
+
+            # With progress bar in Jupyter notebooks
+            large_series = pd.Series(["data"] * 1000)
+            large_series.ai.responses(
+                "analyze this data",
+                batch_size=32,
+                show_progress=True
+            )
+            ```
+
+        Args:
+            instructions (str): System prompt prepended to every user message.
+            response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
+                type the assistant should return. Defaults to ``str``.
+            batch_size (int | None, optional): Number of prompts grouped into a single
+                request. Defaults to ``None`` (automatic batch size optimization
+                based on execution time). Set to a positive integer for fixed batch size.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
+            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
+
+        Returns:
+            pandas.Series: Series whose values are instances of ``response_format``.
+        """
+        return self.responses_with_cache(
+            instructions=instructions,
+            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
+            response_format=response_format,
+            temperature=temperature,
+            top_p=top_p,
+            **api_kwargs,
+        )
+
     def embeddings_with_cache(
         self,
         cache: BatchingMapProxy[str, np.ndarray],
@@ -205,15 +276,6 @@ class OpenAIVecSeriesAccessor:
         a pre-configured BatchingMapProxy instance, enabling cache sharing
         across multiple operations or custom batch size management.
 
-        Args:
-            cache (BatchingMapProxy[str, np.ndarray]): Pre-configured cache
-                instance for managing API call batching and deduplication.
-                Set cache.batch_size=None to enable automatic batch size optimization.
-
-        Returns:
-            pandas.Series: Series whose values are ``np.ndarray`` objects
-                (dtype ``float32``).
-
         Example:
             ```python
             from openaivec._proxy import BatchingMapProxy
@@ -225,6 +287,15 @@ class OpenAIVecSeriesAccessor:
             animals = pd.Series(["cat", "dog", "elephant"])
             embeddings = animals.ai.embeddings_with_cache(cache=shared_cache)
             ```
+
+        Args:
+            cache (BatchingMapProxy[str, np.ndarray]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+
+        Returns:
+            pandas.Series: Series whose values are ``np.ndarray`` objects
+                (dtype ``float32``).
         """
         client: BatchEmbeddings = BatchEmbeddings(
             client=CONTAINER.resolve(OpenAI),
@@ -238,54 +309,35 @@ class OpenAIVecSeriesAccessor:
             name=self._obj.name,
         )
 
-    def responses(
-        self,
-        instructions: str,
-        response_format: Type[ResponseFormat] = str,
-        batch_size: int | None = None,
-        temperature: float | None = 0.0,
-        top_p: float = 1.0,
-        show_progress: bool = False,
-        **api_kwargs,
-    ) -> pd.Series:
-        """Call an LLM once for every Series element.
+    def embeddings(self, batch_size: int | None = None, show_progress: bool = False) -> pd.Series:
+        """Compute OpenAI embeddings for every Series element.
 
         Example:
             ```python
             animals = pd.Series(["cat", "dog", "elephant"])
             # Basic usage
-            animals.ai.responses("translate to French")
+            animals.ai.embeddings()
 
-            # With progress bar in Jupyter notebooks
-            large_series = pd.Series(["data"] * 1000)
-            large_series.ai.responses(
-                "analyze this data",
-                batch_size=32,
+            # With progress bar for large datasets
+            large_texts = pd.Series(["text"] * 5000)
+            embeddings = large_texts.ai.embeddings(
+                batch_size=100,
                 show_progress=True
             )
             ```
 
         Args:
-            instructions (str): System prompt prepended to every user message.
-            response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
-                type the assistant should return. Defaults to ``str``.
-            batch_size (int | None, optional): Number of prompts grouped into a single
-                request. Defaults to ``None`` (automatic batch size optimization
+            batch_size (int | None, optional): Number of inputs grouped into a
+                single request. Defaults to ``None`` (automatic batch size optimization
                 based on execution time). Set to a positive integer for fixed batch size.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
         Returns:
-            pandas.Series: Series whose values are instances of ``response_format``.
+            pandas.Series: Series whose values are ``np.ndarray`` objects
+                (dtype ``float32``).
         """
-        return self.responses_with_cache(
-            instructions=instructions,
+        return self.embeddings_with_cache(
             cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
-            response_format=response_format,
-            temperature=temperature,
-            top_p=top_p,
-            **api_kwargs,
         )
 
     def task_with_cache(
@@ -300,6 +352,13 @@ class OpenAIVecSeriesAccessor:
         response format, temperature and top_p. A supplied ``BatchingMapProxy`` enables
         cross‑operation deduplicated reuse and external batch size / progress control.
 
+        Example:
+            ```python
+            from openaivec._proxy import BatchingMapProxy
+            shared_cache = BatchingMapProxy(batch_size=64)
+            reviews.ai.task_with_cache(sentiment_task, cache=shared_cache)
+            ```
+
         Args:
             task (PreparedTask): Prepared task (instructions + response_format + sampling params).
             cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
@@ -311,13 +370,6 @@ class OpenAIVecSeriesAccessor:
 
         Returns:
             pandas.Series: Task results aligned with the original Series index.
-
-        Example:
-            ```python
-            from openaivec._proxy import BatchingMapProxy
-            shared_cache = BatchingMapProxy(batch_size=64)
-            reviews.ai.task_with_cache(sentiment_task, cache=shared_cache)
-            ```
         """
         client: BatchResponses = BatchResponses(
             client=CONTAINER.resolve(OpenAI),
@@ -382,37 +434,6 @@ class OpenAIVecSeriesAccessor:
             **api_kwargs,
         )
 
-    def embeddings(self, batch_size: int | None = None, show_progress: bool = False) -> pd.Series:
-        """Compute OpenAI embeddings for every Series element.
-
-        Example:
-            ```python
-            animals = pd.Series(["cat", "dog", "elephant"])
-            # Basic usage
-            animals.ai.embeddings()
-
-            # With progress bar for large datasets
-            large_texts = pd.Series(["text"] * 5000)
-            embeddings = large_texts.ai.embeddings(
-                batch_size=100,
-                show_progress=True
-            )
-            ```
-
-        Args:
-            batch_size (int | None, optional): Number of inputs grouped into a
-                single request. Defaults to ``None`` (automatic batch size optimization
-                based on execution time). Set to a positive integer for fixed batch size.
-            show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
-
-        Returns:
-            pandas.Series: Series whose values are ``np.ndarray`` objects
-                (dtype ``float32``).
-        """
-        return self.embeddings_with_cache(
-            cache=BatchingMapProxy(batch_size=batch_size, show_progress=show_progress),
-        )
-
     def count_tokens(self) -> pd.Series:
         """Count `tiktoken` tokens per row.
 
@@ -467,38 +488,6 @@ class OpenAIVecDataFrameAccessor:
     def __init__(self, df_obj: pd.DataFrame):
         self._obj = df_obj
 
-    def extract(self, column: str) -> pd.DataFrame:
-        """Flatten one column of Pydantic models/dicts into top‑level columns.
-
-        Example:
-            ```python
-            df = pd.DataFrame([
-                {"animal": {"name": "cat", "legs": 4}},
-                {"animal": {"name": "dog", "legs": 4}},
-                {"animal": {"name": "elephant", "legs": 4}},
-            ])
-            df.ai.extract("animal")
-            ```
-        This method returns a DataFrame with the same index as the original,
-        where each column corresponds to a key in the dictionaries.
-        The source column is dropped.
-
-        Args:
-            column (str): Column to expand.
-
-        Returns:
-            pandas.DataFrame: Original DataFrame with the extracted columns; the source column is dropped.
-        """
-        if column not in self._obj.columns:
-            raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
-
-        return (
-            self._obj.pipe(lambda df: df.reset_index(drop=True))
-            .pipe(lambda df: df.join(df[column].ai.extract()))
-            .pipe(lambda df: df.set_index(self._obj.index))
-            .pipe(lambda df: df.drop(columns=[column], axis=1))
-        )
-
     def responses_with_cache(
         self,
         instructions: str,
@@ -508,25 +497,12 @@ class OpenAIVecDataFrameAccessor:
         top_p: float = 1.0,
         **api_kwargs,
     ) -> pd.Series:
-        """Generate a response for each row after serialising it to JSON using a provided cache.
+        """Generate a response for each row after serializing it to JSON using a provided cache.
 
         This method allows external control over caching behavior by accepting
         a pre-configured BatchingMapProxy instance, enabling cache sharing
         across multiple operations or custom batch size management.
 
-        Args:
-            instructions (str): System prompt for the assistant.
-            cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
-                instance for managing API call batching and deduplication.
-                Set cache.batch_size=None to enable automatic batch size optimization.
-            response_format (Type[ResponseFormat], optional): Desired Python type of the
-                responses. Defaults to ``str``.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
-
-        Returns:
-            pandas.Series: Responses aligned with the DataFrame's original index.
-
         Example:
             ```python
             from openaivec._proxy import BatchingMapProxy
@@ -544,6 +520,19 @@ class OpenAIVecDataFrameAccessor:
                 cache=shared_cache
             )
             ```
+
+        Args:
+            instructions (str): System prompt for the assistant.
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            response_format (Type[ResponseFormat], optional): Desired Python type of the
+                responses. Defaults to ``str``.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
+            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+
+        Returns:
+            pandas.Series: Responses aligned with the DataFrame's original index.
         """
         return _df_rows_to_json_series(self._obj).ai.responses_with_cache(
             instructions=instructions,
@@ -564,7 +553,7 @@ class OpenAIVecDataFrameAccessor:
         show_progress: bool = False,
         **api_kwargs,
     ) -> pd.Series:
-        """Generate a response for each row after serialising it to JSON.
+        """Generate a response for each row after serializing it to JSON.
 
         Example:
             ```python
@@ -592,7 +581,7 @@ class OpenAIVecDataFrameAccessor:
             batch_size (int | None, optional): Number of requests sent in one batch.
                 Defaults to ``None`` (automatic batch size optimization
                 based on execution time). Set to a positive integer for fixed batch size.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             show_progress (bool, optional): Show progress bar in Jupyter notebooks. Defaults to ``False``.
 
@@ -608,6 +597,31 @@ class OpenAIVecDataFrameAccessor:
             **api_kwargs,
         )
 
+    def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: BatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache.
+
+        Args:
+            task (PreparedTask): Prepared task (instructions + response_format + sampling params).
+            cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
+
+        Additional Keyword Args:
+            Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
+            ``seed``) forwarded verbatim. Core routing keys are managed internally.
+
+        Returns:
+            pandas.Series: Task results aligned with the DataFrame's original index.
+        """
+        return _df_rows_to_json_series(self._obj).ai.task_with_cache(
+            task=task,
+            cache=cache,
+            **api_kwargs,
+        )
+
     def task(
         self,
         task: PreparedTask,
@@ -615,7 +629,7 @@ class OpenAIVecDataFrameAccessor:
         show_progress: bool = False,
         **api_kwargs,
     ) -> pd.Series:
-        """Execute a prepared task on each DataFrame row after serialising it to JSON.
+        """Execute a prepared task on each DataFrame row after serializing it to JSON.
 
         Example:
             ```python
@@ -666,29 +680,36 @@ class OpenAIVecDataFrameAccessor:
             **api_kwargs,
         )
 
-    def task_with_cache(
-        self,
-        task: PreparedTask[ResponseFormat],
-        cache: BatchingMapProxy[str, ResponseFormat],
-        **api_kwargs,
-    ) -> pd.Series:
-        """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache.
+    def extract(self, column: str) -> pd.DataFrame:
+        """Flatten one column of Pydantic models/dicts into top‑level columns.
 
-        Args:
-            task (PreparedTask): Prepared task (instructions + response_format + sampling params).
-            cache (BatchingMapProxy[str, ResponseFormat]): Pre‑configured cache instance.
+        Example:
+            ```python
+            df = pd.DataFrame([
+                {"animal": {"name": "cat", "legs": 4}},
+                {"animal": {"name": "dog", "legs": 4}},
+                {"animal": {"name": "elephant", "legs": 4}},
+            ])
+            df.ai.extract("animal")
+            ```
+        This method returns a DataFrame with the same index as the original,
+        where each column corresponds to a key in the dictionaries.
+        The source column is dropped.
 
-        Additional Keyword Args:
-            Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
-            ``seed``) forwarded verbatim. Core routing keys are managed internally.
+        Args:
+            column (str): Column to expand.
 
         Returns:
-            pandas.Series: Task results aligned with the DataFrame's original index.
+            pandas.DataFrame: Original DataFrame with the extracted columns; the source column is dropped.
         """
-        return _df_rows_to_json_series(self._obj).ai.task_with_cache(
-            task=task,
-            cache=cache,
-            **api_kwargs,
+        if column not in self._obj.columns:
+            raise ValueError(f"Column '{column}' does not exist in the DataFrame.")
+
+        return (
+            self._obj.pipe(lambda df: df.reset_index(drop=True))
+            .pipe(lambda df: df.join(df[column].ai.extract()))
+            .pipe(lambda df: df.set_index(self._obj.index))
+            .pipe(lambda df: df.drop(columns=[column], axis=1))
         )
 
     def fillna(
@@ -776,15 +797,6 @@ class OpenAIVecDataFrameAccessor:
         two columns of the DataFrame. The vectors should be numpy arrays or
         array-like objects that support dot product operations.
 
-        Args:
-            col1 (str): Name of the first column containing embedding vectors.
-            col2 (str): Name of the second column containing embedding vectors.
-
-        Returns:
-            pandas.Series: Series containing cosine similarity scores between
-                corresponding vectors in col1 and col2, with values ranging
-                from -1 to 1, where 1 indicates identical direction.
-
         Example:
             ```python
             df = pd.DataFrame({
@@ -793,6 +805,15 @@ class OpenAIVecDataFrameAccessor:
             })
             similarities = df.ai.similarity('vec1', 'vec2')
             ```
+
+        Args:
+            col1 (str): Name of the first column containing embedding vectors.
+            col2 (str): Name of the second column containing embedding vectors.
+
+        Returns:
+            pandas.Series: Series containing cosine similarity scores between
+                corresponding vectors in col1 and col2, with values ranging
+                from -1 to 1, where 1 indicates identical direction.
         """
         return self._obj.apply(
             lambda row: np.dot(row[col1], row[col2]) / (np.linalg.norm(row[col1]) * np.linalg.norm(row[col2])),
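The `lambda` above is the standard cosine-similarity formula: the dot product divided by the product of the vector norms. A small self-contained check of the same expression on toy vectors (plain pandas/numpy, no accessor involved):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "vec1": [np.array([1.0, 0.0]), np.array([1.0, 1.0])],
    "vec2": [np.array([0.0, 1.0]), np.array([1.0, 1.0])],
})

# Same expression as the accessor applies row-wise.
sims = df.apply(
    lambda row: np.dot(row["vec1"], row["vec2"])
    / (np.linalg.norm(row["vec1"]) * np.linalg.norm(row["vec2"])),
    axis=1,
)

# Orthogonal vectors score 0.0; identical directions score 1.0.
assert np.isclose(sims.iloc[0], 0.0)
assert np.isclose(sims.iloc[1], 1.0)
```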
@@ -823,163 +844,47 @@ class AsyncOpenAIVecSeriesAccessor:
         across multiple operations or custom batch size management. The concurrency
         is controlled by the cache instance itself.
 
-        Args:
-            instructions (str): System prompt prepended to every user message.
-            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
-                instance for managing API call batching and deduplication.
-                Set cache.batch_size=None to enable automatic batch size optimization.
-            response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
-                type the assistant should return. Defaults to ``str``.
-            temperature (float | None, optional): Sampling temperature. ``None`` omits the
-                parameter (recommended for reasoning models). Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
-            **api_kwargs: Additional keyword arguments forwarded verbatim to
-                ``AsyncOpenAI.responses.parse`` (e.g. ``max_output_tokens``, penalties,
-                future parameters). Core batching keys (model, instructions, input,
-                text_format) are protected and silently ignored if provided.
-
-        Returns:
-            pandas.Series: Series whose values are instances of ``response_format``.
-
         Example:
             ```python
             result = await series.aio.responses_with_cache(
                 "classify",
-                cache=shared,
-                max_output_tokens=256,
-                frequency_penalty=0.2,
-            )
-            ```
-
-        Note:
-            This is an asynchronous method and must be awaited.
-        """
-        client: AsyncBatchResponses = AsyncBatchResponses(
-            client=CONTAINER.resolve(AsyncOpenAI),
-            model_name=CONTAINER.resolve(ResponsesModelName).value,
-            system_message=instructions,
-            response_format=response_format,
-            cache=cache,
-            temperature=temperature,
-            top_p=top_p,
-        )
-        results = await client.parse(self._obj.tolist(), **api_kwargs)
-        return pd.Series(results, index=self._obj.index, name=self._obj.name)
-
-    async def embeddings_with_cache(
-        self,
-        cache: AsyncBatchingMapProxy[str, np.ndarray],
-    ) -> pd.Series:
-        """Compute OpenAI embeddings for every Series element using a provided cache (asynchronously).
-
-        This method allows external control over caching behavior by accepting
-        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
-        across multiple operations or custom batch size management. The concurrency
-        is controlled by the cache instance itself.
-
-        Args:
-            cache (AsyncBatchingMapProxy[str, np.ndarray]): Pre-configured cache
-                instance for managing API call batching and deduplication.
-                Set cache.batch_size=None to enable automatic batch size optimization.
-
-        Returns:
-            pandas.Series: Series whose values are ``np.ndarray`` objects
-                (dtype ``float32``).
-
-        Example:
-            ```python
-            from openaivec._proxy import AsyncBatchingMapProxy
-            import numpy as np
-
-            # Create a shared cache with custom batch size and concurrency
-            shared_cache = AsyncBatchingMapProxy[str, np.ndarray](
-                batch_size=64, max_concurrency=4
-            )
-
-            animals = pd.Series(["cat", "dog", "elephant"])
-            # Must be awaited
-            embeddings = await animals.aio.embeddings_with_cache(cache=shared_cache)
-            ```
-
-        Note:
-            This is an asynchronous method and must be awaited.
-        """
-        client: AsyncBatchEmbeddings = AsyncBatchEmbeddings(
-            client=CONTAINER.resolve(AsyncOpenAI),
-            model_name=CONTAINER.resolve(EmbeddingsModelName).value,
-            cache=cache,
-        )
-
-        # Await the async operation
-        results = await client.create(self._obj.tolist())
-
-        return pd.Series(
-            results,
-            index=self._obj.index,
-            name=self._obj.name,
-        )
-
-    async def task_with_cache(
-        self,
-        task: PreparedTask[ResponseFormat],
-        cache: AsyncBatchingMapProxy[str, ResponseFormat],
-        **api_kwargs,
-    ) -> pd.Series:
-        """Execute a prepared task on every Series element using a provided cache (asynchronously).
-
-        This method allows external control over caching behavior by accepting
-        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
-        across multiple operations or custom batch size management. The concurrency
-        is controlled by the cache instance itself.
+                cache=shared,
+                max_output_tokens=256,
+                frequency_penalty=0.2,
+            )
+            ```
 
         Args:
-            task (PreparedTask): A pre-configured task containing instructions,
-                response format, and other parameters for processing the inputs.
+            instructions (str): System prompt prepended to every user message.
             cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
                 instance for managing API call batching and deduplication.
                 Set cache.batch_size=None to enable automatic batch size optimization.
-
-        Additional Keyword Args:
-            Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
-            ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
-            keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
-            library and cannot be overridden.
+            response_format (Type[ResponseFormat], optional): Pydantic model or built‑in
+                type the assistant should return. Defaults to ``str``.
+            temperature (float | None, optional): Sampling temperature. ``None`` omits the
+                parameter (recommended for reasoning models). Defaults to ``0.0``.
+            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+            **api_kwargs: Additional keyword arguments forwarded verbatim to
+                ``AsyncOpenAI.responses.parse`` (e.g. ``max_output_tokens``, penalties,
+                future parameters). Core batching keys (model, instructions, input,
+                text_format) are protected and silently ignored if provided.
 
         Returns:
-            pandas.Series: Series whose values are instances of the task's
-                response format, aligned with the original Series index.
-
-        Example:
-            ```python
-            from openaivec._model import PreparedTask
-            from openaivec._proxy import AsyncBatchingMapProxy
-
-            # Create a shared cache with custom batch size and concurrency
-            shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
-
-            # Assume you have a prepared task for sentiment analysis
-            sentiment_task = PreparedTask(...)
-
-            reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
-            # Must be awaited
-            results = await reviews.aio.task_with_cache(sentiment_task, cache=shared_cache)
-            ```
+            pandas.Series: Series whose values are instances of ``response_format``.
 
         Note:
             This is an asynchronous method and must be awaited.
         """
-        client = AsyncBatchResponses(
+        client: AsyncBatchResponses = AsyncBatchResponses(
             client=CONTAINER.resolve(AsyncOpenAI),
             model_name=CONTAINER.resolve(ResponsesModelName).value,
-            system_message=task.instructions,
-            response_format=task.response_format,
+            system_message=instructions,
+            response_format=response_format,
             cache=cache,
-            temperature=task.temperature,
-            top_p=task.top_p,
+            temperature=temperature,
+            top_p=top_p,
         )
-        # Await the async operation
         results = await client.parse(self._obj.tolist(), **api_kwargs)
-
         return pd.Series(results, index=self._obj.index, name=self._obj.name)
 
     async def responses(
@@ -1018,7 +923,7 @@ class AsyncOpenAIVecSeriesAccessor:
             batch_size (int | None, optional): Number of prompts grouped into a single
                 request. Defaults to ``None`` (automatic batch size optimization
                 based on execution time). Set to a positive integer for fixed batch size.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
@@ -1041,6 +946,59 @@ class AsyncOpenAIVecSeriesAccessor:
             **api_kwargs,
         )
 
+    async def embeddings_with_cache(
+        self,
+        cache: AsyncBatchingMapProxy[str, np.ndarray],
+    ) -> pd.Series:
+        """Compute OpenAI embeddings for every Series element using a provided cache (asynchronously).
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management. The concurrency
+        is controlled by the cache instance itself.
+
+        Example:
+            ```python
+            from openaivec._proxy import AsyncBatchingMapProxy
+            import numpy as np
+
+            # Create a shared cache with custom batch size and concurrency
+            shared_cache = AsyncBatchingMapProxy[str, np.ndarray](
+                batch_size=64, max_concurrency=4
+            )
+
+            animals = pd.Series(["cat", "dog", "elephant"])
+            # Must be awaited
+            embeddings = await animals.aio.embeddings_with_cache(cache=shared_cache)
+            ```
+
+        Args:
+            cache (AsyncBatchingMapProxy[str, np.ndarray]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+
+        Returns:
+            pandas.Series: Series whose values are ``np.ndarray`` objects
+                (dtype ``float32``).
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        client: AsyncBatchEmbeddings = AsyncBatchEmbeddings(
+            client=CONTAINER.resolve(AsyncOpenAI),
+            model_name=CONTAINER.resolve(EmbeddingsModelName).value,
+            cache=cache,
+        )
+
+        # Await the async operation
+        results = await client.create(self._obj.tolist())
+
+        return pd.Series(
+            results,
+            index=self._obj.index,
+            name=self._obj.name,
+        )
+
     async def embeddings(
         self, batch_size: int | None = None, max_concurrency: int = 8, show_progress: bool = False
     ) -> pd.Series:
@@ -1082,6 +1040,69 @@ class AsyncOpenAIVecSeriesAccessor:
             ),
         )
 
+    async def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on every Series element using a provided cache (asynchronously).
+
+        This method allows external control over caching behavior by accepting
+        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
+        across multiple operations or custom batch size management. The concurrency
+        is controlled by the cache instance itself.
+
+        Args:
+            task (PreparedTask): A pre-configured task containing instructions,
+                response format, and other parameters for processing the inputs.
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+
+        Example:
+            ```python
+            from openaivec._model import PreparedTask
+            from openaivec._proxy import AsyncBatchingMapProxy
+
+            # Create a shared cache with custom batch size and concurrency
+            shared_cache = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
+
+            # Assume you have a prepared task for sentiment analysis
+            sentiment_task = PreparedTask(...)
+
+            reviews = pd.Series(["Great product!", "Not satisfied", "Amazing quality"])
+            # Must be awaited
+            results = await reviews.aio.task_with_cache(sentiment_task, cache=shared_cache)
+            ```
+
+        Additional Keyword Args:
+            Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
+            ``seed``, etc.) are forwarded verbatim to the underlying client. Core batching / routing
+            keys (``model``, ``instructions`` / system message, user ``input``) are managed by the
+            library and cannot be overridden.
+
+        Returns:
+            pandas.Series: Series whose values are instances of the task's
+                response format, aligned with the original Series index.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        client = AsyncBatchResponses(
+            client=CONTAINER.resolve(AsyncOpenAI),
+            model_name=CONTAINER.resolve(ResponsesModelName).value,
+            system_message=task.instructions,
+            response_format=task.response_format,
+            cache=cache,
+            temperature=task.temperature,
+            top_p=task.top_p,
+        )
+        # Await the async operation
+        results = await client.parse(self._obj.tolist(), **api_kwargs)
+
+        return pd.Series(results, index=self._obj.index, name=self._obj.name)
+
     async def task(
         self,
         task: PreparedTask,
@@ -1161,26 +1182,13 @@ class AsyncOpenAIVecDataFrameAccessor:
         top_p: float = 1.0,
         **api_kwargs,
     ) -> pd.Series:
-        """Generate a response for each row after serialising it to JSON using a provided cache (asynchronously).
+        """Generate a response for each row after serializing it to JSON using a provided cache (asynchronously).
 
         This method allows external control over caching behavior by accepting
        a pre-configured AsyncBatchingMapProxy instance, enabling cache sharing
         across multiple operations or custom batch size management. The concurrency
         is controlled by the cache instance itself.
 
-        Args:
-            instructions (str): System prompt for the assistant.
-            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
-                instance for managing API call batching and deduplication.
-                Set cache.batch_size=None to enable automatic batch size optimization.
-            response_format (Type[ResponseFormat], optional): Desired Python type of the
-                responses. Defaults to ``str``.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
-            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
-
-        Returns:
-            pandas.Series: Responses aligned with the DataFrame's original index.
-
         Example:
             ```python
             from openaivec._proxy import AsyncBatchingMapProxy
@@ -1200,6 +1208,19 @@ class AsyncOpenAIVecDataFrameAccessor:
             )
             ```
 
+        Args:
+            instructions (str): System prompt for the assistant.
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
+                instance for managing API call batching and deduplication.
+                Set cache.batch_size=None to enable automatic batch size optimization.
+            response_format (Type[ResponseFormat], optional): Desired Python type of the
+                responses. Defaults to ``str``.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
+            top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
+
+        Returns:
+            pandas.Series: Responses aligned with the DataFrame's original index.
+
         Note:
             This is an asynchronous method and must be awaited.
         """
@@ -1224,7 +1245,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         show_progress: bool = False,
         **api_kwargs,
     ) -> pd.Series:
-        """Generate a response for each row after serialising it to JSON (asynchronously).
+        """Generate a response for each row after serializing it to JSON (asynchronously).
 
         Example:
             ```python
@@ -1253,7 +1274,7 @@ class AsyncOpenAIVecDataFrameAccessor:
             batch_size (int | None, optional): Number of requests sent in one batch.
                 Defaults to ``None`` (automatic batch size optimization
                 based on execution time). Set to a positive integer for fixed batch size.
-            temperature (float, optional): Sampling temperature. Defaults to ``0.0``.
+            temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
             top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
             max_concurrency (int, optional): Maximum number of concurrent
                 requests. Defaults to ``8``.
@@ -1276,6 +1297,35 @@ class AsyncOpenAIVecDataFrameAccessor:
             **api_kwargs,
         )
 
+    async def task_with_cache(
+        self,
+        task: PreparedTask[ResponseFormat],
+        cache: AsyncBatchingMapProxy[str, ResponseFormat],
+        **api_kwargs,
+    ) -> pd.Series:
+        """Execute a prepared task on each DataFrame row using a provided cache (asynchronously).
+
+        After serializing each row to JSON, this method executes the prepared task.
+
+        Args:
+            task (PreparedTask): Prepared task (instructions + response_format + sampling params).
+            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre‑configured async cache instance.
+
+        Additional Keyword Args:
+            Arbitrary OpenAI Responses API parameters forwarded verbatim. Core routing keys are protected.
+
+        Returns:
+            pandas.Series: Task results aligned with the DataFrame's original index.
+
+        Note:
+            This is an asynchronous method and must be awaited.
+        """
+        return await _df_rows_to_json_series(self._obj).aio.task_with_cache(
+            task=task,
+            cache=cache,
+            **api_kwargs,
+        )
+
     async def task(
         self,
         task: PreparedTask,
@@ -1284,7 +1334,7 @@ class AsyncOpenAIVecDataFrameAccessor:
         show_progress: bool = False,
         **api_kwargs,
     ) -> pd.Series:
-        """Execute a prepared task on each DataFrame row after serialising it to JSON (asynchronously).
+        """Execute a prepared task on each DataFrame row after serializing it to JSON (asynchronously).
 
         Example:
             ```python
@@ -1343,40 +1393,24 @@ class AsyncOpenAIVecDataFrameAccessor:
             **api_kwargs,
        )
 
-    async def task_with_cache(
-        self,
-        task: PreparedTask[ResponseFormat],
-        cache: AsyncBatchingMapProxy[str, ResponseFormat],
-        **api_kwargs,
-    ) -> pd.Series:
-        """Execute a prepared task on each DataFrame row after serializing it to JSON using a provided cache (async).
-
-        Args:
-            task (PreparedTask): Prepared task (instructions + response_format + sampling params).
-            cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre‑configured async cache instance.
-
-        Additional Keyword Args:
-            Arbitrary OpenAI Responses API parameters forwarded verbatim. Core routing keys are protected.
-
-        Returns:
-            pandas.Series: Task results aligned with the DataFrame's original index.
-
-        Note:
-            This is an asynchronous method and must be awaited.
-        """
-        return await _df_rows_to_json_series(self._obj).aio.task_with_cache(
-            task=task,
-            cache=cache,
-            **api_kwargs,
-        )
-
     async def pipe(self, func: Callable[[pd.DataFrame], Awaitable[T] | T]) -> T:
-        """
-        Apply a function to the DataFrame, supporting both synchronous and asynchronous functions.
+        """Apply a function to the DataFrame, supporting both synchronous and asynchronous functions.
 
         This method allows chaining operations on the DataFrame, similar to pandas' `pipe` method,
         but with support for asynchronous functions.
 
+        Example:
+            ```python
+            async def process_data(df):
+                # Simulate an asynchronous computation
+                await asyncio.sleep(1)
+                return df.dropna()
+
+            df = pd.DataFrame({"col": [1, 2, None, 4]})
+            # Must be awaited
+            result = await df.aio.pipe(process_data)
+            ```
+
         Args:
             func (Callable[[pd.DataFrame], Awaitable[T] | T]): A function that takes a DataFrame
                 as input and returns either a result or an awaitable result.
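Taken together, the `*_with_cache` variants exist so a single proxy can be shared across calls. A sketch of that usage with the async accessor, assuming `openaivec.pandas_ext` has been imported (which registers the `.ai`/`.aio` accessors) and an OpenAI client is configured for the library:

```python
import asyncio

import pandas as pd
import openaivec.pandas_ext  # noqa: F401  (assumed to register the .ai / .aio accessors)
from openaivec._proxy import AsyncBatchingMapProxy


async def main() -> None:
    # One shared cache: duplicated inputs are requested once, then served from cache.
    shared = AsyncBatchingMapProxy(batch_size=64, max_concurrency=4)
    reviews = pd.Series(["Great product!", "Not satisfied", "Great product!"])

    first = await reviews.aio.responses_with_cache("classify sentiment", cache=shared)
    # Re-running with the same cache and instructions should reuse prior results
    # rather than issuing new API calls (None results excepted, per the purge above).
    second = await reviews.aio.responses_with_cache("classify sentiment", cache=shared)
    assert first.equals(second)


asyncio.run(main())
```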
openaivec-0.14.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openaivec
-Version: 0.14.3
+Version: 0.14.4
 Summary: Generative mutation for tabular calculation
 Project-URL: Homepage, https://microsoft.github.io/openaivec/
 Project-URL: Repository, https://github.com/microsoft/openaivec
openaivec-0.14.4.dist-info/RECORD CHANGED
@@ -6,12 +6,12 @@ openaivec/_model.py,sha256=xg3s9Ljqb2xK1t_a5bwWxGJfFSIuaNrFGMgQq4nQKrM,3351
 openaivec/_optimize.py,sha256=-mKjD5YV_d1Z2nqfGfAcmx6mTKn6AODjFTrIKJPbAXQ,3851
 openaivec/_prompt.py,sha256=KoJbFK4gTEDRtu9OMweJq_jQLkSPFy2Kcvao30qKhAQ,20844
 openaivec/_provider.py,sha256=dNr9Y2C97GK-pkY81odurKoDup59dLK31V3EGT2HOwE,6711
-openaivec/_proxy.py,sha256=giOxRlCCO11XQ0gNVf2IksjZZj9RwvTHkHbmbQXadEk,28916
+openaivec/_proxy.py,sha256=J0qGDcZqSab26ScA8OXxzornfwuXtrVycqup-JPq464,29719
 openaivec/_responses.py,sha256=xtkiOn01RkauHq2FAKRAcjPglH8rmbaSz0-VE0ClTe8,24026
-openaivec/_schema.py,sha256=Q1UgCxldjeQ3YNNRF9memq5CsjKIysrD6xAN5wBHacc,17939
+openaivec/_schema.py,sha256=9enwqE2idLLUKbQxjiNn09uhdKz14kihEwUXglRqxx0,20543
 openaivec/_serialize.py,sha256=NLCKl4opc1WS24_duwpI2UGBepQ8SBh4YRxBlLwzDLw,8403
 openaivec/_util.py,sha256=dFWwjouJyvF-tqNPs2933OAt5Fw9I2Q2BvmGIfGH5k4,6423
-openaivec/pandas_ext.py,sha256=n_D2zvwNTrW2FITKm4w1Gz0dJaLQFg8QqGD5GQ1mGQs,61750
+openaivec/pandas_ext.py,sha256=m4H6mrE__Jmr5R6hl6d8yc2JhVT0-wdf5GOKWIITeLU,63366
 openaivec/spark.py,sha256=lI-noacLvuxu6gBztKdcYd9vfK3eNI3aCGwJylkzv7E,25367
 openaivec/task/__init__.py,sha256=lrgoc9UIox7XnxZ96dQRl88a-8QfuZRFBHshxctpMB8,6178
 openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
@@ -30,7 +30,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=BNwWtNT-MNA76eIJbb31641upukmRwM9
 openaivec/task/nlp/translation.py,sha256=XTZM11JFjbgTK9wHnxFgVDabXZ5bqbabXK_bq2nEkyQ,6627
 openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
 openaivec/task/table/fillna.py,sha256=ZVcOpuh7ULVhrt1VsWy5fPhk53XNaiD7kXGCPhh83M8,6636
-openaivec-0.14.3.dist-info/METADATA,sha256=0lDWogR9ysS8ysInwOahPSNZwgxQJ9Y8N6sbzVRU5KQ,27566
-openaivec-0.14.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-openaivec-0.14.3.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
-openaivec-0.14.3.dist-info/RECORD,,
+openaivec-0.14.4.dist-info/METADATA,sha256=RF6rZDL5B4qYCqXIbC0jexv-IzHv48WBDV-MZtNHcvY,27566
+openaivec-0.14.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+openaivec-0.14.4.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
+openaivec-0.14.4.dist-info/RECORD,,