openaivec 0.14.4__py3-none-any.whl → 0.14.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/_provider.py CHANGED
@@ -13,6 +13,7 @@ from openaivec._model import (
      OpenAIAPIKey,
      ResponsesModelName,
  )
+ from openaivec._schema import SchemaInferer
  from openaivec._util import TextChunker

  __all__ = []
@@ -142,6 +143,13 @@ CONTAINER.register(OpenAI, provide_openai_client)
  CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
  CONTAINER.register(tiktoken.Encoding, lambda: tiktoken.get_encoding("o200k_base"))
  CONTAINER.register(TextChunker, lambda: TextChunker(CONTAINER.resolve(tiktoken.Encoding)))
+ CONTAINER.register(
+     SchemaInferer,
+     lambda: SchemaInferer(
+         client=CONTAINER.resolve(OpenAI),
+         model_name=CONTAINER.resolve(ResponsesModelName).value,
+     ),
+ )


  def reset_environment_registrations():
@@ -160,3 +168,10 @@ def reset_environment_registrations():
      )
      CONTAINER.register(OpenAI, provide_openai_client)
      CONTAINER.register(AsyncOpenAI, provide_async_openai_client)
+     CONTAINER.register(
+         SchemaInferer,
+         lambda: SchemaInferer(
+             client=CONTAINER.resolve(OpenAI),
+             model_name=CONTAINER.resolve(ResponsesModelName).value,
+         ),
+     )
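For context, these registrations mean the schema inference service is built lazily by the dependency container rather than constructed by callers. A minimal sketch of how downstream code (such as the accessors in `pandas_ext.py`) would obtain it, assuming the `CONTAINER` import path and the register/resolve API shown in this diff:

```python
# Sketch only, not part of the diff. The import path for CONTAINER is an
# assumption based on the file being changed above.
from openaivec._provider import CONTAINER
from openaivec._schema import SchemaInferer

# Resolving triggers the factory registered above, which wires the inferer
# to the container's OpenAI client and the configured responses model name.
inferer = CONTAINER.resolve(SchemaInferer)
```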
openaivec/pandas_ext.py CHANGED
@@ -49,6 +49,8 @@ import pandas as pd
  import tiktoken
  from openai import AsyncOpenAI, OpenAI

+ from openaivec._schema import InferredSchema, SchemaInferenceInput, SchemaInferer
+
  __all__ = [
      "embeddings_model",
      "responses_model",
@@ -434,6 +436,61 @@ class OpenAIVecSeriesAccessor:
              **api_kwargs,
          )

+     def infer_schema(self, purpose: str, max_examples: int = 100) -> InferredSchema:
+         """Infer a structured data schema from Series content using AI.
+
+         This method analyzes a sample of the Series values to automatically infer
+         a structured schema that can be used for consistent data extraction.
+         The inferred schema includes field names, types, descriptions, and
+         potential enum values based on patterns found in the data.
+
+         Args:
+             purpose (str): Plain language description of how the extracted
+                 structured data will be used (e.g., "Extract customer sentiment
+                 signals for analytics", "Parse product features for search").
+                 This guides field relevance and helps exclude irrelevant information.
+             max_examples (int): Maximum number of examples to analyze from the
+                 Series. The method will sample randomly from the Series up to this
+                 limit. Defaults to 100.
+
+         Returns:
+             InferredSchema: An object containing:
+                 - purpose: Normalized statement of the extraction objective
+                 - fields: List of field specifications with names, types, and descriptions
+                 - inference_prompt: Reusable prompt for future extractions
+                 - model: Dynamically generated Pydantic model for parsing
+                 - task: PreparedTask for batch extraction operations
+
+         Example:
+             ```python
+             reviews = pd.Series([
+                 "Great product! Fast shipping and excellent quality.",
+                 "Terrible experience. Item broke after 2 days.",
+                 "Average product. Price is fair but nothing special."
+             ])
+
+             # Infer schema for sentiment analysis
+             schema = reviews.ai.infer_schema(
+                 purpose="Extract sentiment and product quality indicators"
+             )
+
+             # Use the inferred schema for batch extraction
+             extracted = reviews.ai.task(schema.task)
+             ```
+
+         Note:
+             The schema inference uses AI to analyze patterns in the data and may
+             require multiple attempts to produce a valid schema. Fields are limited
+             to primitive types (string, integer, float, boolean) with optional
+             enum values for categorical fields.
+         """
+         inferer = CONTAINER.resolve(SchemaInferer)
+
+         input: SchemaInferenceInput = SchemaInferenceInput(
+             examples=self._obj.sample(n=min(max_examples, len(self._obj))).tolist(), purpose=purpose
+         )
+         return inferer.infer_schema(input)
+
      def count_tokens(self) -> pd.Series:
          """Count `tiktoken` tokens per row.

@@ -480,6 +537,90 @@ class OpenAIVecSeriesAccessor:
          extracted.columns = [f"{self._obj.name}_{col}" for col in extracted.columns]
          return extracted

+     def auto_extract(
+         self,
+         purpose: str,
+         max_examples: int = 100,
+         batch_size: int | None = None,
+         show_progress: bool = False,
+         **api_kwargs,
+     ) -> pd.DataFrame:
+         """Automatically infer schema and extract structured data in one step.
+
+         This convenience method combines schema inference and data extraction into
+         a single operation. It first analyzes a sample of the Series to infer an
+         appropriate schema based on the stated purpose, then immediately applies
+         that schema to extract structured data from all values in the Series.
+
+         Args:
+             purpose (str): Plain language description of what information to extract
+                 and how it will be used (e.g., "Extract product features for search",
+                 "Parse customer feedback for sentiment analysis"). This guides both
+                 schema inference and field selection.
+             max_examples (int): Maximum number of examples to use for schema inference.
+                 A larger sample may produce more accurate schemas but increases
+                 inference time. Defaults to 100.
+             batch_size (int | None): Number of requests to process in parallel during
+                 extraction. Defaults to None (automatic optimization). Set to a specific
+                 value to control API usage and performance.
+             show_progress (bool): Whether to display a progress bar during extraction.
+                 Useful for large datasets. Defaults to False.
+             **api_kwargs: Additional OpenAI API parameters (e.g., `temperature`, `top_p`,
+                 `frequency_penalty`, `presence_penalty`, `seed`) forwarded to the task execution.
+
+         Returns:
+             pd.DataFrame: A DataFrame with extracted structured data. Each inferred
+                 field becomes a column, with the same index as the original Series.
+                 Column names and types are determined by the inferred schema.
+
+         Example:
+             ```python
+             # Extract structured data from product reviews
+             reviews = pd.Series([
+                 "Great laptop! 16GB RAM, fast SSD, battery lasts 10 hours",
+                 "Decent phone. 128GB storage, camera is okay, screen is bright",
+                 "Gaming desktop with RTX 4090, 32GB RAM, runs everything smoothly"
+             ])
+
+             # One-step extraction
+             extracted = reviews.ai.auto_extract(
+                 purpose="Extract product specifications and performance metrics",
+                 show_progress=True
+             )
+             # Result: DataFrame with columns like 'ram', 'storage', 'battery_life', etc.
+
+             # Extract sentiment and issues from support tickets
+             tickets = pd.Series([
+                 "Account locked, can't reset password, very frustrated",
+                 "Billing error, charged twice for subscription",
+                 "Great support! Issue resolved quickly"
+             ])
+
+             features = tickets.ai.auto_extract(
+                 purpose="Extract issue type and customer sentiment for support analytics"
+             )
+             ```
+
+         Note:
+             This method is ideal for exploratory data analysis when you don't have
+             a predefined schema. For production use cases with stable schemas,
+             consider using `infer_schema()` once and reusing the schema with `task()`.
+             The inferred schema is not returned, so if you need to inspect or save it,
+             use `infer_schema()` and `task()` separately.
+         """
+         schema = self._obj.ai.infer_schema(purpose=purpose, max_examples=max_examples)
+
+         return pd.DataFrame(
+             {
+                 "inferred": self._obj.ai.task(
+                     task=schema.task,
+                     batch_size=batch_size,
+                     show_progress=show_progress,
+                     **api_kwargs,
+                 ),
+             }
+         ).ai.extract("inferred")
+

  @pd.api.extensions.register_dataframe_accessor("ai")
  class OpenAIVecDataFrameAccessor:
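The Notes above recommend inferring the schema once and reusing it when the schema is stable. A hedged sketch of that pattern, using only the attributes documented in the `InferredSchema` return value and the accessor calls shown in this diff (it assumes `openaivec.pandas_ext` has been imported so the `.ai` accessor is registered):

```python
import pandas as pd

reviews = pd.Series([
    "Great product! Fast shipping and excellent quality.",
    "Terrible experience. Item broke after 2 days.",
])

# Infer the schema once and inspect it before committing to it.
schema = reviews.ai.infer_schema(purpose="Extract sentiment and product quality indicators")
print(schema.purpose)            # normalized extraction objective
print(schema.fields)             # inferred field specifications
print(schema.inference_prompt)   # reusable prompt for future extractions

# Reuse the prepared task for later batches instead of calling auto_extract again.
new_reviews = pd.Series(["Average product. Price is fair but nothing special."])
extracted = pd.DataFrame(
    {"inferred": new_reviews.ai.task(task=schema.task, show_progress=True)}
).ai.extract("inferred")
```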
@@ -680,6 +821,62 @@ class OpenAIVecDataFrameAccessor:
              **api_kwargs,
          )

+     def infer_schema(self, purpose: str, max_examples: int = 100) -> InferredSchema:
+         """Infer a structured data schema from DataFrame rows using AI.
+
+         This method analyzes a sample of DataFrame rows to automatically infer
+         a structured schema that can be used for consistent data extraction.
+         Each row is converted to JSON format and analyzed to identify patterns,
+         field types, and potential categorical values.
+
+         Args:
+             purpose (str): Plain language description of how the extracted
+                 structured data will be used (e.g., "Extract operational metrics
+                 for dashboard", "Parse customer attributes for segmentation").
+                 This guides field relevance and helps exclude irrelevant information.
+             max_examples (int): Maximum number of rows to analyze from the
+                 DataFrame. The method will sample randomly up to this limit.
+                 Defaults to 100.
+
+         Returns:
+             InferredSchema: An object containing:
+                 - purpose: Normalized statement of the extraction objective
+                 - fields: List of field specifications with names, types, and descriptions
+                 - inference_prompt: Reusable prompt for future extractions
+                 - model: Dynamically generated Pydantic model for parsing
+                 - task: PreparedTask for batch extraction operations
+
+         Example:
+             ```python
+             df = pd.DataFrame({
+                 'text': [
+                     "Order #123: Shipped to NYC, arriving Tuesday",
+                     "Order #456: Delayed due to weather, new ETA Friday",
+                     "Order #789: Delivered to customer in LA"
+                 ],
+                 'timestamp': ['2024-01-01', '2024-01-02', '2024-01-03']
+             })
+
+             # Infer schema for logistics tracking
+             schema = df.ai.infer_schema(
+                 purpose="Extract shipping status and location data for logistics tracking"
+             )
+
+             # Apply the schema to extract structured data
+             extracted_df = df.ai.task(schema.task)
+             ```
+
+         Note:
+             The DataFrame rows are internally converted to JSON format before
+             analysis. The inferred schema is flat (no nested structures) and
+             uses only primitive types to ensure compatibility with pandas and
+             Spark operations.
+         """
+         return _df_rows_to_json_series(self._obj).ai.infer_schema(
+             purpose=purpose,
+             max_examples=max_examples,
+         )
+
      def extract(self, column: str) -> pd.DataFrame:
          """Flatten one column of Pydantic models/dicts into top‑level columns.

@@ -790,6 +987,100 @@ class OpenAIVecDataFrameAccessor:

          return df

+     def auto_extract(
+         self,
+         purpose: str,
+         max_examples: int = 100,
+         batch_size: int | None = None,
+         show_progress: bool = False,
+         **api_kwargs,
+     ) -> pd.DataFrame:
+         """Automatically infer schema and add extracted fields to the DataFrame.
+
+         This convenience method combines schema inference and data extraction to
+         automatically add new columns to the existing DataFrame. It analyzes a
+         sample of the DataFrame rows to infer an appropriate schema based on the
+         stated purpose, then extracts structured data and joins it with the
+         original DataFrame.
+
+         Args:
+             purpose (str): Plain language description of what information to extract
+                 and how it will be used (e.g., "Extract customer sentiment metrics",
+                 "Parse product attributes for analytics"). This guides both schema
+                 inference and field selection.
+             max_examples (int): Maximum number of rows to use for schema inference.
+                 A larger sample may produce more accurate schemas but increases
+                 inference time. Defaults to 100.
+             batch_size (int | None): Number of requests to process in parallel during
+                 extraction. Defaults to None (automatic optimization). Set to a specific
+                 value to control API usage and performance.
+             show_progress (bool): Whether to display a progress bar during extraction.
+                 Useful for large datasets. Defaults to False.
+             **api_kwargs: Additional OpenAI API parameters (e.g., `temperature`, `top_p`,
+                 `frequency_penalty`, `presence_penalty`, `seed`) forwarded to the task execution.
+
+         Returns:
+             pd.DataFrame: The original DataFrame with new columns added from the
+                 inferred structured data. Each inferred field becomes a new column.
+                 The original columns and index are preserved.
+
+         Example:
+             ```python
+             # Add sentiment and issue type to support tickets
+             df = pd.DataFrame({
+                 'ticket_id': [1, 2, 3],
+                 'description': [
+                     "Can't login, password reset not working",
+                     "Billing error, charged twice last month",
+                     "Great service, issue resolved quickly!"
+                 ],
+                 'date': ['2024-01-01', '2024-01-02', '2024-01-03']
+             })
+
+             # Add inferred fields to existing DataFrame
+             enriched_df = df.ai.auto_extract(
+                 purpose="Extract issue type and sentiment for support dashboard",
+                 show_progress=True
+             )
+             # Result: Original df with new columns like 'issue_type', 'sentiment', etc.
+
+             # Add product specifications to inventory data
+             inventory = pd.DataFrame({
+                 'sku': ['A001', 'B002', 'C003'],
+                 'description': [
+                     "Laptop 16GB RAM, 512GB SSD, Intel i7",
+                     "Phone 128GB, 5G, dual camera",
+                     "Tablet 10-inch, WiFi only, 64GB"
+                 ]
+             })
+
+             enriched_inventory = inventory.ai.auto_extract(
+                 purpose="Extract technical specifications for inventory system"
+             )
+             ```
+
+         Note:
+             This method is ideal for enriching existing DataFrames with additional
+             structured fields extracted from text columns. The schema is inferred
+             from the entire DataFrame content (converted to JSON format). For
+             production use cases with stable schemas, consider using `infer_schema()`
+             once and reusing the schema with `task()`.
+         """
+         # Infer schema from DataFrame rows
+         schema = self._obj.ai.infer_schema(purpose=purpose, max_examples=max_examples)
+
+         # Extract structured data using the inferred schema
+         inferred_series = self._obj.ai.task(
+             task=schema.task,
+             batch_size=batch_size,
+             show_progress=show_progress,
+             **api_kwargs,
+         )
+
+         return self._obj.assign(
+             inferred=inferred_series,
+         ).ai.extract("inferred")
+
      def similarity(self, col1: str, col2: str) -> pd.Series:
          """Compute cosine similarity between two columns containing embedding vectors.

@@ -1165,6 +1456,96 @@ class AsyncOpenAIVecSeriesAccessor:
              **api_kwargs,
          )

+     async def auto_extract(
+         self,
+         purpose: str,
+         max_examples: int = 100,
+         batch_size: int | None = None,
+         max_concurrency: int = 8,
+         show_progress: bool = False,
+         **api_kwargs,
+     ) -> pd.DataFrame:
+         """Automatically infer schema and extract structured data in one step (asynchronously).
+
+         This convenience method combines schema inference and data extraction into
+         a single operation. It first analyzes a sample of the Series to infer an
+         appropriate schema based on the stated purpose, then immediately applies
+         that schema to extract structured data from all values in the Series.
+
+         Args:
+             purpose (str): Plain language description of what information to extract
+                 and how it will be used (e.g., "Extract product features for search",
+                 "Parse customer feedback for sentiment analysis"). This guides both
+                 schema inference and field selection.
+             max_examples (int): Maximum number of examples to use for schema inference.
+                 A larger sample may produce more accurate schemas but increases
+                 inference time. Defaults to 100.
+             batch_size (int | None): Number of requests to process in parallel during
+                 extraction. Defaults to None (automatic optimization). Set to a specific
+                 value to control API usage and performance.
+             max_concurrency (int): Maximum number of concurrent requests during
+                 extraction. Defaults to 8.
+             show_progress (bool): Whether to display a progress bar during extraction.
+                 Useful for large datasets. Defaults to False.
+             **api_kwargs: Additional OpenAI API parameters (e.g., `temperature`, `top_p`,
+                 `frequency_penalty`, `presence_penalty`, `seed`) forwarded to the task execution.
+
+         Returns:
+             pd.DataFrame: A DataFrame with extracted structured data. Each inferred
+                 field becomes a column, with the same index as the original Series.
+                 Column names and types are determined by the inferred schema.
+
+         Example:
+             ```python
+             # Extract structured data from product reviews
+             reviews = pd.Series([
+                 "Great laptop! 16GB RAM, fast SSD, battery lasts 10 hours",
+                 "Decent phone. 128GB storage, camera is okay, screen is bright",
+                 "Gaming desktop with RTX 4090, 32GB RAM, runs everything smoothly"
+             ])
+
+             # One-step extraction (must be awaited)
+             extracted = await reviews.aio.auto_extract(
+                 purpose="Extract product specifications and performance metrics",
+                 max_concurrency=4,
+                 show_progress=True
+             )
+             # Result: DataFrame with columns like 'ram', 'storage', 'battery_life', etc.
+
+             # Extract sentiment and issues from support tickets
+             tickets = pd.Series([
+                 "Account locked, can't reset password, very frustrated",
+                 "Billing error, charged twice for subscription",
+                 "Great support! Issue resolved quickly"
+             ])
+
+             features = await tickets.aio.auto_extract(
+                 purpose="Extract issue type and customer sentiment for support analytics",
+                 batch_size=32
+             )
+             ```
+
+         Note:
+             This is an asynchronous method and must be awaited. This method is ideal
+             for exploratory data analysis when you don't have a predefined schema.
+             For production use cases with stable schemas, consider using the synchronous
+             `infer_schema()` once and reusing the schema with `task()`. The inferred
+             schema is not returned, so if you need to inspect or save it, use
+             `infer_schema()` and `task()` separately.
+         """
+         # Use synchronous infer_schema since it's not async
+         schema = self._obj.ai.infer_schema(purpose=purpose, max_examples=max_examples)
+
+         inferred_series = await self._obj.aio.task(
+             task=schema.task,
+             batch_size=batch_size,
+             max_concurrency=max_concurrency,
+             show_progress=show_progress,
+             **api_kwargs,
+         )
+
+         return pd.DataFrame({"inferred": inferred_series}).ai.extract("inferred")
+

  @pd.api.extensions.register_dataframe_accessor("aio")
  class AsyncOpenAIVecDataFrameAccessor:
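For repeated asynchronous runs, the Note's "infer once, reuse the task" advice might look like the following sketch. It relies only on the call signatures shown in this diff (`ai.infer_schema`, `aio.task`, `ai.extract`); the batching helper itself is illustrative:

```python
import asyncio
import pandas as pd

async def extract_batches(batches: list[pd.Series]) -> list[pd.DataFrame]:
    # Schema inference is synchronous and happens only once, on the first batch.
    schema = batches[0].ai.infer_schema(purpose="Extract issue type and customer sentiment")

    async def run(batch: pd.Series) -> pd.DataFrame:
        # Each batch reuses the same prepared task; no schema re-inference.
        inferred = await batch.aio.task(
            task=schema.task,
            max_concurrency=4,
            show_progress=False,
        )
        return pd.DataFrame({"inferred": inferred}).ai.extract("inferred")

    return list(await asyncio.gather(*(run(b) for b in batches)))
```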
@@ -1572,3 +1953,103 @@ class AsyncOpenAIVecDataFrameAccessor:
              df.at[actual_index, target_column_name] = result.output

          return df
+
+     async def auto_extract(
+         self,
+         purpose: str,
+         max_examples: int = 100,
+         batch_size: int | None = None,
+         max_concurrency: int = 8,
+         show_progress: bool = False,
+         **api_kwargs,
+     ) -> pd.DataFrame:
+         """Automatically infer schema and add extracted fields to the DataFrame (asynchronously).
+
+         This convenience method combines schema inference and data extraction to
+         automatically add new columns to the existing DataFrame. It analyzes a
+         sample of the DataFrame rows to infer an appropriate schema based on the
+         stated purpose, then extracts structured data and joins it with the
+         original DataFrame.
+
+         Args:
+             purpose (str): Plain language description of what information to extract
+                 and how it will be used (e.g., "Extract customer sentiment metrics",
+                 "Parse product attributes for analytics"). This guides both schema
+                 inference and field selection.
+             max_examples (int): Maximum number of rows to use for schema inference.
+                 A larger sample may produce more accurate schemas but increases
+                 inference time. Defaults to 100.
+             batch_size (int | None): Number of requests to process in parallel during
+                 extraction. Defaults to None (automatic optimization). Set to a specific
+                 value to control API usage and performance.
+             max_concurrency (int): Maximum number of concurrent requests during
+                 extraction. Defaults to 8.
+             show_progress (bool): Whether to display a progress bar during extraction.
+                 Useful for large datasets. Defaults to False.
+             **api_kwargs: Additional OpenAI API parameters (e.g., `temperature`, `top_p`,
+                 `frequency_penalty`, `presence_penalty`, `seed`) forwarded to the task execution.
+
+         Returns:
+             pd.DataFrame: The original DataFrame with new columns added from the
+                 inferred structured data. Each inferred field becomes a new column.
+                 The original columns and index are preserved.
+
+         Example:
+             ```python
+             # Add sentiment and issue type to support tickets
+             df = pd.DataFrame({
+                 'ticket_id': [1, 2, 3],
+                 'description': [
+                     "Can't login, password reset not working",
+                     "Billing error, charged twice last month",
+                     "Great service, issue resolved quickly!"
+                 ],
+                 'date': ['2024-01-01', '2024-01-02', '2024-01-03']
+             })
+
+             # Add inferred fields to existing DataFrame (must be awaited)
+             enriched_df = await df.aio.auto_extract(
+                 purpose="Extract issue type and sentiment for support dashboard",
+                 max_concurrency=4,
+                 show_progress=True
+             )
+             # Result: Original df with new columns like 'issue_type', 'sentiment', etc.
+
+             # Add product specifications to inventory data
+             inventory = pd.DataFrame({
+                 'sku': ['A001', 'B002', 'C003'],
+                 'description': [
+                     "Laptop 16GB RAM, 512GB SSD, Intel i7",
+                     "Phone 128GB, 5G, dual camera",
+                     "Tablet 10-inch, WiFi only, 64GB"
+                 ]
+             })
+
+             enriched_inventory = await inventory.aio.auto_extract(
+                 purpose="Extract technical specifications for inventory system",
+                 batch_size=32
+             )
+             ```
+
+         Note:
+             This is an asynchronous method and must be awaited. This method is ideal
+             for enriching existing DataFrames with additional structured fields
+             extracted from text columns. The schema is inferred synchronously from
+             the DataFrame content. For production use cases with stable schemas,
+             consider using `infer_schema()` once and reusing the schema with `task()`.
+         """
+         # Infer schema from DataFrame rows (synchronous)
+         schema = self._obj.ai.infer_schema(purpose=purpose, max_examples=max_examples)
+
+         # Extract structured data using the inferred schema (asynchronous)
+         inferred_series = await self._obj.aio.task(
+             task=schema.task,
+             batch_size=batch_size,
+             max_concurrency=max_concurrency,
+             show_progress=show_progress,
+             **api_kwargs,
+         )
+
+         return self._obj.assign(
+             inferred=inferred_series,
+         ).ai.extract("inferred")
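Taken together, the pandas_ext changes add six accessor methods. A compact, hedged summary of how they would be called, based only on the signatures shown above (it assumes `openaivec.pandas_ext` has been imported so the `.ai` and `.aio` accessors are registered):

```python
import pandas as pd

texts = pd.Series(["Order #123: Shipped to NYC, arriving Tuesday"])
df = pd.DataFrame({"description": ["Laptop 16GB RAM, 512GB SSD, Intel i7"]})

schema_s = texts.ai.infer_schema(purpose="Extract shipping status")  # Series -> InferredSchema
schema_d = df.ai.infer_schema(purpose="Extract product specs")       # DataFrame -> InferredSchema

wide = texts.ai.auto_extract(purpose="Extract shipping status")      # infer + extract, new DataFrame
enriched = df.ai.auto_extract(purpose="Extract product specs")       # infer + extract, columns appended

# Async variants (inside an event loop):
#   await texts.aio.auto_extract(purpose=..., max_concurrency=8)
#   await df.aio.auto_extract(purpose=..., max_concurrency=8)
```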
openaivec-0.14.4.dist-info/METADATA → openaivec-0.14.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: openaivec
- Version: 0.14.4
+ Version: 0.14.5
  Summary: Generative mutation for tabular calculation
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
  Project-URL: Repository, https://github.com/microsoft/openaivec
openaivec-0.14.4.dist-info/RECORD → openaivec-0.14.5.dist-info/RECORD CHANGED
@@ -5,13 +5,13 @@ openaivec/_log.py,sha256=1qhc9CF4D4bwiF_VWHilcYBPcTqIKyI0zuNEfn0MLNA,1430
  openaivec/_model.py,sha256=xg3s9Ljqb2xK1t_a5bwWxGJfFSIuaNrFGMgQq4nQKrM,3351
  openaivec/_optimize.py,sha256=-mKjD5YV_d1Z2nqfGfAcmx6mTKn6AODjFTrIKJPbAXQ,3851
  openaivec/_prompt.py,sha256=KoJbFK4gTEDRtu9OMweJq_jQLkSPFy2Kcvao30qKhAQ,20844
- openaivec/_provider.py,sha256=dNr9Y2C97GK-pkY81odurKoDup59dLK31V3EGT2HOwE,6711
+ openaivec/_provider.py,sha256=d7ZjD3Rd2z4g63UwkrKvlw1Z9EcbAItrJiixaay4MCs,7159
  openaivec/_proxy.py,sha256=J0qGDcZqSab26ScA8OXxzornfwuXtrVycqup-JPq464,29719
  openaivec/_responses.py,sha256=xtkiOn01RkauHq2FAKRAcjPglH8rmbaSz0-VE0ClTe8,24026
  openaivec/_schema.py,sha256=9enwqE2idLLUKbQxjiNn09uhdKz14kihEwUXglRqxx0,20543
  openaivec/_serialize.py,sha256=NLCKl4opc1WS24_duwpI2UGBepQ8SBh4YRxBlLwzDLw,8403
  openaivec/_util.py,sha256=dFWwjouJyvF-tqNPs2933OAt5Fw9I2Q2BvmGIfGH5k4,6423
- openaivec/pandas_ext.py,sha256=m4H6mrE__Jmr5R6hl6d8yc2JhVT0-wdf5GOKWIITeLU,63366
+ openaivec/pandas_ext.py,sha256=xa2DhE6Of8ZwZM3sImG7PSeGvtGkspT-697uHc85R9I,85970
  openaivec/spark.py,sha256=lI-noacLvuxu6gBztKdcYd9vfK3eNI3aCGwJylkzv7E,25367
  openaivec/task/__init__.py,sha256=lrgoc9UIox7XnxZ96dQRl88a-8QfuZRFBHshxctpMB8,6178
  openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
@@ -30,7 +30,7 @@ openaivec/task/nlp/sentiment_analysis.py,sha256=BNwWtNT-MNA76eIJbb31641upukmRwM9
  openaivec/task/nlp/translation.py,sha256=XTZM11JFjbgTK9wHnxFgVDabXZ5bqbabXK_bq2nEkyQ,6627
  openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
  openaivec/task/table/fillna.py,sha256=ZVcOpuh7ULVhrt1VsWy5fPhk53XNaiD7kXGCPhh83M8,6636
- openaivec-0.14.4.dist-info/METADATA,sha256=RF6rZDL5B4qYCqXIbC0jexv-IzHv48WBDV-MZtNHcvY,27566
- openaivec-0.14.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- openaivec-0.14.4.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
- openaivec-0.14.4.dist-info/RECORD,,
+ openaivec-0.14.5.dist-info/METADATA,sha256=chAhTTfFnXuZdxKQK5sVEJfOX1wT242b_g-TtHuurao,27566
+ openaivec-0.14.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ openaivec-0.14.5.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
+ openaivec-0.14.5.dist-info/RECORD,,