openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/_di.py +326 -0
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1635 -425
  18. openaivec/spark.py +604 -335
  19. openaivec/task/__init__.py +27 -29
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +51 -41
  22. openaivec/task/customer_support/inquiry_classification.py +86 -61
  23. openaivec/task/customer_support/inquiry_summary.py +44 -45
  24. openaivec/task/customer_support/intent_analysis.py +56 -41
  25. openaivec/task/customer_support/response_suggestion.py +49 -43
  26. openaivec/task/customer_support/urgency_analysis.py +76 -71
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +19 -20
  29. openaivec/task/nlp/keyword_extraction.py +22 -24
  30. openaivec/task/nlp/morphological_analysis.py +25 -25
  31. openaivec/task/nlp/named_entity_recognition.py +26 -28
  32. openaivec/task/nlp/sentiment_analysis.py +29 -21
  33. openaivec/task/nlp/translation.py +24 -30
  34. openaivec/task/table/__init__.py +3 -0
  35. openaivec/task/table/fillna.py +183 -0
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/responses.py +0 -392
  41. openaivec/serialize.py +0 -225
  42. openaivec/task/model.py +0 -84
  43. openaivec-0.10.0.dist-info/METADATA +0 -546
  44. openaivec-0.10.0.dist-info/RECORD +0 -29
  45. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
@@ -5,22 +5,22 @@ identifies and classifies named entities in text using OpenAI's language models.
5
5
 
6
6
  Example:
7
7
  Basic usage with BatchResponses:
8
-
8
+
9
9
  ```python
10
10
  from openai import OpenAI
11
- from openaivec.responses import BatchResponses
11
+ from openaivec import BatchResponses
12
12
  from openaivec.task import nlp
13
-
13
+
14
14
  client = OpenAI()
15
15
  analyzer = BatchResponses.of_task(
16
16
  client=client,
17
- model_name="gpt-4o-mini",
17
+ model_name="gpt-4.1-mini",
18
18
  task=nlp.NAMED_ENTITY_RECOGNITION
19
19
  )
20
-
20
+
21
21
  texts = ["John works at Microsoft in Seattle", "The meeting is on March 15th"]
22
22
  analyses = analyzer.parse(texts)
23
-
23
+
24
24
  for analysis in analyses:
25
25
  print(f"Persons: {analysis.persons}")
26
26
  print(f"Organizations: {analysis.organizations}")
@@ -28,31 +28,29 @@ Example:
28
28
  ```
29
29
 
30
30
  With pandas integration:
31
-
31
+
32
32
  ```python
33
33
  import pandas as pd
34
34
  from openaivec import pandas_ext # Required for .ai accessor
35
35
  from openaivec.task import nlp
36
-
36
+
37
37
  df = pd.DataFrame({"text": ["John works at Microsoft in Seattle", "The meeting is on March 15th"]})
38
38
  df["entities"] = df["text"].ai.task(nlp.NAMED_ENTITY_RECOGNITION)
39
-
39
+
40
40
  # Extract entity components
41
41
  extracted_df = df.ai.extract("entities")
42
42
  print(extracted_df[["text", "entities_persons", "entities_organizations", "entities_locations"]])
43
43
  ```
44
44
 
45
45
  Attributes:
46
- NAMED_ENTITY_RECOGNITION (PreparedTask): A prepared task instance
47
- configured for named entity recognition with temperature=0.0 and
48
- top_p=1.0 for deterministic output.
46
+ NAMED_ENTITY_RECOGNITION (PreparedTask): A prepared task instance configured for named
47
+ entity recognition. Provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for
48
+ deterministic output.
49
49
  """
50
50
 
51
- from typing import List, Optional
52
- from pydantic import BaseModel
53
- from pydantic import Field
51
+ from pydantic import BaseModel, Field
54
52
 
55
- from ..model import PreparedTask
53
+ from openaivec._model import PreparedTask
56
54
 
57
55
  __all__ = ["NAMED_ENTITY_RECOGNITION"]
58
56
 
@@ -62,22 +60,22 @@ class NamedEntity(BaseModel):
62
60
  label: str = Field(description="Entity type label")
63
61
  start: int = Field(description="Start position in the original text")
64
62
  end: int = Field(description="End position in the original text")
65
- confidence: Optional[float] = Field(description="Confidence score (0.0-1.0)")
63
+ confidence: float | None = Field(description="Confidence score (0.0-1.0)")
66
64
 
67
65
 
68
66
  class NamedEntityRecognition(BaseModel):
69
- persons: List[NamedEntity] = Field(description="Person entities")
70
- organizations: List[NamedEntity] = Field(description="Organization entities")
71
- locations: List[NamedEntity] = Field(description="Location entities")
72
- dates: List[NamedEntity] = Field(description="Date and time entities")
73
- money: List[NamedEntity] = Field(description="Money and currency entities")
74
- percentages: List[NamedEntity] = Field(description="Percentage entities")
75
- miscellaneous: List[NamedEntity] = Field(description="Other named entities")
67
+ persons: list[NamedEntity] = Field(description="Person entities")
68
+ organizations: list[NamedEntity] = Field(description="Organization entities")
69
+ locations: list[NamedEntity] = Field(description="Location entities")
70
+ dates: list[NamedEntity] = Field(description="Date and time entities")
71
+ money: list[NamedEntity] = Field(description="Money and currency entities")
72
+ percentages: list[NamedEntity] = Field(description="Percentage entities")
73
+ miscellaneous: list[NamedEntity] = Field(description="Other named entities")
76
74
 
77
75
 
78
76
  NAMED_ENTITY_RECOGNITION = PreparedTask(
79
- instructions="Identify and classify named entities in the following text. Extract persons, organizations, locations, dates, money, percentages, and other miscellaneous entities with their positions and confidence scores.",
77
+ instructions="Identify and classify named entities in the following text. Extract persons, "
78
+ "organizations, locations, dates, money, percentages, and other miscellaneous entities "
79
+ "with their positions and confidence scores.",
80
80
  response_format=NamedEntityRecognition,
81
- temperature=0.0,
82
- top_p=1.0
83
- )
81
+ )
@@ -5,22 +5,22 @@ sentiment and emotions in text using OpenAI's language models.
5
5
 
6
6
  Example:
7
7
  Basic usage with BatchResponses:
8
-
8
+
9
9
  ```python
10
10
  from openai import OpenAI
11
- from openaivec.responses import BatchResponses
11
+ from openaivec import BatchResponses
12
12
  from openaivec.task import nlp
13
-
13
+
14
14
  client = OpenAI()
15
15
  analyzer = BatchResponses.of_task(
16
16
  client=client,
17
- model_name="gpt-4o-mini",
17
+ model_name="gpt-4.1-mini",
18
18
  task=nlp.SENTIMENT_ANALYSIS
19
19
  )
20
-
20
+
21
21
  texts = ["I love this product!", "This is terrible and disappointing."]
22
22
  analyses = analyzer.parse(texts)
23
-
23
+
24
24
  for analysis in analyses:
25
25
  print(f"Sentiment: {analysis.sentiment}")
26
26
  print(f"Confidence: {analysis.confidence}")
@@ -28,46 +28,54 @@ Example:
28
28
  ```
29
29
 
30
30
  With pandas integration:
31
-
31
+
32
32
  ```python
33
33
  import pandas as pd
34
34
  from openaivec import pandas_ext # Required for .ai accessor
35
35
  from openaivec.task import nlp
36
-
36
+
37
37
  df = pd.DataFrame({"text": ["I love this product!", "This is terrible and disappointing."]})
38
38
  df["sentiment"] = df["text"].ai.task(nlp.SENTIMENT_ANALYSIS)
39
-
39
+
40
40
  # Extract sentiment components
41
41
  extracted_df = df.ai.extract("sentiment")
42
42
  print(extracted_df[["text", "sentiment_sentiment", "sentiment_confidence", "sentiment_polarity"]])
43
43
  ```
44
44
 
45
45
  Attributes:
46
- SENTIMENT_ANALYSIS (PreparedTask): A prepared task instance
47
- configured for sentiment analysis with temperature=0.0 and
48
- top_p=1.0 for deterministic output.
46
+ SENTIMENT_ANALYSIS (PreparedTask): A prepared task instance configured for sentiment
47
+ analysis. Provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for
48
+ deterministic output.
49
49
  """
50
50
 
51
- from typing import List, Literal
51
+ from typing import Literal
52
+
52
53
  from pydantic import BaseModel, Field
53
54
 
54
- from ..model import PreparedTask
55
+ from openaivec._model import PreparedTask
55
56
 
56
57
  __all__ = ["SENTIMENT_ANALYSIS"]
57
58
 
58
59
 
59
60
  class SentimentAnalysis(BaseModel):
60
- sentiment: Literal["positive", "negative", "neutral"] = Field(description="Overall sentiment (positive, negative, neutral)")
61
+ sentiment: Literal["positive", "negative", "neutral"] = Field(
62
+ description="Overall sentiment (positive, negative, neutral)"
63
+ )
61
64
  confidence: float = Field(description="Confidence score for sentiment (0.0-1.0)")
62
- emotions: List[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(description="Detected emotions (joy, sadness, anger, fear, surprise, disgust)")
63
- emotion_scores: List[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
65
+ emotions: list[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(
66
+ description="Detected emotions (joy, sadness, anger, fear, surprise, disgust)"
67
+ )
68
+ emotion_scores: list[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
64
69
  polarity: float = Field(description="Polarity score from -1.0 (negative) to 1.0 (positive)")
65
70
  subjectivity: float = Field(description="Subjectivity score from 0.0 (objective) to 1.0 (subjective)")
66
71
 
67
72
 
68
73
  SENTIMENT_ANALYSIS = PreparedTask(
69
- instructions="Analyze the sentiment and emotions in the following text. Provide overall sentiment classification, confidence scores, detected emotions, polarity, and subjectivity measures.\n\nIMPORTANT: Provide all analysis in the same language as the input text, except for the predefined categorical fields (sentiment, emotions) which must use the exact English values specified (positive/negative/neutral for sentiment, and joy/sadness/anger/fear/surprise/disgust for emotions).",
74
+ instructions="Analyze the sentiment and emotions in the following text. Provide overall "
75
+ "sentiment classification, confidence scores, detected emotions, polarity, and subjectivity "
76
+ "measures.\n\nIMPORTANT: Provide all analysis in the same language as the input text, except "
77
+ "for the predefined categorical fields (sentiment, emotions) which must use the exact "
78
+ "English values specified (positive/negative/neutral for sentiment, and "
79
+ "joy/sadness/anger/fear/surprise/disgust for emotions).",
70
80
  response_format=SentimentAnalysis,
71
- temperature=0.0,
72
- top_p=1.0
73
- )
81
+ )
@@ -10,22 +10,22 @@ provides structured output with consistent language code naming.
10
10
 
11
11
  Example:
12
12
  Basic usage with BatchResponses:
13
-
13
+
14
14
  ```python
15
15
  from openai import OpenAI
16
- from openaivec.responses import BatchResponses
16
+ from openaivec import BatchResponses
17
17
  from openaivec.task import nlp
18
-
18
+
19
19
  client = OpenAI()
20
20
  translator = BatchResponses.of_task(
21
21
  client=client,
22
- model_name="gpt-4o-mini",
22
+ model_name="gpt-4.1-mini",
23
23
  task=nlp.MULTILINGUAL_TRANSLATION
24
24
  )
25
-
25
+
26
26
  texts = ["Hello", "Good morning", "Thank you"]
27
27
  translations = translator.parse(texts)
28
-
28
+
29
29
  for translation in translations:
30
30
  print(f"English: {translation.en}")
31
31
  print(f"Japanese: {translation.ja}")
@@ -33,15 +33,15 @@ Example:
33
33
  ```
34
34
 
35
35
  With pandas integration:
36
-
36
+
37
37
  ```python
38
38
  import pandas as pd
39
39
  from openaivec import pandas_ext # Required for .ai accessor
40
40
  from openaivec.task import nlp
41
-
41
+
42
42
  df = pd.DataFrame({"text": ["Hello", "Goodbye"]})
43
43
  df["translations"] = df["text"].ai.task(nlp.MULTILINGUAL_TRANSLATION)
44
-
44
+
45
45
  # Extract specific languages
46
46
  extracted_df = df.ai.extract("translations")
47
47
  print(extracted_df[["text", "translations_en", "translations_ja", "translations_fr"]])
@@ -49,8 +49,8 @@ Example:
49
49
 
50
50
  Attributes:
51
51
  MULTILINGUAL_TRANSLATION (PreparedTask): A prepared task instance configured
52
- for multilingual translation with temperature=0.0 and top_p=1.0 for
53
- deterministic output.
52
+ for multilingual translation. Provide ``temperature=0.0`` and ``top_p=1.0``
53
+ to the calling API wrapper for deterministic output.
54
54
 
55
55
  Note:
56
56
  The translation covers 58 languages across major language families. All field
@@ -72,10 +72,9 @@ Note:
72
72
  - Other: Basque, Maltese
73
73
  """
74
74
 
75
- from openai import BaseModel
76
- from pydantic import Field
75
+ from pydantic import BaseModel, Field
77
76
 
78
- from ..model import PreparedTask
77
+ from openaivec._model import PreparedTask
79
78
 
80
79
  __all__ = ["MULTILINGUAL_TRANSLATION"]
81
80
 
@@ -88,7 +87,7 @@ class TranslatedString(BaseModel):
88
87
  sv: str = Field(description="Translated text in Swedish")
89
88
  da: str = Field(description="Translated text in Danish")
90
89
  no: str = Field(description="Translated text in Norwegian")
91
-
90
+
92
91
  # Romance languages
93
92
  es: str = Field(description="Translated text in Spanish")
94
93
  fr: str = Field(description="Translated text in French")
@@ -96,7 +95,7 @@ class TranslatedString(BaseModel):
96
95
  pt: str = Field(description="Translated text in Portuguese")
97
96
  ro: str = Field(description="Translated text in Romanian")
98
97
  ca: str = Field(description="Translated text in Catalan")
99
-
98
+
100
99
  # Slavic languages
101
100
  ru: str = Field(description="Translated text in Russian")
102
101
  pl: str = Field(description="Translated text in Polish")
@@ -106,37 +105,37 @@ class TranslatedString(BaseModel):
106
105
  bg: str = Field(description="Translated text in Bulgarian")
107
106
  hr: str = Field(description="Translated text in Croatian")
108
107
  sr: str = Field(description="Translated text in Serbian")
109
-
108
+
110
109
  # East Asian languages
111
110
  ja: str = Field(description="Translated text in Japanese")
112
111
  ko: str = Field(description="Translated text in Korean")
113
112
  zh: str = Field(description="Translated text in Chinese (Simplified)")
114
113
  zh_tw: str = Field(description="Translated text in Chinese (Traditional)")
115
-
114
+
116
115
  # South Asian languages
117
116
  hi: str = Field(description="Translated text in Hindi")
118
117
  bn: str = Field(description="Translated text in Bengali")
119
118
  te: str = Field(description="Translated text in Telugu")
120
119
  ta: str = Field(description="Translated text in Tamil")
121
120
  ur: str = Field(description="Translated text in Urdu")
122
-
121
+
123
122
  # Southeast Asian languages
124
123
  th: str = Field(description="Translated text in Thai")
125
124
  vi: str = Field(description="Translated text in Vietnamese")
126
125
  id: str = Field(description="Translated text in Indonesian")
127
126
  ms: str = Field(description="Translated text in Malay")
128
127
  tl: str = Field(description="Translated text in Filipino")
129
-
128
+
130
129
  # Middle Eastern languages
131
130
  ar: str = Field(description="Translated text in Arabic")
132
131
  he: str = Field(description="Translated text in Hebrew")
133
132
  fa: str = Field(description="Translated text in Persian")
134
133
  tr: str = Field(description="Translated text in Turkish")
135
-
134
+
136
135
  # African languages
137
136
  sw: str = Field(description="Translated text in Swahili")
138
137
  am: str = Field(description="Translated text in Amharic")
139
-
138
+
140
139
  # Other European languages
141
140
  fi: str = Field(description="Translated text in Finnish")
142
141
  hu: str = Field(description="Translated text in Hungarian")
@@ -144,10 +143,10 @@ class TranslatedString(BaseModel):
144
143
  lv: str = Field(description="Translated text in Latvian")
145
144
  lt: str = Field(description="Translated text in Lithuanian")
146
145
  el: str = Field(description="Translated text in Greek")
147
-
146
+
148
147
  # Nordic languages
149
148
  is_: str = Field(description="Translated text in Icelandic")
150
-
149
+
151
150
  # Other languages
152
151
  eu: str = Field(description="Translated text in Basque")
153
152
  cy: str = Field(description="Translated text in Welsh")
@@ -157,9 +156,4 @@ class TranslatedString(BaseModel):
157
156
 
158
157
  instructions = "Translate the following text into multiple languages. "
159
158
 
160
- MULTILINGUAL_TRANSLATION = PreparedTask(
161
- instructions=instructions,
162
- response_format=TranslatedString,
163
- temperature=0.0,
164
- top_p=1.0
165
- )
159
+ MULTILINGUAL_TRANSLATION = PreparedTask(instructions=instructions, response_format=TranslatedString)
@@ -0,0 +1,3 @@
1
+ from .fillna import FillNaResponse, fillna
2
+
3
+ __all__ = ["fillna", "FillNaResponse"]
@@ -0,0 +1,183 @@
1
+ """Missing value imputation task for DataFrame columns.
2
+
3
+ This module provides functionality to intelligently fill missing values in DataFrame
4
+ columns using AI-powered analysis. The task analyzes existing data patterns to
5
+ generate contextually appropriate values for missing entries.
6
+
7
+ Example:
8
+ Basic usage with pandas DataFrame:
9
+
10
+ ```python
11
+ import pandas as pd
12
+ from openaivec import pandas_ext # Required for .ai accessor
13
+ from openaivec.task.table import fillna
14
+
15
+ # Create DataFrame with missing values
16
+ df = pd.DataFrame({
17
+ "name": ["Alice", "Bob", None, "David"],
18
+ "age": [25, 30, 35, None],
19
+ "city": ["New York", "London", "Tokyo", "Paris"],
20
+ "salary": [50000, 60000, 70000, None]
21
+ })
22
+
23
+ # Fill missing values in the 'salary' column
24
+ task = fillna(df, "salary")
25
+ filled_salaries = df[df["salary"].isna()].ai.task(task)
26
+
27
+ # Apply filled values back to DataFrame
28
+ for result in filled_salaries:
29
+ df.loc[result.index, "salary"] = result.output
30
+ ```
31
+
32
+ With BatchResponses for more control:
33
+
34
+ ```python
35
+ from openai import OpenAI
36
+ from openaivec import BatchResponses
37
+ from openaivec.task.table import fillna
38
+
39
+ client = OpenAI()
40
+ df = pd.DataFrame({...}) # Your DataFrame with missing values
41
+
42
+ # Create fillna task for target column
43
+ task = fillna(df, "target_column")
44
+
45
+ # Get rows with missing values in target column
46
+ missing_rows = df[df["target_column"].isna()]
47
+
48
+ # Process with BatchResponses
49
+ filler = BatchResponses.of_task(
50
+ client=client,
51
+ model_name="gpt-4.1-mini",
52
+ task=task
53
+ )
54
+
55
+ # Generate inputs for missing rows
56
+ inputs = []
57
+ for idx, row in missing_rows.iterrows():
58
+ inputs.append({
59
+ "index": idx,
60
+ "input": {k: v for k, v in row.items() if k != "target_column"}
61
+ })
62
+
63
+ filled_values = filler.parse(inputs)
64
+ ```
65
+ """
66
+
67
+ import json
68
+
69
+ import pandas as pd
70
+ from pydantic import BaseModel, Field
71
+
72
+ from openaivec._model import PreparedTask
73
+ from openaivec._prompt import FewShotPromptBuilder
74
+
75
+ __all__ = ["fillna", "FillNaResponse"]
76
+
77
+
78
+ def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> list[dict]:
79
+ examples: list[dict] = []
80
+
81
+ samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
82
+ samples = samples.dropna(subset=[target_column_name])
83
+
84
+ for i, row in samples.head(max_examples).iterrows():
85
+ examples.append(
86
+ {
87
+ "index": i,
88
+ "input": {k: v for k, v in row.items() if k != target_column_name},
89
+ "output": row[target_column_name],
90
+ }
91
+ )
92
+
93
+ return examples
94
+
95
+
96
+ def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: int) -> str:
97
+ examples = get_examples(df, target_column_name, max_examples)
98
+
99
+ builder = (
100
+ FewShotPromptBuilder()
101
+ .purpose("Fill missing values in the target column based on the context provided by other columns.")
102
+ .caution("Ensure that the filled values are consistent with the data in other columns.")
103
+ )
104
+
105
+ for row in examples:
106
+ builder.example(
107
+ input_value=json.dumps({"index": row["index"], "input": row["input"]}, ensure_ascii=False),
108
+ output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
109
+ )
110
+
111
+ return builder.improve().build()
112
+
113
+
114
+ class FillNaResponse(BaseModel):
115
+ """Response model for missing value imputation results.
116
+
117
+ Contains the row index and the imputed value for a specific missing
118
+ entry in the target column.
119
+ """
120
+
121
+ index: int = Field(description="Index of the row in the original DataFrame")
122
+ output: int | float | str | bool | None = Field(
123
+ description="Filled value for the target column. This value should be JSON-compatible "
124
+ "and match the target column type in the original DataFrame."
125
+ )
126
+
127
+
128
+ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -> PreparedTask:
129
+ """Create a prepared task for filling missing values in a DataFrame column.
130
+
131
+ Analyzes the provided DataFrame to understand data patterns and creates
132
+ a configured task that can intelligently fill missing values in the
133
+ specified target column. The task uses few-shot learning with examples
134
+ extracted from non-null rows in the DataFrame.
135
+
136
+ Args:
137
+ df (pd.DataFrame): Source DataFrame containing the data with missing values.
138
+ target_column_name (str): Name of the column to fill missing values for.
139
+ This column should exist in the DataFrame and contain some
140
+ non-null values to serve as training examples.
141
+ max_examples (int): Maximum number of example rows to use for few-shot
142
+ learning. Defaults to 500. Higher values provide more context
143
+ but increase token usage and processing time.
144
+
145
+ Returns:
146
+ PreparedTask configured for missing value imputation with:
147
+ - Instructions based on DataFrame patterns
148
+ - FillNaResponse format for structured output
149
+ - No preset sampling parameters; pass ``temperature=0.0`` and ``top_p=1.0`` to the API call for deterministic output
150
+
151
+ Raises:
152
+ ValueError: If target_column_name doesn't exist in DataFrame,
153
+ contains no non-null values for training examples, DataFrame is empty,
154
+ or max_examples is not a positive integer.
155
+
156
+ Example:
157
+ ```python
158
+ import pandas as pd
159
+ from openaivec.task.table import fillna
160
+
161
+ df = pd.DataFrame({
162
+ "product": ["laptop", "phone", "tablet", "laptop"],
163
+ "brand": ["Apple", "Samsung", None, "Dell"],
164
+ "price": [1200, 800, 600, 1000]
165
+ })
166
+
167
+ # Create task to fill missing brand values
168
+ task = fillna(df, "brand")
169
+
170
+ # Use with pandas AI accessor
171
+ missing_brands = df[df["brand"].isna()].ai.task(task)
172
+ ```
173
+ """
174
+ if df.empty:
175
+ raise ValueError("DataFrame is empty.")
176
+ if not isinstance(max_examples, int) or max_examples <= 0:
177
+ raise ValueError("max_examples must be a positive integer.")
178
+ if target_column_name not in df.columns:
179
+ raise ValueError(f"Column '{target_column_name}' does not exist in the DataFrame.")
180
+ if df[target_column_name].notna().sum() == 0:
181
+ raise ValueError(f"Column '{target_column_name}' contains no non-null values for training examples.")
182
+ instructions = get_instructions(df, target_column_name, max_examples)
183
+ return PreparedTask(instructions=instructions, response_format=FillNaResponse)