openaivec 0.14.12__py3-none-any.whl → 0.14.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openaivec/spark.py CHANGED
@@ -134,6 +134,7 @@ import numpy as np
134
134
  import pandas as pd
135
135
  import tiktoken
136
136
  from pydantic import BaseModel
137
+ from pyspark import SparkContext
137
138
  from pyspark.sql import SparkSession
138
139
  from pyspark.sql.pandas.functions import pandas_udf
139
140
  from pyspark.sql.types import ArrayType, BooleanType, FloatType, IntegerType, StringType, StructField, StructType
@@ -180,7 +181,10 @@ def setup(
180
181
  If provided, registers `EmbeddingsModelName` in the DI container.
181
182
  """
182
183
 
183
- sc = spark.sparkContext
184
+ CONTAINER.register(SparkSession, lambda: spark)
185
+ CONTAINER.register(SparkContext, lambda: CONTAINER.resolve(SparkSession).sparkContext)
186
+
187
+ sc = CONTAINER.resolve(SparkContext)
184
188
  sc.environment["OPENAI_API_KEY"] = api_key
185
189
 
186
190
  os.environ["OPENAI_API_KEY"] = api_key
@@ -219,7 +223,10 @@ def setup_azure(
219
223
  If provided, registers `EmbeddingsModelName` in the DI container.
220
224
  """
221
225
 
222
- sc = spark.sparkContext
226
+ CONTAINER.register(SparkSession, lambda: spark)
227
+ CONTAINER.register(SparkContext, lambda: CONTAINER.resolve(SparkSession).sparkContext)
228
+
229
+ sc = CONTAINER.resolve(SparkContext)
223
230
  sc.environment["AZURE_OPENAI_API_KEY"] = api_key
224
231
  sc.environment["AZURE_OPENAI_BASE_URL"] = base_url
225
232
  sc.environment["AZURE_OPENAI_API_VERSION"] = api_version
@@ -317,8 +324,6 @@ def responses_udf(
317
324
  response_format: type[ResponseFormat] = str,
318
325
  model_name: str = CONTAINER.resolve(ResponsesModelName).value,
319
326
  batch_size: int | None = None,
320
- temperature: float | None = 0.0,
321
- top_p: float = 1.0,
322
327
  max_concurrency: int = 8,
323
328
  **api_kwargs,
324
329
  ) -> UserDefinedFunction:
@@ -353,17 +358,14 @@ def responses_udf(
353
358
  Defaults to None (automatic batch size optimization that dynamically
354
359
  adjusts based on execution time, targeting 30-60 seconds per batch).
355
360
  Set to a positive integer (e.g., 32-128) for fixed batch size.
356
- temperature (float): Sampling temperature (0.0 to 2.0). Defaults to 0.0.
357
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
358
361
  max_concurrency (int): Maximum number of concurrent API requests **PER EXECUTOR**.
359
362
  Total cluster concurrency = max_concurrency × number_of_executors.
360
363
  Higher values increase throughput but may hit OpenAI rate limits.
361
364
  Recommended: 4-12 per executor. Defaults to 8.
362
-
363
- Additional Keyword Args:
364
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
365
- ``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
366
- These parameters are applied to all API requests made by the UDF.
365
+ **api_kwargs: Additional OpenAI API parameters (e.g. ``temperature``, ``top_p``,
366
+ ``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
367
+ forwarded verbatim to the underlying API calls. These parameters are applied to
368
+ all API requests made by the UDF.
367
369
 
368
370
  Returns:
369
371
  UserDefinedFunction: A Spark pandas UDF configured to generate responses asynchronously.
@@ -399,8 +401,6 @@ def responses_udf(
399
401
  part.aio.responses_with_cache(
400
402
  instructions=instructions,
401
403
  response_format=response_format,
402
- temperature=temperature,
403
- top_p=top_p,
404
404
  cache=cache,
405
405
  **api_kwargs,
406
406
  )
@@ -427,8 +427,6 @@ def responses_udf(
427
427
  part.aio.responses_with_cache(
428
428
  instructions=instructions,
429
429
  response_format=str,
430
- temperature=temperature,
431
- top_p=top_p,
432
430
  cache=cache,
433
431
  **api_kwargs,
434
432
  )
@@ -460,7 +458,7 @@ def task_udf(
460
458
 
461
459
  Args:
462
460
  task (PreparedTask): A predefined task configuration containing instructions,
463
- response format, temperature, and top_p settings.
461
+ response format, and API parameters.
464
462
  model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
465
463
  For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
466
464
  batch_size (int | None): Number of rows per async batch request within each partition.
@@ -474,10 +472,10 @@ def task_udf(
474
472
  Recommended: 4-12 per executor. Defaults to 8.
475
473
 
476
474
  Additional Keyword Args:
477
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
478
- ``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
479
- These parameters are applied to all API requests made by the UDF and override any
480
- parameters set in the task configuration.
475
+ Arbitrary OpenAI Responses API parameters (e.g. ``temperature``, ``top_p``,
476
+ ``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
477
+ are forwarded verbatim to the underlying API calls. These parameters are applied to
478
+ all API requests made by the UDF and override any parameters set in the task configuration.
481
479
 
482
480
  Returns:
483
481
  UserDefinedFunction: A Spark pandas UDF configured to execute the specified task
@@ -498,15 +496,16 @@ def task_udf(
498
496
  **Automatic Caching**: Duplicate inputs within each partition are cached,
499
497
  reducing API calls and costs significantly on datasets with repeated content.
500
498
  """
499
+ # Merge task's api_kwargs with caller's api_kwargs (caller takes precedence)
500
+ merged_kwargs = {**task.api_kwargs, **api_kwargs}
501
+
501
502
  return responses_udf(
502
503
  instructions=task.instructions,
503
504
  response_format=task.response_format,
504
505
  model_name=model_name,
505
506
  batch_size=batch_size,
506
- temperature=task.temperature,
507
- top_p=task.top_p,
508
507
  max_concurrency=max_concurrency,
509
- **api_kwargs,
508
+ **merged_kwargs,
510
509
  )
511
510
 
512
511
 
@@ -532,15 +531,13 @@ def infer_schema(
532
531
  InferredSchema: An object containing the inferred schema and response format.
533
532
  """
534
533
 
535
- from pyspark.sql import SparkSession
536
-
537
- spark = SparkSession.builder.getOrCreate()
534
+ spark = CONTAINER.resolve(SparkSession)
538
535
  examples: list[str] = (
539
536
  spark.table(example_table_name).rdd.map(lambda row: row[example_field_name]).takeSample(False, max_examples)
540
537
  )
541
538
 
542
539
  input = SchemaInferenceInput(
543
- purpose=instructions,
540
+ instructions=instructions,
544
541
  examples=examples,
545
542
  )
546
543
  inferer = CONTAINER.resolve(SchemaInferer)
@@ -555,8 +552,6 @@ def parse_udf(
555
552
  max_examples: int = 100,
556
553
  model_name: str = CONTAINER.resolve(ResponsesModelName).value,
557
554
  batch_size: int | None = None,
558
- temperature: float | None = 0.0,
559
- top_p: float = 1.0,
560
555
  max_concurrency: int = 8,
561
556
  **api_kwargs,
562
557
  ) -> UserDefinedFunction:
@@ -586,17 +581,15 @@ def parse_udf(
586
581
  Defaults to None (automatic batch size optimization that dynamically
587
582
  adjusts based on execution time, targeting 30-60 seconds per batch).
588
583
  Set to a positive integer (e.g., 32-128) for fixed batch size
589
- temperature (float | None): Sampling temperature (0.0 to 2.0). Defaults to 0.0.
590
- top_p (float): Nucleus sampling parameter. Defaults to 1.0.
591
584
  max_concurrency (int): Maximum number of concurrent API requests **PER EXECUTOR**.
592
585
  Total cluster concurrency = max_concurrency × number_of_executors.
593
586
  Higher values increase throughput but may hit OpenAI rate limits.
594
587
  Recommended: 4-12 per executor. Defaults to 8.
595
- Additional Keyword Args:
596
- Arbitrary OpenAI Responses API parameters (e.g. ``frequency_penalty``, ``presence_penalty``,
597
- ``seed``, ``max_output_tokens``, etc.) are forwarded verbatim to the underlying API calls.
598
- These parameters are applied to all API requests made by the UDF and override any
599
- parameters set in the response_format or example data.
588
+ **api_kwargs: Additional OpenAI API parameters (e.g. ``temperature``, ``top_p``,
589
+ ``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
590
+ forwarded verbatim to the underlying API calls. These parameters are applied to
591
+ all API requests made by the UDF and override any parameters set in the
592
+ response_format or example data.
600
593
  Returns:
601
594
  UserDefinedFunction: A Spark pandas UDF configured to parse responses asynchronously.
602
595
  Output schema is `StringType` for str response format or a struct derived from
@@ -623,8 +616,6 @@ def parse_udf(
623
616
  response_format=schema.model if schema else response_format,
624
617
  model_name=model_name,
625
618
  batch_size=batch_size,
626
- temperature=temperature,
627
- top_p=top_p,
628
619
  max_concurrency=max_concurrency,
629
620
  **api_kwargs,
630
621
  )
@@ -634,6 +625,7 @@ def embeddings_udf(
634
625
  model_name: str = CONTAINER.resolve(EmbeddingsModelName).value,
635
626
  batch_size: int | None = None,
636
627
  max_concurrency: int = 8,
628
+ **api_kwargs,
637
629
  ) -> UserDefinedFunction:
638
630
  """Create an asynchronous Spark pandas UDF for generating embeddings.
639
631
 
@@ -669,6 +661,7 @@ def embeddings_udf(
669
661
  Total cluster concurrency = max_concurrency × number_of_executors.
670
662
  Higher values increase throughput but may hit OpenAI rate limits.
671
663
  Recommended: 4-12 per executor. Defaults to 8.
664
+ **api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
672
665
 
673
666
  Returns:
674
667
  UserDefinedFunction: A Spark pandas UDF configured to generate embeddings asynchronously
@@ -695,7 +688,7 @@ def embeddings_udf(
695
688
 
696
689
  try:
697
690
  for part in col:
698
- embeddings: pd.Series = asyncio.run(part.aio.embeddings_with_cache(cache=cache))
691
+ embeddings: pd.Series = asyncio.run(part.aio.embeddings_with_cache(cache=cache, **api_kwargs))
699
692
  yield embeddings.map(lambda x: x.tolist())
700
693
  finally:
701
694
  asyncio.run(cache.clear())
@@ -117,7 +117,7 @@ All tasks are built using the `PreparedTask` dataclass:
117
117
  @dataclass(frozen=True)
118
118
  class PreparedTask:
119
119
  instructions: str # Detailed prompt for the LLM
120
- response_format: Type[ResponseFormat] # Pydantic model or str for structured/plain output
120
+ response_format: type[ResponseFormat] # Pydantic model or str for structured/plain output
121
121
  temperature: float = 0.0 # Sampling temperature
122
122
  top_p: float = 1.0 # Nucleus sampling parameter
123
123
  ```
@@ -95,15 +95,12 @@ class CustomerSentiment(BaseModel):
95
95
  )
96
96
 
97
97
 
98
- def customer_sentiment(
99
- business_context: str = "general customer support", temperature: float = 0.0, top_p: float = 1.0
100
- ) -> PreparedTask:
98
+ def customer_sentiment(business_context: str = "general customer support", **api_kwargs) -> PreparedTask:
101
99
  """Create a configurable customer sentiment analysis task.
102
100
 
103
101
  Args:
104
102
  business_context (str): Business context for sentiment analysis.
105
- temperature (float): Sampling temperature (0.0-1.0).
106
- top_p (float): Nucleus sampling parameter (0.0-1.0).
103
+ **api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
107
104
 
108
105
  Returns:
109
106
  PreparedTask configured for customer sentiment analysis.
@@ -169,10 +166,8 @@ values like "positive" for sentiment.
169
166
 
170
167
  Provide comprehensive sentiment analysis with business context and recommended response strategy."""
171
168
 
172
- return PreparedTask(
173
- instructions=instructions, response_format=CustomerSentiment, temperature=temperature, top_p=top_p
174
- )
169
+ return PreparedTask(instructions=instructions, response_format=CustomerSentiment, api_kwargs=api_kwargs)
175
170
 
176
171
 
177
172
  # Backward compatibility - default configuration
178
- CUSTOMER_SENTIMENT = customer_sentiment()
173
+ CUSTOMER_SENTIMENT = customer_sentiment(temperature=0.0, top_p=1.0)
@@ -119,8 +119,7 @@ def inquiry_classification(
119
119
  priority_rules: Dict[str, str] | None = None,
120
120
  business_context: str = "general customer support",
121
121
  custom_keywords: Dict[str, list[str]] | None = None,
122
- temperature: float = 0.0,
123
- top_p: float = 1.0,
122
+ **api_kwargs,
124
123
  ) -> PreparedTask:
125
124
  """Create a configurable inquiry classification task.
126
125
 
@@ -133,8 +132,8 @@ def inquiry_classification(
133
132
  Default uses standard priority indicators.
134
133
  business_context (str): Description of the business context to help with classification.
135
134
  custom_keywords (dict[str, list[str]] | None): Dictionary mapping categories to relevant keywords.
136
- temperature (float): Sampling temperature (0.0-1.0).
137
- top_p (float): Nucleus sampling parameter (0.0-1.0).
135
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
136
+ such as temperature, top_p, etc.
138
137
 
139
138
  Returns:
140
139
  PreparedTask configured for inquiry classification.
@@ -254,10 +253,8 @@ language where appropriate, but priority must use English values like "high".
254
253
 
255
254
  Provide accurate classification with detailed reasoning."""
256
255
 
257
- return PreparedTask(
258
- instructions=instructions, response_format=InquiryClassification, temperature=temperature, top_p=top_p
259
- )
256
+ return PreparedTask(instructions=instructions, response_format=InquiryClassification, api_kwargs=api_kwargs)
260
257
 
261
258
 
262
259
  # Backward compatibility - default configuration
263
- INQUIRY_CLASSIFICATION = inquiry_classification()
260
+ INQUIRY_CLASSIFICATION = inquiry_classification(temperature=0.0, top_p=1.0)
@@ -87,16 +87,15 @@ class InquirySummary(BaseModel):
87
87
  def inquiry_summary(
88
88
  summary_length: str = "concise",
89
89
  business_context: str = "general customer support",
90
- temperature: float = 0.0,
91
- top_p: float = 1.0,
90
+ **api_kwargs,
92
91
  ) -> PreparedTask:
93
92
  """Create a configurable inquiry summary task.
94
93
 
95
94
  Args:
96
95
  summary_length (str): Length of summary (concise, detailed, bullet_points).
97
96
  business_context (str): Business context for summary.
98
- temperature (float): Sampling temperature (0.0-1.0).
99
- top_p (float): Nucleus sampling parameter (0.0-1.0).
97
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
98
+ such as temperature, top_p, etc.
100
99
 
101
100
  Returns:
102
101
  PreparedTask configured for inquiry summarization.
@@ -163,8 +162,8 @@ input is in German, provide all summary content in German, but use English value
163
162
 
164
163
  Provide accurate, actionable summary that enables efficient support resolution."""
165
164
 
166
- return PreparedTask(instructions=instructions, response_format=InquirySummary, temperature=temperature, top_p=top_p)
165
+ return PreparedTask(instructions=instructions, response_format=InquirySummary, api_kwargs=api_kwargs)
167
166
 
168
167
 
169
168
  # Backward compatibility - default configuration
170
- INQUIRY_SUMMARY = inquiry_summary()
169
+ INQUIRY_SUMMARY = inquiry_summary(temperature=0.0, top_p=1.0)
@@ -100,15 +100,13 @@ class IntentAnalysis(BaseModel):
100
100
  )
101
101
 
102
102
 
103
- def intent_analysis(
104
- business_context: str = "general customer support", temperature: float = 0.0, top_p: float = 1.0
105
- ) -> PreparedTask:
103
+ def intent_analysis(business_context: str = "general customer support", **api_kwargs) -> PreparedTask:
106
104
  """Create a configurable intent analysis task.
107
105
 
108
106
  Args:
109
107
  business_context (str): Business context for intent analysis.
110
- temperature (float): Sampling temperature (0.0-1.0).
111
- top_p (float): Nucleus sampling parameter (0.0-1.0).
108
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
109
+ such as temperature, top_p, etc.
112
110
 
113
111
  Returns:
114
112
  PreparedTask configured for intent analysis.
@@ -171,8 +169,8 @@ next_steps, and reasoning in Japanese, but use English values like "get_help" fo
171
169
 
172
170
  Provide comprehensive intent analysis with actionable recommendations."""
173
171
 
174
- return PreparedTask(instructions=instructions, response_format=IntentAnalysis, temperature=temperature, top_p=top_p)
172
+ return PreparedTask(instructions=instructions, response_format=IntentAnalysis, api_kwargs=api_kwargs)
175
173
 
176
174
 
177
175
  # Backward compatibility - default configuration
178
- INTENT_ANALYSIS = intent_analysis()
176
+ INTENT_ANALYSIS = intent_analysis(temperature=0.0, top_p=1.0)
@@ -92,8 +92,7 @@ def response_suggestion(
92
92
  response_style: str = "professional",
93
93
  company_name: str = "our company",
94
94
  business_context: str = "general customer support",
95
- temperature: float = 0.0,
96
- top_p: float = 1.0,
95
+ **api_kwargs,
97
96
  ) -> PreparedTask:
98
97
  """Create a configurable response suggestion task.
99
98
 
@@ -101,8 +100,8 @@ def response_suggestion(
101
100
  response_style (str): Style of response (professional, friendly, empathetic, formal).
102
101
  company_name (str): Name of the company for personalization.
103
102
  business_context (str): Business context for responses.
104
- temperature (float): Sampling temperature (0.0-1.0).
105
- top_p (float): Nucleus sampling parameter (0.0-1.0).
103
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
104
+ such as temperature, top_p, etc.
106
105
 
107
106
  Returns:
108
107
  PreparedTask configured for response suggestions.
@@ -190,10 +189,8 @@ but use English values like "empathetic" for tone.
190
189
  Generate helpful, professional response that moves toward resolution while maintaining
191
190
  positive customer relationship."""
192
191
 
193
- return PreparedTask(
194
- instructions=instructions, response_format=ResponseSuggestion, temperature=temperature, top_p=top_p
195
- )
192
+ return PreparedTask(instructions=instructions, response_format=ResponseSuggestion, api_kwargs=api_kwargs)
196
193
 
197
194
 
198
195
  # Backward compatibility - default configuration
199
- RESPONSE_SUGGESTION = response_suggestion()
196
+ RESPONSE_SUGGESTION = response_suggestion(temperature=0.0, top_p=1.0)
@@ -135,8 +135,7 @@ def urgency_analysis(
135
135
  business_context: str = "general customer support",
136
136
  business_hours: str = "24/7 support",
137
137
  sla_rules: Dict[str, str] | None = None,
138
- temperature: float = 0.0,
139
- top_p: float = 1.0,
138
+ **api_kwargs,
140
139
  ) -> PreparedTask:
141
140
  """Create a configurable urgency analysis task.
142
141
 
@@ -149,8 +148,8 @@ def urgency_analysis(
149
148
  business_context (str): Description of the business context.
150
149
  business_hours (str): Description of business hours for response time calculation.
151
150
  sla_rules (dict[str, str] | None): Dictionary mapping customer tiers to SLA requirements.
152
- temperature (float): Sampling temperature (0.0-1.0).
153
- top_p (float): Nucleus sampling parameter (0.0-1.0).
151
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
152
+ such as temperature, top_p, etc.
154
153
 
155
154
  Returns:
156
155
  PreparedTask configured for urgency analysis.
@@ -287,10 +286,8 @@ urgency_level.
287
286
 
288
287
  Provide detailed analysis with clear reasoning for urgency level and response time recommendations."""
289
288
 
290
- return PreparedTask(
291
- instructions=instructions, response_format=UrgencyAnalysis, temperature=temperature, top_p=top_p
292
- )
289
+ return PreparedTask(instructions=instructions, response_format=UrgencyAnalysis, api_kwargs=api_kwargs)
293
290
 
294
291
 
295
292
  # Backward compatibility - default configuration
296
- URGENCY_ANALYSIS = urgency_analysis()
293
+ URGENCY_ANALYSIS = urgency_analysis(temperature=0.0, top_p=1.0)
@@ -75,6 +75,5 @@ DEPENDENCY_PARSING = PreparedTask(
75
75
  "relations between words, determine the root word, and provide a tree representation of the "
76
76
  "syntactic structure.",
77
77
  response_format=DependencyParsing,
78
- temperature=0.0,
79
- top_p=1.0,
78
+ api_kwargs={"temperature": 0.0, "top_p": 1.0},
80
79
  )
@@ -75,6 +75,5 @@ KEYWORD_EXTRACTION = PreparedTask(
75
75
  instructions="Extract important keywords and phrases from the following text. Rank them "
76
76
  "by importance, provide frequency counts, identify main topics, and generate a brief summary.",
77
77
  response_format=KeywordExtraction,
78
- temperature=0.0,
79
- top_p=1.0,
78
+ api_kwargs={"temperature": 0.0, "top_p": 1.0},
80
79
  )
@@ -70,6 +70,5 @@ MORPHOLOGICAL_ANALYSIS = PreparedTask(
70
70
  "identify part-of-speech tags, provide lemmatized forms, and extract morphological features "
71
71
  "for each token.",
72
72
  response_format=MorphologicalAnalysis,
73
- temperature=0.0,
74
- top_p=1.0,
73
+ api_kwargs={"temperature": 0.0, "top_p": 1.0},
75
74
  )
@@ -78,6 +78,5 @@ NAMED_ENTITY_RECOGNITION = PreparedTask(
78
78
  "organizations, locations, dates, money, percentages, and other miscellaneous entities "
79
79
  "with their positions and confidence scores.",
80
80
  response_format=NamedEntityRecognition,
81
- temperature=0.0,
82
- top_p=1.0,
81
+ api_kwargs={"temperature": 0.0, "top_p": 1.0},
83
82
  )
@@ -78,6 +78,5 @@ SENTIMENT_ANALYSIS = PreparedTask(
78
78
  "English values specified (positive/negative/neutral for sentiment, and "
79
79
  "joy/sadness/anger/fear/surprise/disgust for emotions).",
80
80
  response_format=SentimentAnalysis,
81
- temperature=0.0,
82
- top_p=1.0,
81
+ api_kwargs={"temperature": 0.0, "top_p": 1.0},
83
82
  )
@@ -157,5 +157,5 @@ class TranslatedString(BaseModel):
157
157
  instructions = "Translate the following text into multiple languages. "
158
158
 
159
159
  MULTILINGUAL_TRANSLATION = PreparedTask(
160
- instructions=instructions, response_format=TranslatedString, temperature=0.0, top_p=1.0
160
+ instructions=instructions, response_format=TranslatedString, api_kwargs={"temperature": 0.0, "top_p": 1.0}
161
161
  )
@@ -125,7 +125,7 @@ class FillNaResponse(BaseModel):
125
125
  )
126
126
 
127
127
 
128
- def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -> PreparedTask:
128
+ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500, **api_kwargs) -> PreparedTask:
129
129
  """Create a prepared task for filling missing values in a DataFrame column.
130
130
 
131
131
  Analyzes the provided DataFrame to understand data patterns and creates
@@ -141,12 +141,14 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
141
141
  max_examples (int): Maximum number of example rows to use for few-shot
142
142
  learning. Defaults to 500. Higher values provide more context
143
143
  but increase token usage and processing time.
144
+ **api_kwargs: Additional keyword arguments to pass to the OpenAI API,
145
+ such as temperature, top_p, etc.
144
146
 
145
147
  Returns:
146
148
  PreparedTask configured for missing value imputation with:
147
149
  - Instructions based on DataFrame patterns
148
150
  - FillNaResponse format for structured output
149
- - Temperature=0.0 and top_p=1.0 for deterministic results
151
+ - Default deterministic settings (temperature=0.0, top_p=1.0)
150
152
 
151
153
  Raises:
152
154
  ValueError: If target_column_name doesn't exist in DataFrame,
@@ -180,4 +182,7 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
180
182
  if df[target_column_name].notna().sum() == 0:
181
183
  raise ValueError(f"Column '{target_column_name}' contains no non-null values for training examples.")
182
184
  instructions = get_instructions(df, target_column_name, max_examples)
183
- return PreparedTask(instructions=instructions, response_format=FillNaResponse, temperature=0.0, top_p=1.0)
185
+ # Set default values for deterministic results if not provided
186
+ if not api_kwargs:
187
+ api_kwargs = {"temperature": 0.0, "top_p": 1.0}
188
+ return PreparedTask(instructions=instructions, response_format=FillNaResponse, api_kwargs=api_kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openaivec
3
- Version: 0.14.12
3
+ Version: 0.14.13
4
4
  Summary: Generative mutation for tabular calculation
5
5
  Project-URL: Homepage, https://microsoft.github.io/openaivec/
6
6
  Project-URL: Repository, https://github.com/microsoft/openaivec
@@ -0,0 +1,37 @@
1
+ openaivec/__init__.py,sha256=mXCGNNTjYbmE4CAXGvAs78soxUsoy_mxxnvaCk_CL6Y,361
2
+ openaivec/_di.py,sha256=Cl1ZoNBlQsJL1bpzoMDl08uT9pZFVSlqOdLbS3_MwPE,11462
3
+ openaivec/_dynamic.py,sha256=7ZaC59w2Edemnao57XeZVO4qmSOA-Kus6TchZC3Dd5o,14821
4
+ openaivec/_embeddings.py,sha256=nirLqOu69fTB7aSCYhbbRbwAA6ggwEYJiQoPDsHqAqQ,8200
5
+ openaivec/_log.py,sha256=LHNs6AbJzM4weaRARZFroigxR6D148d7WSIMLk1IhbU,1439
6
+ openaivec/_model.py,sha256=71oiENUKwpY58ilj1LE7fDOAhs7PUSiZRiUHKUIuu7Y,3235
7
+ openaivec/_optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
8
+ openaivec/_prompt.py,sha256=NWE7jZKYphkD856haynJLmRadPugJ68emT42pd7Ciso,20633
9
+ openaivec/_provider.py,sha256=8z8gPYY5-Z7rzDlj_NC6hR__DUqVAH7VLHJn6LalzRg,6158
10
+ openaivec/_proxy.py,sha256=AiGuC1MCFjZCRXCac-pHUI3Np3nf1HIpWY6nC9ZVCFY,29671
11
+ openaivec/_responses.py,sha256=qBrYv4qblDIs5dRvj9t96r8UfAJmy4ZvtAe6csNZ7oM,20412
12
+ openaivec/_schema.py,sha256=iOeR5J_ihZRDZtzmqvOK1ZtInKcx4OnoR38DB3VmmQw,15666
13
+ openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
14
+ openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
15
+ openaivec/pandas_ext.py,sha256=r2jpFqDnWcQYK3pMv5hCtOStOMltccDyLkpprLmIOls,85715
16
+ openaivec/spark.py,sha256=zaEivVOe3ukG8coa9JEUyISQ1YcMqCvAbhaarvn2SOM,32507
17
+ openaivec/task/__init__.py,sha256=RkYIKrcE83M_9Um9cSMkeGzL9kPRAovajfRvr31YxLE,6178
18
+ openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
19
+ openaivec/task/customer_support/customer_sentiment.py,sha256=d8spZUtImjePK0xWGvIW98ghbdyOZ0KEZmaUpG8QB7M,7532
20
+ openaivec/task/customer_support/inquiry_classification.py,sha256=NKz1oTm06eU6W-plHe3T3o20lCk6M2NemVXZ4Y_IozU,9602
21
+ openaivec/task/customer_support/inquiry_summary.py,sha256=8X1J8lZwlgX6s02cs86-K0moZ5gTrX7E7WEKiY2vpiQ,6896
22
+ openaivec/task/customer_support/intent_analysis.py,sha256=Jnokzi0wTlHpuTRl5uqxdoHClYU71b9iFTzn3KNeNVM,7478
23
+ openaivec/task/customer_support/response_suggestion.py,sha256=IykZE-BJ_ENhe5frnVl4bQKpArwOuNAITGlBxlu62c0,8306
24
+ openaivec/task/customer_support/urgency_analysis.py,sha256=fdBT0Ud-InGqou-ZuFcVc3EpUNAq5N55_Q9D6D74WlQ,11531
25
+ openaivec/task/nlp/__init__.py,sha256=QoQ0egEK9IEh5hdrE07rZ_KCmC0gy_2FPrWJYRWiipY,512
26
+ openaivec/task/nlp/dependency_parsing.py,sha256=V7pd4_EbBBvdpnFDkfZh08u7kfJ7XJLq_qLkec48yr0,2832
27
+ openaivec/task/nlp/keyword_extraction.py,sha256=e6niCt8XU0EPJLGYOJXQvbfWtl7w9CgfnCE188kecb4,2819
28
+ openaivec/task/nlp/morphological_analysis.py,sha256=qTFFBkFP8CRZU87S59ju5ygXWlEBCtjYlH9Su7czLjs,2416
29
+ openaivec/task/nlp/named_entity_recognition.py,sha256=9BFKYk0PZlyNN8pItGIEFecvZew4K_F5GgY5Ub8xDtM,3052
30
+ openaivec/task/nlp/sentiment_analysis.py,sha256=u-zpqAaQYcr7I3mqMv_CTJXkfxtoLft3qm-qwmqb_p4,3100
31
+ openaivec/task/nlp/translation.py,sha256=kgWj2oN8pUId3vuHTJNx636gB49AGEKXWICA_XJgE_0,6628
32
+ openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
33
+ openaivec/task/table/fillna.py,sha256=zL6m5hGD4kamV7qHETnn__B59wIY540Ks0EzNgUJgdI,6888
34
+ openaivec-0.14.13.dist-info/METADATA,sha256=rB_WJhIVX11WUoA-r2Ryn57QIuTWj0q0JhjPlz6wXv4,28216
35
+ openaivec-0.14.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
+ openaivec-0.14.13.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
+ openaivec-0.14.13.dist-info/RECORD,,
@@ -1,37 +0,0 @@
1
- openaivec/__init__.py,sha256=mXCGNNTjYbmE4CAXGvAs78soxUsoy_mxxnvaCk_CL6Y,361
2
- openaivec/_di.py,sha256=Cl1ZoNBlQsJL1bpzoMDl08uT9pZFVSlqOdLbS3_MwPE,11462
3
- openaivec/_dynamic.py,sha256=7ZaC59w2Edemnao57XeZVO4qmSOA-Kus6TchZC3Dd5o,14821
4
- openaivec/_embeddings.py,sha256=upCjl8m9h1CihP6t7wvIH_vivOAPSgmgooAxIhnUMUw,7449
5
- openaivec/_log.py,sha256=LHNs6AbJzM4weaRARZFroigxR6D148d7WSIMLk1IhbU,1439
6
- openaivec/_model.py,sha256=toS2oBubrJa9jrdYy-87Fb2XivjXUlk_8Zn5gKUAcFI,3345
7
- openaivec/_optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
8
- openaivec/_prompt.py,sha256=zLv13q47CKV3jnETUyWAIlnjXFSEMs70c8m0yN7_Hek,20820
9
- openaivec/_provider.py,sha256=8z8gPYY5-Z7rzDlj_NC6hR__DUqVAH7VLHJn6LalzRg,6158
10
- openaivec/_proxy.py,sha256=AiGuC1MCFjZCRXCac-pHUI3Np3nf1HIpWY6nC9ZVCFY,29671
11
- openaivec/_responses.py,sha256=lVJRa_Uc7hQJnYJRgumqwBbu6GToZqsLFS6tIAFO1Fc,24014
12
- openaivec/_schema.py,sha256=RKjDPqet1TlReYibah0R0NIvCV1VWN5SZxiaBeV0gCY,15492
13
- openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
14
- openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
15
- openaivec/pandas_ext.py,sha256=fjBW_TU4zsew3j7g7x67t9ESCwZ0fIuxbh9bZdOmRA0,85407
16
- openaivec/spark.py,sha256=V0Gg9b9Q-2ycet33ENAN21aA-GltNj57tWoE2pCZIRQ,32601
17
- openaivec/task/__init__.py,sha256=lrgoc9UIox7XnxZ96dQRl88a-8QfuZRFBHshxctpMB8,6178
18
- openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
19
- openaivec/task/customer_support/customer_sentiment.py,sha256=NHIr9nm2d2Bu1MSpxFsM3_w1UuQrQEwnHrClVbhdCUw,7612
20
- openaivec/task/customer_support/inquiry_classification.py,sha256=NUU_apX6ADi4SyGUbvflGt-v5Ka7heHXlJOHPAeVoGg,9640
21
- openaivec/task/customer_support/inquiry_summary.py,sha256=PDQvF_ZEZ9TnFhLM2yIinP-OKz_PSPeIET48P9UIgzQ,6920
22
- openaivec/task/customer_support/intent_analysis.py,sha256=uWdza2pkqnRJn3JtPWbsTAUDL1Sn-BwH-ZpN2cUxhe8,7504
23
- openaivec/task/customer_support/response_suggestion.py,sha256=Hxt5MDpdfoo5S7_I_eQ302AOIsSCyNBeaDSMMMfPYoQ,8344
24
- openaivec/task/customer_support/urgency_analysis.py,sha256=DRd4pmFnwuiNGBKxxkEkfp5CZZeDppmBUThs5NYOL9g,11569
25
- openaivec/task/nlp/__init__.py,sha256=QoQ0egEK9IEh5hdrE07rZ_KCmC0gy_2FPrWJYRWiipY,512
26
- openaivec/task/nlp/dependency_parsing.py,sha256=MhrHNCqSd-JmlQ21ISYwGYXazNVZGsVuX_v0ZpyI50w,2817
27
- openaivec/task/nlp/keyword_extraction.py,sha256=seFeuk6Z2dmlVBFoDN-tOVgCnR7jq36sTsWySjb_ric,2804
28
- openaivec/task/nlp/morphological_analysis.py,sha256=TcNGA0cYrPczr1ZxflBiokh-qdwMSvRDHq66fP7gi2c,2401
29
- openaivec/task/nlp/named_entity_recognition.py,sha256=jnVfGtf7TDCNNHrLQ5rhMYvmHc8FKXQxEzC5ib6NnVc,3037
30
- openaivec/task/nlp/sentiment_analysis.py,sha256=Np-yY0d4Kr5WEjGjq4tNFHDNarBLajJr8Q2E6K9ms3A,3085
31
- openaivec/task/nlp/translation.py,sha256=VYgiXtr2TL1tbqZkBpyVAy4ahrgd8UO4ZjhIL6xMdkI,6609
32
- openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
33
- openaivec/task/table/fillna.py,sha256=g_CpLnLzK1C5rCiVq15L3X0kywJK6CtSrKRYxQFuhn8,6606
34
- openaivec-0.14.12.dist-info/METADATA,sha256=GC5evUtog4LhK1XhJXfF-jO9DeyDq7l9Ii8KN1sVIBo,28216
35
- openaivec-0.14.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
36
- openaivec-0.14.12.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
37
- openaivec-0.14.12.dist-info/RECORD,,