openaivec 0.14.12__py3-none-any.whl → 0.14.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/_embeddings.py +17 -4
- openaivec/_model.py +7 -12
- openaivec/_prompt.py +3 -6
- openaivec/_responses.py +39 -117
- openaivec/_schema.py +27 -23
- openaivec/pandas_ext.py +355 -343
- openaivec/spark.py +32 -39
- openaivec/task/__init__.py +1 -1
- openaivec/task/customer_support/customer_sentiment.py +4 -9
- openaivec/task/customer_support/inquiry_classification.py +5 -8
- openaivec/task/customer_support/inquiry_summary.py +5 -6
- openaivec/task/customer_support/intent_analysis.py +5 -7
- openaivec/task/customer_support/response_suggestion.py +5 -8
- openaivec/task/customer_support/urgency_analysis.py +5 -8
- openaivec/task/nlp/dependency_parsing.py +1 -2
- openaivec/task/nlp/keyword_extraction.py +1 -2
- openaivec/task/nlp/morphological_analysis.py +1 -2
- openaivec/task/nlp/named_entity_recognition.py +1 -2
- openaivec/task/nlp/sentiment_analysis.py +1 -2
- openaivec/task/nlp/translation.py +1 -1
- openaivec/task/table/fillna.py +8 -3
- {openaivec-0.14.12.dist-info → openaivec-0.14.13.dist-info}/METADATA +1 -1
- openaivec-0.14.13.dist-info/RECORD +37 -0
- openaivec-0.14.12.dist-info/RECORD +0 -37
- {openaivec-0.14.12.dist-info → openaivec-0.14.13.dist-info}/WHEEL +0 -0
- {openaivec-0.14.12.dist-info → openaivec-0.14.13.dist-info}/licenses/LICENSE +0 -0
openaivec/spark.py
CHANGED
|
@@ -134,6 +134,7 @@ import numpy as np
|
|
|
134
134
|
import pandas as pd
|
|
135
135
|
import tiktoken
|
|
136
136
|
from pydantic import BaseModel
|
|
137
|
+
from pyspark import SparkContext
|
|
137
138
|
from pyspark.sql import SparkSession
|
|
138
139
|
from pyspark.sql.pandas.functions import pandas_udf
|
|
139
140
|
from pyspark.sql.types import ArrayType, BooleanType, FloatType, IntegerType, StringType, StructField, StructType
|
|
@@ -180,7 +181,10 @@ def setup(
|
|
|
180
181
|
If provided, registers `EmbeddingsModelName` in the DI container.
|
|
181
182
|
"""
|
|
182
183
|
|
|
183
|
-
|
|
184
|
+
CONTAINER.register(SparkSession, lambda: spark)
|
|
185
|
+
CONTAINER.register(SparkContext, lambda: CONTAINER.resolve(SparkSession).sparkContext)
|
|
186
|
+
|
|
187
|
+
sc = CONTAINER.resolve(SparkContext)
|
|
184
188
|
sc.environment["OPENAI_API_KEY"] = api_key
|
|
185
189
|
|
|
186
190
|
os.environ["OPENAI_API_KEY"] = api_key
|
|
@@ -219,7 +223,10 @@ def setup_azure(
|
|
|
219
223
|
If provided, registers `EmbeddingsModelName` in the DI container.
|
|
220
224
|
"""
|
|
221
225
|
|
|
222
|
-
|
|
226
|
+
CONTAINER.register(SparkSession, lambda: spark)
|
|
227
|
+
CONTAINER.register(SparkContext, lambda: CONTAINER.resolve(SparkSession).sparkContext)
|
|
228
|
+
|
|
229
|
+
sc = CONTAINER.resolve(SparkContext)
|
|
223
230
|
sc.environment["AZURE_OPENAI_API_KEY"] = api_key
|
|
224
231
|
sc.environment["AZURE_OPENAI_BASE_URL"] = base_url
|
|
225
232
|
sc.environment["AZURE_OPENAI_API_VERSION"] = api_version
|
|
@@ -317,8 +324,6 @@ def responses_udf(
|
|
|
317
324
|
response_format: type[ResponseFormat] = str,
|
|
318
325
|
model_name: str = CONTAINER.resolve(ResponsesModelName).value,
|
|
319
326
|
batch_size: int | None = None,
|
|
320
|
-
temperature: float | None = 0.0,
|
|
321
|
-
top_p: float = 1.0,
|
|
322
327
|
max_concurrency: int = 8,
|
|
323
328
|
**api_kwargs,
|
|
324
329
|
) -> UserDefinedFunction:
|
|
@@ -353,17 +358,14 @@ def responses_udf(
|
|
|
353
358
|
Defaults to None (automatic batch size optimization that dynamically
|
|
354
359
|
adjusts based on execution time, targeting 30-60 seconds per batch).
|
|
355
360
|
Set to a positive integer (e.g., 32-128) for fixed batch size.
|
|
356
|
-
temperature (float): Sampling temperature (0.0 to 2.0). Defaults to 0.0.
|
|
357
|
-
top_p (float): Nucleus sampling parameter. Defaults to 1.0.
|
|
358
361
|
max_concurrency (int): Maximum number of concurrent API requests **PER EXECUTOR**.
|
|
359
362
|
Total cluster concurrency = max_concurrency × number_of_executors.
|
|
360
363
|
Higher values increase throughput but may hit OpenAI rate limits.
|
|
361
364
|
Recommended: 4-12 per executor. Defaults to 8.
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
These parameters are applied to all API requests made by the UDF.
|
|
365
|
+
**api_kwargs: Additional OpenAI API parameters (e.g. ``temperature``, ``top_p``,
|
|
366
|
+
``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
|
|
367
|
+
forwarded verbatim to the underlying API calls. These parameters are applied to
|
|
368
|
+
all API requests made by the UDF.
|
|
367
369
|
|
|
368
370
|
Returns:
|
|
369
371
|
UserDefinedFunction: A Spark pandas UDF configured to generate responses asynchronously.
|
|
@@ -399,8 +401,6 @@ def responses_udf(
|
|
|
399
401
|
part.aio.responses_with_cache(
|
|
400
402
|
instructions=instructions,
|
|
401
403
|
response_format=response_format,
|
|
402
|
-
temperature=temperature,
|
|
403
|
-
top_p=top_p,
|
|
404
404
|
cache=cache,
|
|
405
405
|
**api_kwargs,
|
|
406
406
|
)
|
|
@@ -427,8 +427,6 @@ def responses_udf(
|
|
|
427
427
|
part.aio.responses_with_cache(
|
|
428
428
|
instructions=instructions,
|
|
429
429
|
response_format=str,
|
|
430
|
-
temperature=temperature,
|
|
431
|
-
top_p=top_p,
|
|
432
430
|
cache=cache,
|
|
433
431
|
**api_kwargs,
|
|
434
432
|
)
|
|
@@ -460,7 +458,7 @@ def task_udf(
|
|
|
460
458
|
|
|
461
459
|
Args:
|
|
462
460
|
task (PreparedTask): A predefined task configuration containing instructions,
|
|
463
|
-
response format,
|
|
461
|
+
response format, and API parameters.
|
|
464
462
|
model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
|
|
465
463
|
For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to configured model in DI container.
|
|
466
464
|
batch_size (int | None): Number of rows per async batch request within each partition.
|
|
@@ -474,10 +472,10 @@ def task_udf(
|
|
|
474
472
|
Recommended: 4-12 per executor. Defaults to 8.
|
|
475
473
|
|
|
476
474
|
Additional Keyword Args:
|
|
477
|
-
Arbitrary OpenAI Responses API parameters (e.g. ``
|
|
478
|
-
``seed``, ``max_output_tokens``, etc.)
|
|
479
|
-
|
|
480
|
-
parameters set in the task configuration.
|
|
475
|
+
Arbitrary OpenAI Responses API parameters (e.g. ``temperature``, ``top_p``,
|
|
476
|
+
``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
|
|
477
|
+
are forwarded verbatim to the underlying API calls. These parameters are applied to
|
|
478
|
+
all API requests made by the UDF and override any parameters set in the task configuration.
|
|
481
479
|
|
|
482
480
|
Returns:
|
|
483
481
|
UserDefinedFunction: A Spark pandas UDF configured to execute the specified task
|
|
@@ -498,15 +496,16 @@ def task_udf(
|
|
|
498
496
|
**Automatic Caching**: Duplicate inputs within each partition are cached,
|
|
499
497
|
reducing API calls and costs significantly on datasets with repeated content.
|
|
500
498
|
"""
|
|
499
|
+
# Merge task's api_kwargs with caller's api_kwargs (caller takes precedence)
|
|
500
|
+
merged_kwargs = {**task.api_kwargs, **api_kwargs}
|
|
501
|
+
|
|
501
502
|
return responses_udf(
|
|
502
503
|
instructions=task.instructions,
|
|
503
504
|
response_format=task.response_format,
|
|
504
505
|
model_name=model_name,
|
|
505
506
|
batch_size=batch_size,
|
|
506
|
-
temperature=task.temperature,
|
|
507
|
-
top_p=task.top_p,
|
|
508
507
|
max_concurrency=max_concurrency,
|
|
509
|
-
**
|
|
508
|
+
**merged_kwargs,
|
|
510
509
|
)
|
|
511
510
|
|
|
512
511
|
|
|
@@ -532,15 +531,13 @@ def infer_schema(
|
|
|
532
531
|
InferredSchema: An object containing the inferred schema and response format.
|
|
533
532
|
"""
|
|
534
533
|
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
spark = SparkSession.builder.getOrCreate()
|
|
534
|
+
spark = CONTAINER.resolve(SparkSession)
|
|
538
535
|
examples: list[str] = (
|
|
539
536
|
spark.table(example_table_name).rdd.map(lambda row: row[example_field_name]).takeSample(False, max_examples)
|
|
540
537
|
)
|
|
541
538
|
|
|
542
539
|
input = SchemaInferenceInput(
|
|
543
|
-
|
|
540
|
+
instructions=instructions,
|
|
544
541
|
examples=examples,
|
|
545
542
|
)
|
|
546
543
|
inferer = CONTAINER.resolve(SchemaInferer)
|
|
@@ -555,8 +552,6 @@ def parse_udf(
|
|
|
555
552
|
max_examples: int = 100,
|
|
556
553
|
model_name: str = CONTAINER.resolve(ResponsesModelName).value,
|
|
557
554
|
batch_size: int | None = None,
|
|
558
|
-
temperature: float | None = 0.0,
|
|
559
|
-
top_p: float = 1.0,
|
|
560
555
|
max_concurrency: int = 8,
|
|
561
556
|
**api_kwargs,
|
|
562
557
|
) -> UserDefinedFunction:
|
|
@@ -586,17 +581,15 @@ def parse_udf(
|
|
|
586
581
|
Defaults to None (automatic batch size optimization that dynamically
|
|
587
582
|
adjusts based on execution time, targeting 30-60 seconds per batch).
|
|
588
583
|
Set to a positive integer (e.g., 32-128) for fixed batch size
|
|
589
|
-
temperature (float | None): Sampling temperature (0.0 to 2.0). Defaults to 0.0.
|
|
590
|
-
top_p (float): Nucleus sampling parameter. Defaults to 1.0.
|
|
591
584
|
max_concurrency (int): Maximum number of concurrent API requests **PER EXECUTOR**.
|
|
592
585
|
Total cluster concurrency = max_concurrency × number_of_executors.
|
|
593
586
|
Higher values increase throughput but may hit OpenAI rate limits.
|
|
594
587
|
Recommended: 4-12 per executor. Defaults to 8.
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
588
|
+
**api_kwargs: Additional OpenAI API parameters (e.g. ``temperature``, ``top_p``,
|
|
589
|
+
``frequency_penalty``, ``presence_penalty``, ``seed``, ``max_output_tokens``, etc.)
|
|
590
|
+
forwarded verbatim to the underlying API calls. These parameters are applied to
|
|
591
|
+
all API requests made by the UDF and override any parameters set in the
|
|
592
|
+
response_format or example data.
|
|
600
593
|
Returns:
|
|
601
594
|
UserDefinedFunction: A Spark pandas UDF configured to parse responses asynchronously.
|
|
602
595
|
Output schema is `StringType` for str response format or a struct derived from
|
|
@@ -623,8 +616,6 @@ def parse_udf(
|
|
|
623
616
|
response_format=schema.model if schema else response_format,
|
|
624
617
|
model_name=model_name,
|
|
625
618
|
batch_size=batch_size,
|
|
626
|
-
temperature=temperature,
|
|
627
|
-
top_p=top_p,
|
|
628
619
|
max_concurrency=max_concurrency,
|
|
629
620
|
**api_kwargs,
|
|
630
621
|
)
|
|
@@ -634,6 +625,7 @@ def embeddings_udf(
|
|
|
634
625
|
model_name: str = CONTAINER.resolve(EmbeddingsModelName).value,
|
|
635
626
|
batch_size: int | None = None,
|
|
636
627
|
max_concurrency: int = 8,
|
|
628
|
+
**api_kwargs,
|
|
637
629
|
) -> UserDefinedFunction:
|
|
638
630
|
"""Create an asynchronous Spark pandas UDF for generating embeddings.
|
|
639
631
|
|
|
@@ -669,6 +661,7 @@ def embeddings_udf(
|
|
|
669
661
|
Total cluster concurrency = max_concurrency × number_of_executors.
|
|
670
662
|
Higher values increase throughput but may hit OpenAI rate limits.
|
|
671
663
|
Recommended: 4-12 per executor. Defaults to 8.
|
|
664
|
+
**api_kwargs: Additional OpenAI API parameters (e.g., dimensions for text-embedding-3 models).
|
|
672
665
|
|
|
673
666
|
Returns:
|
|
674
667
|
UserDefinedFunction: A Spark pandas UDF configured to generate embeddings asynchronously
|
|
@@ -695,7 +688,7 @@ def embeddings_udf(
|
|
|
695
688
|
|
|
696
689
|
try:
|
|
697
690
|
for part in col:
|
|
698
|
-
embeddings: pd.Series = asyncio.run(part.aio.embeddings_with_cache(cache=cache))
|
|
691
|
+
embeddings: pd.Series = asyncio.run(part.aio.embeddings_with_cache(cache=cache, **api_kwargs))
|
|
699
692
|
yield embeddings.map(lambda x: x.tolist())
|
|
700
693
|
finally:
|
|
701
694
|
asyncio.run(cache.clear())
|
openaivec/task/__init__.py
CHANGED
|
@@ -117,7 +117,7 @@ All tasks are built using the `PreparedTask` dataclass:
|
|
|
117
117
|
@dataclass(frozen=True)
|
|
118
118
|
class PreparedTask:
|
|
119
119
|
instructions: str # Detailed prompt for the LLM
|
|
120
|
-
response_format:
|
|
120
|
+
response_format: type[ResponseFormat] # Pydantic model or str for structured/plain output
|
|
121
121
|
temperature: float = 0.0 # Sampling temperature
|
|
122
122
|
top_p: float = 1.0 # Nucleus sampling parameter
|
|
123
123
|
```
|
|
@@ -95,15 +95,12 @@ class CustomerSentiment(BaseModel):
|
|
|
95
95
|
)
|
|
96
96
|
|
|
97
97
|
|
|
98
|
-
def customer_sentiment(
|
|
99
|
-
business_context: str = "general customer support", temperature: float = 0.0, top_p: float = 1.0
|
|
100
|
-
) -> PreparedTask:
|
|
98
|
+
def customer_sentiment(business_context: str = "general customer support", **api_kwargs) -> PreparedTask:
|
|
101
99
|
"""Create a configurable customer sentiment analysis task.
|
|
102
100
|
|
|
103
101
|
Args:
|
|
104
102
|
business_context (str): Business context for sentiment analysis.
|
|
105
|
-
|
|
106
|
-
top_p (float): Nucleus sampling parameter (0.0-1.0).
|
|
103
|
+
**api_kwargs: Additional OpenAI API parameters (temperature, top_p, etc.).
|
|
107
104
|
|
|
108
105
|
Returns:
|
|
109
106
|
PreparedTask configured for customer sentiment analysis.
|
|
@@ -169,10 +166,8 @@ values like "positive" for sentiment.
|
|
|
169
166
|
|
|
170
167
|
Provide comprehensive sentiment analysis with business context and recommended response strategy."""
|
|
171
168
|
|
|
172
|
-
return PreparedTask(
|
|
173
|
-
instructions=instructions, response_format=CustomerSentiment, temperature=temperature, top_p=top_p
|
|
174
|
-
)
|
|
169
|
+
return PreparedTask(instructions=instructions, response_format=CustomerSentiment, api_kwargs=api_kwargs)
|
|
175
170
|
|
|
176
171
|
|
|
177
172
|
# Backward compatibility - default configuration
|
|
178
|
-
CUSTOMER_SENTIMENT = customer_sentiment()
|
|
173
|
+
CUSTOMER_SENTIMENT = customer_sentiment(temperature=0.0, top_p=1.0)
|
|
@@ -119,8 +119,7 @@ def inquiry_classification(
|
|
|
119
119
|
priority_rules: Dict[str, str] | None = None,
|
|
120
120
|
business_context: str = "general customer support",
|
|
121
121
|
custom_keywords: Dict[str, list[str]] | None = None,
|
|
122
|
-
|
|
123
|
-
top_p: float = 1.0,
|
|
122
|
+
**api_kwargs,
|
|
124
123
|
) -> PreparedTask:
|
|
125
124
|
"""Create a configurable inquiry classification task.
|
|
126
125
|
|
|
@@ -133,8 +132,8 @@ def inquiry_classification(
|
|
|
133
132
|
Default uses standard priority indicators.
|
|
134
133
|
business_context (str): Description of the business context to help with classification.
|
|
135
134
|
custom_keywords (dict[str, list[str]] | None): Dictionary mapping categories to relevant keywords.
|
|
136
|
-
|
|
137
|
-
|
|
135
|
+
**api_kwargs: Additional keyword arguments to pass to the OpenAI API,
|
|
136
|
+
such as temperature, top_p, etc.
|
|
138
137
|
|
|
139
138
|
Returns:
|
|
140
139
|
PreparedTask configured for inquiry classification.
|
|
@@ -254,10 +253,8 @@ language where appropriate, but priority must use English values like "high".
|
|
|
254
253
|
|
|
255
254
|
Provide accurate classification with detailed reasoning."""
|
|
256
255
|
|
|
257
|
-
return PreparedTask(
|
|
258
|
-
instructions=instructions, response_format=InquiryClassification, temperature=temperature, top_p=top_p
|
|
259
|
-
)
|
|
256
|
+
return PreparedTask(instructions=instructions, response_format=InquiryClassification, api_kwargs=api_kwargs)
|
|
260
257
|
|
|
261
258
|
|
|
262
259
|
# Backward compatibility - default configuration
|
|
263
|
-
INQUIRY_CLASSIFICATION = inquiry_classification()
|
|
260
|
+
INQUIRY_CLASSIFICATION = inquiry_classification(temperature=0.0, top_p=1.0)
|
|
@@ -87,16 +87,15 @@ class InquirySummary(BaseModel):
|
|
|
87
87
|
def inquiry_summary(
|
|
88
88
|
summary_length: str = "concise",
|
|
89
89
|
business_context: str = "general customer support",
|
|
90
|
-
|
|
91
|
-
top_p: float = 1.0,
|
|
90
|
+
**api_kwargs,
|
|
92
91
|
) -> PreparedTask:
|
|
93
92
|
"""Create a configurable inquiry summary task.
|
|
94
93
|
|
|
95
94
|
Args:
|
|
96
95
|
summary_length (str): Length of summary (concise, detailed, bullet_points).
|
|
97
96
|
business_context (str): Business context for summary.
|
|
98
|
-
|
|
99
|
-
|
|
97
|
+
**api_kwargs: Additional keyword arguments to pass to the OpenAI API,
|
|
98
|
+
such as temperature, top_p, etc.
|
|
100
99
|
|
|
101
100
|
Returns:
|
|
102
101
|
PreparedTask configured for inquiry summarization.
|
|
@@ -163,8 +162,8 @@ input is in German, provide all summary content in German, but use English value
|
|
|
163
162
|
|
|
164
163
|
Provide accurate, actionable summary that enables efficient support resolution."""
|
|
165
164
|
|
|
166
|
-
return PreparedTask(instructions=instructions, response_format=InquirySummary,
|
|
165
|
+
return PreparedTask(instructions=instructions, response_format=InquirySummary, api_kwargs=api_kwargs)
|
|
167
166
|
|
|
168
167
|
|
|
169
168
|
# Backward compatibility - default configuration
|
|
170
|
-
INQUIRY_SUMMARY = inquiry_summary()
|
|
169
|
+
INQUIRY_SUMMARY = inquiry_summary(temperature=0.0, top_p=1.0)
|
|
@@ -100,15 +100,13 @@ class IntentAnalysis(BaseModel):
|
|
|
100
100
|
)
|
|
101
101
|
|
|
102
102
|
|
|
103
|
-
def intent_analysis(
|
|
104
|
-
business_context: str = "general customer support", temperature: float = 0.0, top_p: float = 1.0
|
|
105
|
-
) -> PreparedTask:
|
|
103
|
+
def intent_analysis(business_context: str = "general customer support", **api_kwargs) -> PreparedTask:
|
|
106
104
|
"""Create a configurable intent analysis task.
|
|
107
105
|
|
|
108
106
|
Args:
|
|
109
107
|
business_context (str): Business context for intent analysis.
|
|
110
|
-
|
|
111
|
-
|
|
108
|
+
**api_kwargs: Additional keyword arguments to pass to the OpenAI API,
|
|
109
|
+
such as temperature, top_p, etc.
|
|
112
110
|
|
|
113
111
|
Returns:
|
|
114
112
|
PreparedTask configured for intent analysis.
|
|
@@ -171,8 +169,8 @@ next_steps, and reasoning in Japanese, but use English values like "get_help" fo
|
|
|
171
169
|
|
|
172
170
|
Provide comprehensive intent analysis with actionable recommendations."""
|
|
173
171
|
|
|
174
|
-
return PreparedTask(instructions=instructions, response_format=IntentAnalysis,
|
|
172
|
+
return PreparedTask(instructions=instructions, response_format=IntentAnalysis, api_kwargs=api_kwargs)
|
|
175
173
|
|
|
176
174
|
|
|
177
175
|
# Backward compatibility - default configuration
|
|
178
|
-
INTENT_ANALYSIS = intent_analysis()
|
|
176
|
+
INTENT_ANALYSIS = intent_analysis(temperature=0.0, top_p=1.0)
|
|
@@ -92,8 +92,7 @@ def response_suggestion(
|
|
|
92
92
|
response_style: str = "professional",
|
|
93
93
|
company_name: str = "our company",
|
|
94
94
|
business_context: str = "general customer support",
|
|
95
|
-
|
|
96
|
-
top_p: float = 1.0,
|
|
95
|
+
**api_kwargs,
|
|
97
96
|
) -> PreparedTask:
|
|
98
97
|
"""Create a configurable response suggestion task.
|
|
99
98
|
|
|
@@ -101,8 +100,8 @@ def response_suggestion(
|
|
|
101
100
|
response_style (str): Style of response (professional, friendly, empathetic, formal).
|
|
102
101
|
company_name (str): Name of the company for personalization.
|
|
103
102
|
business_context (str): Business context for responses.
|
|
104
|
-
|
|
105
|
-
|
|
103
|
+
**api_kwargs: Additional keyword arguments to pass to the OpenAI API,
|
|
104
|
+
such as temperature, top_p, etc.
|
|
106
105
|
|
|
107
106
|
Returns:
|
|
108
107
|
PreparedTask configured for response suggestions.
|
|
@@ -190,10 +189,8 @@ but use English values like "empathetic" for tone.
|
|
|
190
189
|
Generate helpful, professional response that moves toward resolution while maintaining
|
|
191
190
|
positive customer relationship."""
|
|
192
191
|
|
|
193
|
-
return PreparedTask(
|
|
194
|
-
instructions=instructions, response_format=ResponseSuggestion, temperature=temperature, top_p=top_p
|
|
195
|
-
)
|
|
192
|
+
return PreparedTask(instructions=instructions, response_format=ResponseSuggestion, api_kwargs=api_kwargs)
|
|
196
193
|
|
|
197
194
|
|
|
198
195
|
# Backward compatibility - default configuration
|
|
199
|
-
RESPONSE_SUGGESTION = response_suggestion()
|
|
196
|
+
RESPONSE_SUGGESTION = response_suggestion(temperature=0.0, top_p=1.0)
|
|
@@ -135,8 +135,7 @@ def urgency_analysis(
|
|
|
135
135
|
business_context: str = "general customer support",
|
|
136
136
|
business_hours: str = "24/7 support",
|
|
137
137
|
sla_rules: Dict[str, str] | None = None,
|
|
138
|
-
|
|
139
|
-
top_p: float = 1.0,
|
|
138
|
+
**api_kwargs,
|
|
140
139
|
) -> PreparedTask:
|
|
141
140
|
"""Create a configurable urgency analysis task.
|
|
142
141
|
|
|
@@ -149,8 +148,8 @@ def urgency_analysis(
|
|
|
149
148
|
business_context (str): Description of the business context.
|
|
150
149
|
business_hours (str): Description of business hours for response time calculation.
|
|
151
150
|
sla_rules (dict[str, str] | None): Dictionary mapping customer tiers to SLA requirements.
|
|
152
|
-
|
|
153
|
-
|
|
151
|
+
**api_kwargs: Additional keyword arguments to pass to the OpenAI API,
|
|
152
|
+
such as temperature, top_p, etc.
|
|
154
153
|
|
|
155
154
|
Returns:
|
|
156
155
|
PreparedTask configured for urgency analysis.
|
|
@@ -287,10 +286,8 @@ urgency_level.
|
|
|
287
286
|
|
|
288
287
|
Provide detailed analysis with clear reasoning for urgency level and response time recommendations."""
|
|
289
288
|
|
|
290
|
-
return PreparedTask(
|
|
291
|
-
instructions=instructions, response_format=UrgencyAnalysis, temperature=temperature, top_p=top_p
|
|
292
|
-
)
|
|
289
|
+
return PreparedTask(instructions=instructions, response_format=UrgencyAnalysis, api_kwargs=api_kwargs)
|
|
293
290
|
|
|
294
291
|
|
|
295
292
|
# Backward compatibility - default configuration
|
|
296
|
-
URGENCY_ANALYSIS = urgency_analysis()
|
|
293
|
+
URGENCY_ANALYSIS = urgency_analysis(temperature=0.0, top_p=1.0)
|
|
@@ -75,6 +75,5 @@ DEPENDENCY_PARSING = PreparedTask(
|
|
|
75
75
|
"relations between words, determine the root word, and provide a tree representation of the "
|
|
76
76
|
"syntactic structure.",
|
|
77
77
|
response_format=DependencyParsing,
|
|
78
|
-
temperature
|
|
79
|
-
top_p=1.0,
|
|
78
|
+
api_kwargs={"temperature": 0.0, "top_p": 1.0},
|
|
80
79
|
)
|
|
@@ -75,6 +75,5 @@ KEYWORD_EXTRACTION = PreparedTask(
|
|
|
75
75
|
instructions="Extract important keywords and phrases from the following text. Rank them "
|
|
76
76
|
"by importance, provide frequency counts, identify main topics, and generate a brief summary.",
|
|
77
77
|
response_format=KeywordExtraction,
|
|
78
|
-
temperature
|
|
79
|
-
top_p=1.0,
|
|
78
|
+
api_kwargs={"temperature": 0.0, "top_p": 1.0},
|
|
80
79
|
)
|
|
@@ -70,6 +70,5 @@ MORPHOLOGICAL_ANALYSIS = PreparedTask(
|
|
|
70
70
|
"identify part-of-speech tags, provide lemmatized forms, and extract morphological features "
|
|
71
71
|
"for each token.",
|
|
72
72
|
response_format=MorphologicalAnalysis,
|
|
73
|
-
temperature
|
|
74
|
-
top_p=1.0,
|
|
73
|
+
api_kwargs={"temperature": 0.0, "top_p": 1.0},
|
|
75
74
|
)
|
|
@@ -78,6 +78,5 @@ NAMED_ENTITY_RECOGNITION = PreparedTask(
|
|
|
78
78
|
"organizations, locations, dates, money, percentages, and other miscellaneous entities "
|
|
79
79
|
"with their positions and confidence scores.",
|
|
80
80
|
response_format=NamedEntityRecognition,
|
|
81
|
-
temperature
|
|
82
|
-
top_p=1.0,
|
|
81
|
+
api_kwargs={"temperature": 0.0, "top_p": 1.0},
|
|
83
82
|
)
|
|
@@ -78,6 +78,5 @@ SENTIMENT_ANALYSIS = PreparedTask(
|
|
|
78
78
|
"English values specified (positive/negative/neutral for sentiment, and "
|
|
79
79
|
"joy/sadness/anger/fear/surprise/disgust for emotions).",
|
|
80
80
|
response_format=SentimentAnalysis,
|
|
81
|
-
temperature
|
|
82
|
-
top_p=1.0,
|
|
81
|
+
api_kwargs={"temperature": 0.0, "top_p": 1.0},
|
|
83
82
|
)
|
|
@@ -157,5 +157,5 @@ class TranslatedString(BaseModel):
|
|
|
157
157
|
instructions = "Translate the following text into multiple languages. "
|
|
158
158
|
|
|
159
159
|
MULTILINGUAL_TRANSLATION = PreparedTask(
|
|
160
|
-
instructions=instructions, response_format=TranslatedString, temperature
|
|
160
|
+
instructions=instructions, response_format=TranslatedString, api_kwargs={"temperature": 0.0, "top_p": 1.0}
|
|
161
161
|
)
|
openaivec/task/table/fillna.py
CHANGED
|
@@ -125,7 +125,7 @@ class FillNaResponse(BaseModel):
|
|
|
125
125
|
)
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -> PreparedTask:
|
|
128
|
+
def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500, **api_kwargs) -> PreparedTask:
|
|
129
129
|
"""Create a prepared task for filling missing values in a DataFrame column.
|
|
130
130
|
|
|
131
131
|
Analyzes the provided DataFrame to understand data patterns and creates
|
|
@@ -141,12 +141,14 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
|
|
|
141
141
|
max_examples (int): Maximum number of example rows to use for few-shot
|
|
142
142
|
learning. Defaults to 500. Higher values provide more context
|
|
143
143
|
but increase token usage and processing time.
|
|
144
|
+
**api_kwargs: Additional keyword arguments to pass to the OpenAI API,
|
|
145
|
+
such as temperature, top_p, etc.
|
|
144
146
|
|
|
145
147
|
Returns:
|
|
146
148
|
PreparedTask configured for missing value imputation with:
|
|
147
149
|
- Instructions based on DataFrame patterns
|
|
148
150
|
- FillNaResponse format for structured output
|
|
149
|
-
-
|
|
151
|
+
- Default deterministic settings (temperature=0.0, top_p=1.0)
|
|
150
152
|
|
|
151
153
|
Raises:
|
|
152
154
|
ValueError: If target_column_name doesn't exist in DataFrame,
|
|
@@ -180,4 +182,7 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
|
|
|
180
182
|
if df[target_column_name].notna().sum() == 0:
|
|
181
183
|
raise ValueError(f"Column '{target_column_name}' contains no non-null values for training examples.")
|
|
182
184
|
instructions = get_instructions(df, target_column_name, max_examples)
|
|
183
|
-
|
|
185
|
+
# Set default values for deterministic results if not provided
|
|
186
|
+
if not api_kwargs:
|
|
187
|
+
api_kwargs = {"temperature": 0.0, "top_p": 1.0}
|
|
188
|
+
return PreparedTask(instructions=instructions, response_format=FillNaResponse, api_kwargs=api_kwargs)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
openaivec/__init__.py,sha256=mXCGNNTjYbmE4CAXGvAs78soxUsoy_mxxnvaCk_CL6Y,361
|
|
2
|
+
openaivec/_di.py,sha256=Cl1ZoNBlQsJL1bpzoMDl08uT9pZFVSlqOdLbS3_MwPE,11462
|
|
3
|
+
openaivec/_dynamic.py,sha256=7ZaC59w2Edemnao57XeZVO4qmSOA-Kus6TchZC3Dd5o,14821
|
|
4
|
+
openaivec/_embeddings.py,sha256=nirLqOu69fTB7aSCYhbbRbwAA6ggwEYJiQoPDsHqAqQ,8200
|
|
5
|
+
openaivec/_log.py,sha256=LHNs6AbJzM4weaRARZFroigxR6D148d7WSIMLk1IhbU,1439
|
|
6
|
+
openaivec/_model.py,sha256=71oiENUKwpY58ilj1LE7fDOAhs7PUSiZRiUHKUIuu7Y,3235
|
|
7
|
+
openaivec/_optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
|
|
8
|
+
openaivec/_prompt.py,sha256=NWE7jZKYphkD856haynJLmRadPugJ68emT42pd7Ciso,20633
|
|
9
|
+
openaivec/_provider.py,sha256=8z8gPYY5-Z7rzDlj_NC6hR__DUqVAH7VLHJn6LalzRg,6158
|
|
10
|
+
openaivec/_proxy.py,sha256=AiGuC1MCFjZCRXCac-pHUI3Np3nf1HIpWY6nC9ZVCFY,29671
|
|
11
|
+
openaivec/_responses.py,sha256=qBrYv4qblDIs5dRvj9t96r8UfAJmy4ZvtAe6csNZ7oM,20412
|
|
12
|
+
openaivec/_schema.py,sha256=iOeR5J_ihZRDZtzmqvOK1ZtInKcx4OnoR38DB3VmmQw,15666
|
|
13
|
+
openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
|
|
14
|
+
openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
|
|
15
|
+
openaivec/pandas_ext.py,sha256=r2jpFqDnWcQYK3pMv5hCtOStOMltccDyLkpprLmIOls,85715
|
|
16
|
+
openaivec/spark.py,sha256=zaEivVOe3ukG8coa9JEUyISQ1YcMqCvAbhaarvn2SOM,32507
|
|
17
|
+
openaivec/task/__init__.py,sha256=RkYIKrcE83M_9Um9cSMkeGzL9kPRAovajfRvr31YxLE,6178
|
|
18
|
+
openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
|
|
19
|
+
openaivec/task/customer_support/customer_sentiment.py,sha256=d8spZUtImjePK0xWGvIW98ghbdyOZ0KEZmaUpG8QB7M,7532
|
|
20
|
+
openaivec/task/customer_support/inquiry_classification.py,sha256=NKz1oTm06eU6W-plHe3T3o20lCk6M2NemVXZ4Y_IozU,9602
|
|
21
|
+
openaivec/task/customer_support/inquiry_summary.py,sha256=8X1J8lZwlgX6s02cs86-K0moZ5gTrX7E7WEKiY2vpiQ,6896
|
|
22
|
+
openaivec/task/customer_support/intent_analysis.py,sha256=Jnokzi0wTlHpuTRl5uqxdoHClYU71b9iFTzn3KNeNVM,7478
|
|
23
|
+
openaivec/task/customer_support/response_suggestion.py,sha256=IykZE-BJ_ENhe5frnVl4bQKpArwOuNAITGlBxlu62c0,8306
|
|
24
|
+
openaivec/task/customer_support/urgency_analysis.py,sha256=fdBT0Ud-InGqou-ZuFcVc3EpUNAq5N55_Q9D6D74WlQ,11531
|
|
25
|
+
openaivec/task/nlp/__init__.py,sha256=QoQ0egEK9IEh5hdrE07rZ_KCmC0gy_2FPrWJYRWiipY,512
|
|
26
|
+
openaivec/task/nlp/dependency_parsing.py,sha256=V7pd4_EbBBvdpnFDkfZh08u7kfJ7XJLq_qLkec48yr0,2832
|
|
27
|
+
openaivec/task/nlp/keyword_extraction.py,sha256=e6niCt8XU0EPJLGYOJXQvbfWtl7w9CgfnCE188kecb4,2819
|
|
28
|
+
openaivec/task/nlp/morphological_analysis.py,sha256=qTFFBkFP8CRZU87S59ju5ygXWlEBCtjYlH9Su7czLjs,2416
|
|
29
|
+
openaivec/task/nlp/named_entity_recognition.py,sha256=9BFKYk0PZlyNN8pItGIEFecvZew4K_F5GgY5Ub8xDtM,3052
|
|
30
|
+
openaivec/task/nlp/sentiment_analysis.py,sha256=u-zpqAaQYcr7I3mqMv_CTJXkfxtoLft3qm-qwmqb_p4,3100
|
|
31
|
+
openaivec/task/nlp/translation.py,sha256=kgWj2oN8pUId3vuHTJNx636gB49AGEKXWICA_XJgE_0,6628
|
|
32
|
+
openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
|
|
33
|
+
openaivec/task/table/fillna.py,sha256=zL6m5hGD4kamV7qHETnn__B59wIY540Ks0EzNgUJgdI,6888
|
|
34
|
+
openaivec-0.14.13.dist-info/METADATA,sha256=rB_WJhIVX11WUoA-r2Ryn57QIuTWj0q0JhjPlz6wXv4,28216
|
|
35
|
+
openaivec-0.14.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
36
|
+
openaivec-0.14.13.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
37
|
+
openaivec-0.14.13.dist-info/RECORD,,
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
openaivec/__init__.py,sha256=mXCGNNTjYbmE4CAXGvAs78soxUsoy_mxxnvaCk_CL6Y,361
|
|
2
|
-
openaivec/_di.py,sha256=Cl1ZoNBlQsJL1bpzoMDl08uT9pZFVSlqOdLbS3_MwPE,11462
|
|
3
|
-
openaivec/_dynamic.py,sha256=7ZaC59w2Edemnao57XeZVO4qmSOA-Kus6TchZC3Dd5o,14821
|
|
4
|
-
openaivec/_embeddings.py,sha256=upCjl8m9h1CihP6t7wvIH_vivOAPSgmgooAxIhnUMUw,7449
|
|
5
|
-
openaivec/_log.py,sha256=LHNs6AbJzM4weaRARZFroigxR6D148d7WSIMLk1IhbU,1439
|
|
6
|
-
openaivec/_model.py,sha256=toS2oBubrJa9jrdYy-87Fb2XivjXUlk_8Zn5gKUAcFI,3345
|
|
7
|
-
openaivec/_optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
|
|
8
|
-
openaivec/_prompt.py,sha256=zLv13q47CKV3jnETUyWAIlnjXFSEMs70c8m0yN7_Hek,20820
|
|
9
|
-
openaivec/_provider.py,sha256=8z8gPYY5-Z7rzDlj_NC6hR__DUqVAH7VLHJn6LalzRg,6158
|
|
10
|
-
openaivec/_proxy.py,sha256=AiGuC1MCFjZCRXCac-pHUI3Np3nf1HIpWY6nC9ZVCFY,29671
|
|
11
|
-
openaivec/_responses.py,sha256=lVJRa_Uc7hQJnYJRgumqwBbu6GToZqsLFS6tIAFO1Fc,24014
|
|
12
|
-
openaivec/_schema.py,sha256=RKjDPqet1TlReYibah0R0NIvCV1VWN5SZxiaBeV0gCY,15492
|
|
13
|
-
openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
|
|
14
|
-
openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
|
|
15
|
-
openaivec/pandas_ext.py,sha256=fjBW_TU4zsew3j7g7x67t9ESCwZ0fIuxbh9bZdOmRA0,85407
|
|
16
|
-
openaivec/spark.py,sha256=V0Gg9b9Q-2ycet33ENAN21aA-GltNj57tWoE2pCZIRQ,32601
|
|
17
|
-
openaivec/task/__init__.py,sha256=lrgoc9UIox7XnxZ96dQRl88a-8QfuZRFBHshxctpMB8,6178
|
|
18
|
-
openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
|
|
19
|
-
openaivec/task/customer_support/customer_sentiment.py,sha256=NHIr9nm2d2Bu1MSpxFsM3_w1UuQrQEwnHrClVbhdCUw,7612
|
|
20
|
-
openaivec/task/customer_support/inquiry_classification.py,sha256=NUU_apX6ADi4SyGUbvflGt-v5Ka7heHXlJOHPAeVoGg,9640
|
|
21
|
-
openaivec/task/customer_support/inquiry_summary.py,sha256=PDQvF_ZEZ9TnFhLM2yIinP-OKz_PSPeIET48P9UIgzQ,6920
|
|
22
|
-
openaivec/task/customer_support/intent_analysis.py,sha256=uWdza2pkqnRJn3JtPWbsTAUDL1Sn-BwH-ZpN2cUxhe8,7504
|
|
23
|
-
openaivec/task/customer_support/response_suggestion.py,sha256=Hxt5MDpdfoo5S7_I_eQ302AOIsSCyNBeaDSMMMfPYoQ,8344
|
|
24
|
-
openaivec/task/customer_support/urgency_analysis.py,sha256=DRd4pmFnwuiNGBKxxkEkfp5CZZeDppmBUThs5NYOL9g,11569
|
|
25
|
-
openaivec/task/nlp/__init__.py,sha256=QoQ0egEK9IEh5hdrE07rZ_KCmC0gy_2FPrWJYRWiipY,512
|
|
26
|
-
openaivec/task/nlp/dependency_parsing.py,sha256=MhrHNCqSd-JmlQ21ISYwGYXazNVZGsVuX_v0ZpyI50w,2817
|
|
27
|
-
openaivec/task/nlp/keyword_extraction.py,sha256=seFeuk6Z2dmlVBFoDN-tOVgCnR7jq36sTsWySjb_ric,2804
|
|
28
|
-
openaivec/task/nlp/morphological_analysis.py,sha256=TcNGA0cYrPczr1ZxflBiokh-qdwMSvRDHq66fP7gi2c,2401
|
|
29
|
-
openaivec/task/nlp/named_entity_recognition.py,sha256=jnVfGtf7TDCNNHrLQ5rhMYvmHc8FKXQxEzC5ib6NnVc,3037
|
|
30
|
-
openaivec/task/nlp/sentiment_analysis.py,sha256=Np-yY0d4Kr5WEjGjq4tNFHDNarBLajJr8Q2E6K9ms3A,3085
|
|
31
|
-
openaivec/task/nlp/translation.py,sha256=VYgiXtr2TL1tbqZkBpyVAy4ahrgd8UO4ZjhIL6xMdkI,6609
|
|
32
|
-
openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
|
|
33
|
-
openaivec/task/table/fillna.py,sha256=g_CpLnLzK1C5rCiVq15L3X0kywJK6CtSrKRYxQFuhn8,6606
|
|
34
|
-
openaivec-0.14.12.dist-info/METADATA,sha256=GC5evUtog4LhK1XhJXfF-jO9DeyDq7l9Ii8KN1sVIBo,28216
|
|
35
|
-
openaivec-0.14.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
36
|
-
openaivec-0.14.12.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
37
|
-
openaivec-0.14.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|