openaivec 0.14.6__py3-none-any.whl → 0.14.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/_di.py +10 -9
- openaivec/_embeddings.py +12 -13
- openaivec/_log.py +1 -1
- openaivec/_model.py +3 -3
- openaivec/_optimize.py +3 -4
- openaivec/_prompt.py +4 -5
- openaivec/_proxy.py +34 -35
- openaivec/_responses.py +29 -29
- openaivec/_schema.py +80 -20
- openaivec/_serialize.py +19 -15
- openaivec/_util.py +9 -8
- openaivec/pandas_ext.py +20 -19
- openaivec/spark.py +11 -10
- openaivec/task/customer_support/customer_sentiment.py +2 -2
- openaivec/task/customer_support/inquiry_classification.py +8 -8
- openaivec/task/customer_support/inquiry_summary.py +4 -4
- openaivec/task/customer_support/intent_analysis.py +5 -5
- openaivec/task/customer_support/response_suggestion.py +4 -4
- openaivec/task/customer_support/urgency_analysis.py +9 -9
- openaivec/task/nlp/dependency_parsing.py +2 -4
- openaivec/task/nlp/keyword_extraction.py +3 -5
- openaivec/task/nlp/morphological_analysis.py +4 -6
- openaivec/task/nlp/named_entity_recognition.py +7 -9
- openaivec/task/nlp/sentiment_analysis.py +3 -3
- openaivec/task/nlp/translation.py +1 -2
- openaivec/task/table/fillna.py +2 -3
- {openaivec-0.14.6.dist-info → openaivec-0.14.8.dist-info}/METADATA +1 -1
- openaivec-0.14.8.dist-info/RECORD +36 -0
- openaivec-0.14.6.dist-info/RECORD +0 -36
- {openaivec-0.14.6.dist-info → openaivec-0.14.8.dist-info}/WHEEL +0 -0
- {openaivec-0.14.6.dist-info → openaivec-0.14.8.dist-info}/licenses/LICENSE +0 -0
openaivec/pandas_ext.py
CHANGED
|
@@ -42,7 +42,8 @@ to easily interact with OpenAI APIs for tasks like generating responses or embed
|
|
|
42
42
|
import inspect
|
|
43
43
|
import json
|
|
44
44
|
import logging
|
|
45
|
-
from
|
|
45
|
+
from collections.abc import Awaitable, Callable
|
|
46
|
+
from typing import TypeVar
|
|
46
47
|
|
|
47
48
|
import numpy as np
|
|
48
49
|
import pandas as pd
|
|
@@ -179,7 +180,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
179
180
|
self,
|
|
180
181
|
instructions: str,
|
|
181
182
|
cache: BatchingMapProxy[str, ResponseFormat],
|
|
182
|
-
response_format:
|
|
183
|
+
response_format: type[ResponseFormat] = str,
|
|
183
184
|
temperature: float | None = 0.0,
|
|
184
185
|
top_p: float = 1.0,
|
|
185
186
|
**api_kwargs,
|
|
@@ -193,7 +194,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
193
194
|
instructions (str): System prompt prepended to every user message.
|
|
194
195
|
cache (BatchingMapProxy[str, ResponseFormat]): Explicit cache instance for
|
|
195
196
|
batching and deduplication control.
|
|
196
|
-
response_format (
|
|
197
|
+
response_format (type[ResponseFormat], optional): Pydantic model or built-in
|
|
197
198
|
type the assistant should return. Defaults to ``str``.
|
|
198
199
|
temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
|
|
199
200
|
top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
|
|
@@ -221,7 +222,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
221
222
|
def responses(
|
|
222
223
|
self,
|
|
223
224
|
instructions: str,
|
|
224
|
-
response_format:
|
|
225
|
+
response_format: type[ResponseFormat] = str,
|
|
225
226
|
batch_size: int | None = None,
|
|
226
227
|
temperature: float | None = 0.0,
|
|
227
228
|
top_p: float = 1.0,
|
|
@@ -247,7 +248,7 @@ class OpenAIVecSeriesAccessor:
|
|
|
247
248
|
|
|
248
249
|
Args:
|
|
249
250
|
instructions (str): System prompt prepended to every user message.
|
|
250
|
-
response_format (
|
|
251
|
+
response_format (type[ResponseFormat], optional): Pydantic model or built‑in
|
|
251
252
|
type the assistant should return. Defaults to ``str``.
|
|
252
253
|
batch_size (int | None, optional): Number of prompts grouped into a single
|
|
253
254
|
request. Defaults to ``None`` (automatic batch size optimization
|
|
@@ -633,7 +634,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
633
634
|
self,
|
|
634
635
|
instructions: str,
|
|
635
636
|
cache: BatchingMapProxy[str, ResponseFormat],
|
|
636
|
-
response_format:
|
|
637
|
+
response_format: type[ResponseFormat] = str,
|
|
637
638
|
temperature: float | None = 0.0,
|
|
638
639
|
top_p: float = 1.0,
|
|
639
640
|
**api_kwargs,
|
|
@@ -667,7 +668,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
667
668
|
cache (BatchingMapProxy[str, ResponseFormat]): Pre-configured cache
|
|
668
669
|
instance for managing API call batching and deduplication.
|
|
669
670
|
Set cache.batch_size=None to enable automatic batch size optimization.
|
|
670
|
-
response_format (
|
|
671
|
+
response_format (type[ResponseFormat], optional): Desired Python type of the
|
|
671
672
|
responses. Defaults to ``str``.
|
|
672
673
|
temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
|
|
673
674
|
top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
|
|
@@ -687,7 +688,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
687
688
|
def responses(
|
|
688
689
|
self,
|
|
689
690
|
instructions: str,
|
|
690
|
-
response_format:
|
|
691
|
+
response_format: type[ResponseFormat] = str,
|
|
691
692
|
batch_size: int | None = None,
|
|
692
693
|
temperature: float | None = 0.0,
|
|
693
694
|
top_p: float = 1.0,
|
|
@@ -717,7 +718,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
717
718
|
|
|
718
719
|
Args:
|
|
719
720
|
instructions (str): System prompt for the assistant.
|
|
720
|
-
response_format (
|
|
721
|
+
response_format (type[ResponseFormat], optional): Desired Python type of the
|
|
721
722
|
responses. Defaults to ``str``.
|
|
722
723
|
batch_size (int | None, optional): Number of requests sent in one batch.
|
|
723
724
|
Defaults to ``None`` (automatic batch size optimization
|
|
@@ -969,7 +970,7 @@ class OpenAIVecDataFrameAccessor:
|
|
|
969
970
|
if missing_rows.empty:
|
|
970
971
|
return self._obj
|
|
971
972
|
|
|
972
|
-
filled_values:
|
|
973
|
+
filled_values: list[FillNaResponse] = missing_rows.ai.task(
|
|
973
974
|
task=task, batch_size=batch_size, show_progress=show_progress, **api_kwargs
|
|
974
975
|
)
|
|
975
976
|
|
|
@@ -1123,7 +1124,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1123
1124
|
self,
|
|
1124
1125
|
instructions: str,
|
|
1125
1126
|
cache: AsyncBatchingMapProxy[str, ResponseFormat],
|
|
1126
|
-
response_format:
|
|
1127
|
+
response_format: type[ResponseFormat] = str,
|
|
1127
1128
|
temperature: float | None = 0.0,
|
|
1128
1129
|
top_p: float = 1.0,
|
|
1129
1130
|
**api_kwargs,
|
|
@@ -1150,7 +1151,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1150
1151
|
cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
|
|
1151
1152
|
instance for managing API call batching and deduplication.
|
|
1152
1153
|
Set cache.batch_size=None to enable automatic batch size optimization.
|
|
1153
|
-
response_format (
|
|
1154
|
+
response_format (type[ResponseFormat], optional): Pydantic model or built‑in
|
|
1154
1155
|
type the assistant should return. Defaults to ``str``.
|
|
1155
1156
|
temperature (float | None, optional): Sampling temperature. ``None`` omits the
|
|
1156
1157
|
parameter (recommended for reasoning models). Defaults to ``0.0``.
|
|
@@ -1181,7 +1182,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1181
1182
|
async def responses(
|
|
1182
1183
|
self,
|
|
1183
1184
|
instructions: str,
|
|
1184
|
-
response_format:
|
|
1185
|
+
response_format: type[ResponseFormat] = str,
|
|
1185
1186
|
batch_size: int | None = None,
|
|
1186
1187
|
temperature: float | None = 0.0,
|
|
1187
1188
|
top_p: float = 1.0,
|
|
@@ -1209,7 +1210,7 @@ class AsyncOpenAIVecSeriesAccessor:
|
|
|
1209
1210
|
|
|
1210
1211
|
Args:
|
|
1211
1212
|
instructions (str): System prompt prepended to every user message.
|
|
1212
|
-
response_format (
|
|
1213
|
+
response_format (type[ResponseFormat], optional): Pydantic model or built‑in
|
|
1213
1214
|
type the assistant should return. Defaults to ``str``.
|
|
1214
1215
|
batch_size (int | None, optional): Number of prompts grouped into a single
|
|
1215
1216
|
request. Defaults to ``None`` (automatic batch size optimization
|
|
@@ -1558,7 +1559,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1558
1559
|
self,
|
|
1559
1560
|
instructions: str,
|
|
1560
1561
|
cache: AsyncBatchingMapProxy[str, ResponseFormat],
|
|
1561
|
-
response_format:
|
|
1562
|
+
response_format: type[ResponseFormat] = str,
|
|
1562
1563
|
temperature: float | None = 0.0,
|
|
1563
1564
|
top_p: float = 1.0,
|
|
1564
1565
|
**api_kwargs,
|
|
@@ -1594,7 +1595,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1594
1595
|
cache (AsyncBatchingMapProxy[str, ResponseFormat]): Pre-configured cache
|
|
1595
1596
|
instance for managing API call batching and deduplication.
|
|
1596
1597
|
Set cache.batch_size=None to enable automatic batch size optimization.
|
|
1597
|
-
response_format (
|
|
1598
|
+
response_format (type[ResponseFormat], optional): Desired Python type of the
|
|
1598
1599
|
responses. Defaults to ``str``.
|
|
1599
1600
|
temperature (float | None, optional): Sampling temperature. Defaults to ``0.0``.
|
|
1600
1601
|
top_p (float, optional): Nucleus sampling parameter. Defaults to ``1.0``.
|
|
@@ -1618,7 +1619,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1618
1619
|
async def responses(
|
|
1619
1620
|
self,
|
|
1620
1621
|
instructions: str,
|
|
1621
|
-
response_format:
|
|
1622
|
+
response_format: type[ResponseFormat] = str,
|
|
1622
1623
|
batch_size: int | None = None,
|
|
1623
1624
|
temperature: float | None = 0.0,
|
|
1624
1625
|
top_p: float = 1.0,
|
|
@@ -1650,7 +1651,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1650
1651
|
|
|
1651
1652
|
Args:
|
|
1652
1653
|
instructions (str): System prompt for the assistant.
|
|
1653
|
-
response_format (
|
|
1654
|
+
response_format (type[ResponseFormat], optional): Desired Python type of the
|
|
1654
1655
|
responses. Defaults to ``str``.
|
|
1655
1656
|
batch_size (int | None, optional): Number of requests sent in one batch.
|
|
1656
1657
|
Defaults to ``None`` (automatic batch size optimization
|
|
@@ -1936,7 +1937,7 @@ class AsyncOpenAIVecDataFrameAccessor:
|
|
|
1936
1937
|
if missing_rows.empty:
|
|
1937
1938
|
return self._obj
|
|
1938
1939
|
|
|
1939
|
-
filled_values:
|
|
1940
|
+
filled_values: list[FillNaResponse] = await missing_rows.aio.task(
|
|
1940
1941
|
task=task, batch_size=batch_size, max_concurrency=max_concurrency, show_progress=show_progress, **api_kwargs
|
|
1941
1942
|
)
|
|
1942
1943
|
|
openaivec/spark.py
CHANGED
|
@@ -123,8 +123,9 @@ Note: This module provides asynchronous support through the pandas extensions.
|
|
|
123
123
|
|
|
124
124
|
import asyncio
|
|
125
125
|
import logging
|
|
126
|
+
from collections.abc import Iterator
|
|
126
127
|
from enum import Enum
|
|
127
|
-
from typing import
|
|
128
|
+
from typing import Union, get_args, get_origin
|
|
128
129
|
|
|
129
130
|
import numpy as np
|
|
130
131
|
import pandas as pd
|
|
@@ -157,13 +158,13 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
|
|
|
157
158
|
def _python_type_to_spark(python_type):
|
|
158
159
|
origin = get_origin(python_type)
|
|
159
160
|
|
|
160
|
-
# For list types (e.g.,
|
|
161
|
-
if origin is list
|
|
161
|
+
# For list types (e.g., list[int])
|
|
162
|
+
if origin is list:
|
|
162
163
|
# Retrieve the inner type and recursively convert it
|
|
163
164
|
inner_type = get_args(python_type)[0]
|
|
164
165
|
return ArrayType(_python_type_to_spark(inner_type))
|
|
165
166
|
|
|
166
|
-
# For Optional types (Union
|
|
167
|
+
# For Optional types (T | None via Union internally)
|
|
167
168
|
elif origin is Union:
|
|
168
169
|
non_none_args = [arg for arg in get_args(python_type) if arg is not type(None)]
|
|
169
170
|
if len(non_none_args) == 1:
|
|
@@ -196,7 +197,7 @@ def _python_type_to_spark(python_type):
|
|
|
196
197
|
raise ValueError(f"Unsupported type: {python_type}")
|
|
197
198
|
|
|
198
199
|
|
|
199
|
-
def _pydantic_to_spark_schema(model:
|
|
200
|
+
def _pydantic_to_spark_schema(model: type[BaseModel]) -> StructType:
|
|
200
201
|
fields = []
|
|
201
202
|
for field_name, field in model.model_fields.items():
|
|
202
203
|
field_type = field.annotation
|
|
@@ -218,7 +219,7 @@ def _safe_cast_str(x: str | None) -> str | None:
|
|
|
218
219
|
return None
|
|
219
220
|
|
|
220
221
|
|
|
221
|
-
def _safe_dump(x: BaseModel | None) -> Dict:
|
|
222
|
+
def _safe_dump(x: BaseModel | None) -> dict:
|
|
222
223
|
try:
|
|
223
224
|
if x is None:
|
|
224
225
|
return {}
|
|
@@ -231,7 +232,7 @@ def _safe_dump(x: BaseModel | None) -> Dict:
|
|
|
231
232
|
|
|
232
233
|
def responses_udf(
|
|
233
234
|
instructions: str,
|
|
234
|
-
response_format:
|
|
235
|
+
response_format: type[ResponseFormat] = str,
|
|
235
236
|
model_name: str = "gpt-4.1-mini",
|
|
236
237
|
batch_size: int | None = None,
|
|
237
238
|
temperature: float | None = 0.0,
|
|
@@ -261,7 +262,7 @@ def responses_udf(
|
|
|
261
262
|
|
|
262
263
|
Args:
|
|
263
264
|
instructions (str): The system prompt or instructions for the model.
|
|
264
|
-
response_format (
|
|
265
|
+
response_format (type[ResponseFormat]): The desired output format. Either `str` for plain text
|
|
265
266
|
or a Pydantic `BaseModel` for structured JSON output. Defaults to `str`.
|
|
266
267
|
model_name (str): For Azure OpenAI, use your deployment name (e.g., "my-gpt4-deployment").
|
|
267
268
|
For OpenAI, use the model name (e.g., "gpt-4.1-mini"). Defaults to "gpt-4.1-mini".
|
|
@@ -555,12 +556,12 @@ def embeddings_udf(
|
|
|
555
556
|
return _embeddings_udf # type: ignore[return-value]
|
|
556
557
|
|
|
557
558
|
|
|
558
|
-
def split_to_chunks_udf(max_tokens: int, sep:
|
|
559
|
+
def split_to_chunks_udf(max_tokens: int, sep: list[str]) -> UserDefinedFunction:
|
|
559
560
|
"""Create a pandas‑UDF that splits text into token‑bounded chunks.
|
|
560
561
|
|
|
561
562
|
Args:
|
|
562
563
|
max_tokens (int): Maximum tokens allowed per chunk.
|
|
563
|
-
sep (
|
|
564
|
+
sep (list[str]): Ordered list of separator strings used by ``TextChunker``.
|
|
564
565
|
|
|
565
566
|
Returns:
|
|
566
567
|
A pandas UDF producing an ``ArrayType(StringType())`` column whose
|
|
@@ -61,7 +61,7 @@ Attributes:
|
|
|
61
61
|
top_p=1.0 for deterministic output.
|
|
62
62
|
"""
|
|
63
63
|
|
|
64
|
-
from typing import
|
|
64
|
+
from typing import Literal
|
|
65
65
|
|
|
66
66
|
from pydantic import BaseModel, Field
|
|
67
67
|
|
|
@@ -86,7 +86,7 @@ class CustomerSentiment(BaseModel):
|
|
|
86
86
|
)
|
|
87
87
|
sentiment_intensity: float = Field(description="Intensity of sentiment from 0.0 (mild) to 1.0 (extreme)")
|
|
88
88
|
polarity_score: float = Field(description="Polarity score from -1.0 (very negative) to 1.0 (very positive)")
|
|
89
|
-
tone_indicators:
|
|
89
|
+
tone_indicators: list[str] = Field(description="Specific words or phrases indicating tone")
|
|
90
90
|
relationship_status: Literal["new", "loyal", "at_risk", "detractor", "advocate"] = Field(
|
|
91
91
|
description="Customer relationship status (new, loyal, at_risk, detractor, advocate)"
|
|
92
92
|
)
|
|
@@ -92,7 +92,7 @@ Example:
|
|
|
92
92
|
```
|
|
93
93
|
"""
|
|
94
94
|
|
|
95
|
-
from typing import Dict,
|
|
95
|
+
from typing import Dict, Literal
|
|
96
96
|
|
|
97
97
|
from pydantic import BaseModel, Field
|
|
98
98
|
|
|
@@ -106,7 +106,7 @@ class InquiryClassification(BaseModel):
|
|
|
106
106
|
subcategory: str = Field(description="Specific subcategory within the primary category")
|
|
107
107
|
confidence: float = Field(description="Confidence score for classification (0.0-1.0)")
|
|
108
108
|
routing: str = Field(description="Recommended routing destination")
|
|
109
|
-
keywords:
|
|
109
|
+
keywords: list[str] = Field(description="Key terms that influenced the classification")
|
|
110
110
|
priority: Literal["low", "medium", "high", "urgent"] = Field(
|
|
111
111
|
description="Suggested priority level (low, medium, high, urgent)"
|
|
112
112
|
)
|
|
@@ -114,25 +114,25 @@ class InquiryClassification(BaseModel):
|
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
def inquiry_classification(
|
|
117
|
-
categories: Dict[str,
|
|
117
|
+
categories: Dict[str, list[str]] | None = None,
|
|
118
118
|
routing_rules: Dict[str, str] | None = None,
|
|
119
119
|
priority_rules: Dict[str, str] | None = None,
|
|
120
120
|
business_context: str = "general customer support",
|
|
121
|
-
custom_keywords: Dict[str,
|
|
121
|
+
custom_keywords: Dict[str, list[str]] | None = None,
|
|
122
122
|
temperature: float = 0.0,
|
|
123
123
|
top_p: float = 1.0,
|
|
124
124
|
) -> PreparedTask:
|
|
125
125
|
"""Create a configurable inquiry classification task.
|
|
126
126
|
|
|
127
127
|
Args:
|
|
128
|
-
categories (
|
|
128
|
+
categories (dict[str, list[str]] | None): Dictionary mapping category names to lists of subcategories.
|
|
129
129
|
Default provides standard support categories.
|
|
130
|
-
routing_rules (
|
|
130
|
+
routing_rules (dict[str, str] | None): Dictionary mapping categories to routing destinations.
|
|
131
131
|
Default provides standard routing options.
|
|
132
|
-
priority_rules (
|
|
132
|
+
priority_rules (dict[str, str] | None): Dictionary mapping keywords/patterns to priority levels.
|
|
133
133
|
Default uses standard priority indicators.
|
|
134
134
|
business_context (str): Description of the business context to help with classification.
|
|
135
|
-
custom_keywords (
|
|
135
|
+
custom_keywords (dict[str, list[str]] | None): Dictionary mapping categories to relevant keywords.
|
|
136
136
|
temperature (float): Sampling temperature (0.0-1.0).
|
|
137
137
|
top_p (float): Nucleus sampling parameter (0.0-1.0).
|
|
138
138
|
|
|
@@ -59,7 +59,7 @@ Attributes:
|
|
|
59
59
|
top_p=1.0 for deterministic output.
|
|
60
60
|
"""
|
|
61
61
|
|
|
62
|
-
from typing import
|
|
62
|
+
from typing import Literal
|
|
63
63
|
|
|
64
64
|
from pydantic import BaseModel, Field
|
|
65
65
|
|
|
@@ -71,15 +71,15 @@ __all__ = ["inquiry_summary"]
|
|
|
71
71
|
class InquirySummary(BaseModel):
|
|
72
72
|
summary: str = Field(description="Concise summary of the customer inquiry (2-3 sentences)")
|
|
73
73
|
main_issue: str = Field(description="Primary problem or request being addressed")
|
|
74
|
-
secondary_issues:
|
|
74
|
+
secondary_issues: list[str] = Field(description="Additional issues mentioned in the inquiry")
|
|
75
75
|
customer_background: str = Field(description="Relevant customer context or history mentioned")
|
|
76
|
-
actions_taken:
|
|
76
|
+
actions_taken: list[str] = Field(description="Steps the customer has already attempted")
|
|
77
77
|
timeline: str = Field(description="Timeline of events or when the issue started")
|
|
78
78
|
impact_description: str = Field(description="How the issue affects the customer")
|
|
79
79
|
resolution_status: Literal["not_started", "in_progress", "needs_escalation", "resolved"] = Field(
|
|
80
80
|
description="Current status (not_started, in_progress, needs_escalation, resolved)"
|
|
81
81
|
)
|
|
82
|
-
key_details:
|
|
82
|
+
key_details: list[str] = Field(description="Important technical details, error messages, or specifics")
|
|
83
83
|
follow_up_needed: bool = Field(description="Whether follow-up communication is required")
|
|
84
84
|
summary_confidence: float = Field(description="Confidence in summary accuracy (0.0-1.0)")
|
|
85
85
|
|
|
@@ -57,7 +57,7 @@ Attributes:
|
|
|
57
57
|
top_p=1.0 for deterministic output.
|
|
58
58
|
"""
|
|
59
59
|
|
|
60
|
-
from typing import
|
|
60
|
+
from typing import Literal
|
|
61
61
|
|
|
62
62
|
from pydantic import BaseModel, Field
|
|
63
63
|
|
|
@@ -80,7 +80,7 @@ class IntentAnalysis(BaseModel):
|
|
|
80
80
|
description="Primary customer intent (get_help, make_purchase, cancel_service, "
|
|
81
81
|
"get_refund, report_issue, seek_information, request_feature, provide_feedback)"
|
|
82
82
|
)
|
|
83
|
-
secondary_intents:
|
|
83
|
+
secondary_intents: list[str] = Field(description="Additional intents if multiple goals are present")
|
|
84
84
|
action_required: Literal[
|
|
85
85
|
"provide_information", "troubleshoot", "process_request", "escalate", "redirect", "schedule_callback"
|
|
86
86
|
] = Field(
|
|
@@ -92,9 +92,9 @@ class IntentAnalysis(BaseModel):
|
|
|
92
92
|
description="Likelihood of successful resolution (very_high, high, medium, low, very_low)"
|
|
93
93
|
)
|
|
94
94
|
customer_goal: str = Field(description="What the customer ultimately wants to achieve")
|
|
95
|
-
implicit_needs:
|
|
96
|
-
blocking_factors:
|
|
97
|
-
next_steps:
|
|
95
|
+
implicit_needs: list[str] = Field(description="Unstated needs or concerns that may need addressing")
|
|
96
|
+
blocking_factors: list[str] = Field(description="Potential obstacles to achieving customer goal")
|
|
97
|
+
next_steps: list[str] = Field(description="Recommended next steps to address customer intent")
|
|
98
98
|
resolution_complexity: Literal["simple", "moderate", "complex", "very_complex"] = Field(
|
|
99
99
|
description="Complexity of resolution (simple, moderate, complex, very_complex)"
|
|
100
100
|
)
|
|
@@ -57,7 +57,7 @@ Attributes:
|
|
|
57
57
|
top_p=1.0 for deterministic output.
|
|
58
58
|
"""
|
|
59
59
|
|
|
60
|
-
from typing import
|
|
60
|
+
from typing import Literal
|
|
61
61
|
|
|
62
62
|
from pydantic import BaseModel, Field
|
|
63
63
|
|
|
@@ -77,14 +77,14 @@ class ResponseSuggestion(BaseModel):
|
|
|
77
77
|
response_type: Literal["acknowledgment", "solution", "escalation", "information_request", "closure"] = Field(
|
|
78
78
|
description="Type of response (acknowledgment, solution, escalation, information_request, closure)"
|
|
79
79
|
)
|
|
80
|
-
key_points:
|
|
80
|
+
key_points: list[str] = Field(description="Main points that must be addressed in the response")
|
|
81
81
|
follow_up_required: bool = Field(description="Whether follow-up communication is needed")
|
|
82
82
|
escalation_suggested: bool = Field(description="Whether escalation to management is recommended")
|
|
83
|
-
resources_needed:
|
|
83
|
+
resources_needed: list[str] = Field(description="Additional resources or information required")
|
|
84
84
|
estimated_resolution_time: Literal["immediate", "hours", "days", "weeks"] = Field(
|
|
85
85
|
description="Estimated time to resolution (immediate, hours, days, weeks)"
|
|
86
86
|
)
|
|
87
|
-
alternative_responses:
|
|
87
|
+
alternative_responses: list[str] = Field(description="Alternative response options for different scenarios")
|
|
88
88
|
personalization_notes: str = Field(description="Suggestions for personalizing the response")
|
|
89
89
|
|
|
90
90
|
|
|
@@ -96,7 +96,7 @@ Example:
|
|
|
96
96
|
```
|
|
97
97
|
"""
|
|
98
98
|
|
|
99
|
-
from typing import Dict,
|
|
99
|
+
from typing import Dict, Literal
|
|
100
100
|
|
|
101
101
|
from pydantic import BaseModel, Field
|
|
102
102
|
|
|
@@ -115,7 +115,7 @@ class UrgencyAnalysis(BaseModel):
|
|
|
115
115
|
"(immediate, within_1_hour, within_4_hours, within_24_hours)"
|
|
116
116
|
)
|
|
117
117
|
escalation_required: bool = Field(description="Whether this inquiry requires escalation to management")
|
|
118
|
-
urgency_indicators:
|
|
118
|
+
urgency_indicators: list[str] = Field(description="Specific words or phrases that indicate urgency")
|
|
119
119
|
business_impact: Literal["none", "low", "medium", "high", "critical"] = Field(
|
|
120
120
|
description="Potential business impact (none, low, medium, high, critical)"
|
|
121
121
|
)
|
|
@@ -131,7 +131,7 @@ def urgency_analysis(
|
|
|
131
131
|
response_times: Dict[str, str] | None = None,
|
|
132
132
|
customer_tiers: Dict[str, str] | None = None,
|
|
133
133
|
escalation_rules: Dict[str, str] | None = None,
|
|
134
|
-
urgency_keywords: Dict[str,
|
|
134
|
+
urgency_keywords: Dict[str, list[str]] | None = None,
|
|
135
135
|
business_context: str = "general customer support",
|
|
136
136
|
business_hours: str = "24/7 support",
|
|
137
137
|
sla_rules: Dict[str, str] | None = None,
|
|
@@ -141,14 +141,14 @@ def urgency_analysis(
|
|
|
141
141
|
"""Create a configurable urgency analysis task.
|
|
142
142
|
|
|
143
143
|
Args:
|
|
144
|
-
urgency_levels (
|
|
145
|
-
response_times (
|
|
146
|
-
customer_tiers (
|
|
147
|
-
escalation_rules (
|
|
148
|
-
urgency_keywords (
|
|
144
|
+
urgency_levels (dict[str, str] | None): Dictionary mapping urgency levels to descriptions.
|
|
145
|
+
response_times (dict[str, str] | None): Dictionary mapping urgency levels to response times.
|
|
146
|
+
customer_tiers (dict[str, str] | None): Dictionary mapping tier names to descriptions.
|
|
147
|
+
escalation_rules (dict[str, str] | None): Dictionary mapping conditions to escalation actions.
|
|
148
|
+
urgency_keywords (dict[str, list[str]] | None): Dictionary mapping urgency levels to indicator keywords.
|
|
149
149
|
business_context (str): Description of the business context.
|
|
150
150
|
business_hours (str): Description of business hours for response time calculation.
|
|
151
|
-
sla_rules (
|
|
151
|
+
sla_rules (dict[str, str] | None): Dictionary mapping customer tiers to SLA requirements.
|
|
152
152
|
temperature (float): Sampling temperature (0.0-1.0).
|
|
153
153
|
top_p (float): Nucleus sampling parameter (0.0-1.0).
|
|
154
154
|
|
|
@@ -48,8 +48,6 @@ Attributes:
|
|
|
48
48
|
top_p=1.0 for deterministic output.
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
|
-
from typing import List
|
|
52
|
-
|
|
53
51
|
from pydantic import BaseModel, Field
|
|
54
52
|
|
|
55
53
|
from openaivec._model import PreparedTask
|
|
@@ -66,8 +64,8 @@ class DependencyRelation(BaseModel):
|
|
|
66
64
|
|
|
67
65
|
|
|
68
66
|
class DependencyParsing(BaseModel):
|
|
69
|
-
tokens: List[str] = Field(description="List of tokens in the sentence")
|
|
70
|
-
dependencies: List[DependencyRelation] = Field(description="Dependency relations between tokens")
|
|
67
|
+
tokens: list[str] = Field(description="List of tokens in the sentence")
|
|
68
|
+
dependencies: list[DependencyRelation] = Field(description="Dependency relations between tokens")
|
|
71
69
|
root_word: str = Field(description="Root word of the sentence")
|
|
72
70
|
syntactic_structure: str = Field(description="Tree representation of the syntactic structure")
|
|
73
71
|
|
|
@@ -50,8 +50,6 @@ Attributes:
|
|
|
50
50
|
top_p=1.0 for deterministic output.
|
|
51
51
|
"""
|
|
52
52
|
|
|
53
|
-
from typing import List
|
|
54
|
-
|
|
55
53
|
from pydantic import BaseModel, Field
|
|
56
54
|
|
|
57
55
|
from openaivec._model import PreparedTask
|
|
@@ -67,9 +65,9 @@ class Keyword(BaseModel):
|
|
|
67
65
|
|
|
68
66
|
|
|
69
67
|
class KeywordExtraction(BaseModel):
|
|
70
|
-
keywords: List[Keyword] = Field(description="Extracted keywords ranked by importance")
|
|
71
|
-
keyphrases: List[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
|
|
72
|
-
topics: List[str] = Field(description="Identified main topics in the text")
|
|
68
|
+
keywords: list[Keyword] = Field(description="Extracted keywords ranked by importance")
|
|
69
|
+
keyphrases: list[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
|
|
70
|
+
topics: list[str] = Field(description="Identified main topics in the text")
|
|
73
71
|
summary: str = Field(description="Brief summary of the text content")
|
|
74
72
|
|
|
75
73
|
|
|
@@ -49,8 +49,6 @@ Attributes:
|
|
|
49
49
|
top_p=1.0 for deterministic output.
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
|
-
from typing import List
|
|
53
|
-
|
|
54
52
|
from pydantic import BaseModel, Field
|
|
55
53
|
|
|
56
54
|
from openaivec._model import PreparedTask
|
|
@@ -59,10 +57,10 @@ __all__ = ["MORPHOLOGICAL_ANALYSIS"]
|
|
|
59
57
|
|
|
60
58
|
|
|
61
59
|
class MorphologicalAnalysis(BaseModel):
|
|
62
|
-
tokens: List[str] = Field(description="List of tokens in the text")
|
|
63
|
-
pos_tags: List[str] = Field(description="Part-of-speech tags for each token")
|
|
64
|
-
lemmas: List[str] = Field(description="Lemmatized form of each token")
|
|
65
|
-
morphological_features: List[str] = Field(
|
|
60
|
+
tokens: list[str] = Field(description="List of tokens in the text")
|
|
61
|
+
pos_tags: list[str] = Field(description="Part-of-speech tags for each token")
|
|
62
|
+
lemmas: list[str] = Field(description="Lemmatized form of each token")
|
|
63
|
+
morphological_features: list[str] = Field(
|
|
66
64
|
description="Morphological features for each token (e.g., tense, number, case)"
|
|
67
65
|
)
|
|
68
66
|
|
|
@@ -48,8 +48,6 @@ Attributes:
|
|
|
48
48
|
top_p=1.0 for deterministic output.
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
|
-
from typing import List
|
|
52
|
-
|
|
53
51
|
from pydantic import BaseModel, Field
|
|
54
52
|
|
|
55
53
|
from openaivec._model import PreparedTask
|
|
@@ -66,13 +64,13 @@ class NamedEntity(BaseModel):
|
|
|
66
64
|
|
|
67
65
|
|
|
68
66
|
class NamedEntityRecognition(BaseModel):
|
|
69
|
-
persons: List[NamedEntity] = Field(description="Person entities")
|
|
70
|
-
organizations: List[NamedEntity] = Field(description="Organization entities")
|
|
71
|
-
locations: List[NamedEntity] = Field(description="Location entities")
|
|
72
|
-
dates: List[NamedEntity] = Field(description="Date and time entities")
|
|
73
|
-
money: List[NamedEntity] = Field(description="Money and currency entities")
|
|
74
|
-
percentages: List[NamedEntity] = Field(description="Percentage entities")
|
|
75
|
-
miscellaneous: List[NamedEntity] = Field(description="Other named entities")
|
|
67
|
+
persons: list[NamedEntity] = Field(description="Person entities")
|
|
68
|
+
organizations: list[NamedEntity] = Field(description="Organization entities")
|
|
69
|
+
locations: list[NamedEntity] = Field(description="Location entities")
|
|
70
|
+
dates: list[NamedEntity] = Field(description="Date and time entities")
|
|
71
|
+
money: list[NamedEntity] = Field(description="Money and currency entities")
|
|
72
|
+
percentages: list[NamedEntity] = Field(description="Percentage entities")
|
|
73
|
+
miscellaneous: list[NamedEntity] = Field(description="Other named entities")
|
|
76
74
|
|
|
77
75
|
|
|
78
76
|
NAMED_ENTITY_RECOGNITION = PreparedTask(
|
|
@@ -48,7 +48,7 @@ Attributes:
|
|
|
48
48
|
top_p=1.0 for deterministic output.
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
|
-
from typing import
|
|
51
|
+
from typing import Literal
|
|
52
52
|
|
|
53
53
|
from pydantic import BaseModel, Field
|
|
54
54
|
|
|
@@ -62,10 +62,10 @@ class SentimentAnalysis(BaseModel):
|
|
|
62
62
|
description="Overall sentiment (positive, negative, neutral)"
|
|
63
63
|
)
|
|
64
64
|
confidence: float = Field(description="Confidence score for sentiment (0.0-1.0)")
|
|
65
|
-
emotions:
|
|
65
|
+
emotions: list[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(
|
|
66
66
|
description="Detected emotions (joy, sadness, anger, fear, surprise, disgust)"
|
|
67
67
|
)
|
|
68
|
-
emotion_scores:
|
|
68
|
+
emotion_scores: list[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
|
|
69
69
|
polarity: float = Field(description="Polarity score from -1.0 (negative) to 1.0 (positive)")
|
|
70
70
|
subjectivity: float = Field(description="Subjectivity score from 0.0 (objective) to 1.0 (subjective)")
|
|
71
71
|
|
openaivec/task/table/fillna.py
CHANGED
|
@@ -65,7 +65,6 @@ Example:
|
|
|
65
65
|
"""
|
|
66
66
|
|
|
67
67
|
import json
|
|
68
|
-
from typing import Dict, List
|
|
69
68
|
|
|
70
69
|
import pandas as pd
|
|
71
70
|
from pydantic import BaseModel, Field
|
|
@@ -76,8 +75,8 @@ from openaivec._prompt import FewShotPromptBuilder
|
|
|
76
75
|
__all__ = ["fillna", "FillNaResponse"]
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> List[Dict]:
|
|
80
|
-
examples: List[Dict] = []
|
|
78
|
+
def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> list[dict]:
|
|
79
|
+
examples: list[dict] = []
|
|
81
80
|
|
|
82
81
|
samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
|
|
83
82
|
samples = samples.dropna(subset=[target_column_name])
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
openaivec/__init__.py,sha256=mXCGNNTjYbmE4CAXGvAs78soxUsoy_mxxnvaCk_CL6Y,361
|
|
2
|
+
openaivec/_di.py,sha256=1MXaBzaH_ZenQnWKQzBY2z-egHwiteMvg7byoUH3ZZI,10658
|
|
3
|
+
openaivec/_embeddings.py,sha256=upCjl8m9h1CihP6t7wvIH_vivOAPSgmgooAxIhnUMUw,7449
|
|
4
|
+
openaivec/_log.py,sha256=LHNs6AbJzM4weaRARZFroigxR6D148d7WSIMLk1IhbU,1439
|
|
5
|
+
openaivec/_model.py,sha256=toS2oBubrJa9jrdYy-87Fb2XivjXUlk_8Zn5gKUAcFI,3345
|
|
6
|
+
openaivec/_optimize.py,sha256=3nS8VehbS7iGC1tPDDQh-iAgyKHbVYmMbCRBWM77U_U,3827
|
|
7
|
+
openaivec/_prompt.py,sha256=zLv13q47CKV3jnETUyWAIlnjXFSEMs70c8m0yN7_Hek,20820
|
|
8
|
+
openaivec/_provider.py,sha256=YLrEcb4aWBD1fj0n6PNcJpCtEXK6jkUuRH_WxcLDCuI,7145
|
|
9
|
+
openaivec/_proxy.py,sha256=AiGuC1MCFjZCRXCac-pHUI3Np3nf1HIpWY6nC9ZVCFY,29671
|
|
10
|
+
openaivec/_responses.py,sha256=lVJRa_Uc7hQJnYJRgumqwBbu6GToZqsLFS6tIAFO1Fc,24014
|
|
11
|
+
openaivec/_schema.py,sha256=fVsFkCZWSbh2-fiGxnT8cSVrlUQOYWJX5EeL2F6aX4s,24039
|
|
12
|
+
openaivec/_serialize.py,sha256=u2Om94Sc_QgJkTlW2BAGw8wd6gYDhc6IRqvS-qevFSs,8399
|
|
13
|
+
openaivec/_util.py,sha256=XfueAycVCQvgRLS7wF7e306b53lebORvZOBzbQjy4vE,6438
|
|
14
|
+
openaivec/pandas_ext.py,sha256=rCkh8g9eqHn0gUG8j_-jdppQt_Yq_1Wg6FmsCEcpv3k,85985
|
|
15
|
+
openaivec/spark.py,sha256=Dbuhlk8Z89Fwk3fbWp1Ud9uTpfNyfjZOIx8ARJMnQf0,25371
|
|
16
|
+
openaivec/task/__init__.py,sha256=lrgoc9UIox7XnxZ96dQRl88a-8QfuZRFBHshxctpMB8,6178
|
|
17
|
+
openaivec/task/customer_support/__init__.py,sha256=KWfGyXPdZyfGdRH17x7hPpJJ1N2EP9PPhZx0fvBAwSI,884
|
|
18
|
+
openaivec/task/customer_support/customer_sentiment.py,sha256=NHIr9nm2d2Bu1MSpxFsM3_w1UuQrQEwnHrClVbhdCUw,7612
|
|
19
|
+
openaivec/task/customer_support/inquiry_classification.py,sha256=NUU_apX6ADi4SyGUbvflGt-v5Ka7heHXlJOHPAeVoGg,9640
|
|
20
|
+
openaivec/task/customer_support/inquiry_summary.py,sha256=PDQvF_ZEZ9TnFhLM2yIinP-OKz_PSPeIET48P9UIgzQ,6920
|
|
21
|
+
openaivec/task/customer_support/intent_analysis.py,sha256=uWdza2pkqnRJn3JtPWbsTAUDL1Sn-BwH-ZpN2cUxhe8,7504
|
|
22
|
+
openaivec/task/customer_support/response_suggestion.py,sha256=Hxt5MDpdfoo5S7_I_eQ302AOIsSCyNBeaDSMMMfPYoQ,8344
|
|
23
|
+
openaivec/task/customer_support/urgency_analysis.py,sha256=DRd4pmFnwuiNGBKxxkEkfp5CZZeDppmBUThs5NYOL9g,11569
|
|
24
|
+
openaivec/task/nlp/__init__.py,sha256=QoQ0egEK9IEh5hdrE07rZ_KCmC0gy_2FPrWJYRWiipY,512
|
|
25
|
+
openaivec/task/nlp/dependency_parsing.py,sha256=MhrHNCqSd-JmlQ21ISYwGYXazNVZGsVuX_v0ZpyI50w,2817
|
|
26
|
+
openaivec/task/nlp/keyword_extraction.py,sha256=seFeuk6Z2dmlVBFoDN-tOVgCnR7jq36sTsWySjb_ric,2804
|
|
27
|
+
openaivec/task/nlp/morphological_analysis.py,sha256=TcNGA0cYrPczr1ZxflBiokh-qdwMSvRDHq66fP7gi2c,2401
|
|
28
|
+
openaivec/task/nlp/named_entity_recognition.py,sha256=jnVfGtf7TDCNNHrLQ5rhMYvmHc8FKXQxEzC5ib6NnVc,3037
|
|
29
|
+
openaivec/task/nlp/sentiment_analysis.py,sha256=Np-yY0d4Kr5WEjGjq4tNFHDNarBLajJr8Q2E6K9ms3A,3085
|
|
30
|
+
openaivec/task/nlp/translation.py,sha256=VYgiXtr2TL1tbqZkBpyVAy4ahrgd8UO4ZjhIL6xMdkI,6609
|
|
31
|
+
openaivec/task/table/__init__.py,sha256=kJz15WDJXjyC7UIHKBvlTRhCf347PCDMH5T5fONV2sU,83
|
|
32
|
+
openaivec/task/table/fillna.py,sha256=g_CpLnLzK1C5rCiVq15L3X0kywJK6CtSrKRYxQFuhn8,6606
|
|
33
|
+
openaivec-0.14.8.dist-info/METADATA,sha256=ItqzTCNsPigyX9fe5WBQHih3gzT68XjdwFsAOa9-qrI,27566
|
|
34
|
+
openaivec-0.14.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
35
|
+
openaivec-0.14.8.dist-info/licenses/LICENSE,sha256=ws_MuBL-SCEBqPBFl9_FqZkaaydIJmxHrJG2parhU4M,1141
|
|
36
|
+
openaivec-0.14.8.dist-info/RECORD,,
|