openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- openaivec/__init__.py +13 -4
- openaivec/_cache/__init__.py +12 -0
- openaivec/_cache/optimize.py +109 -0
- openaivec/_cache/proxy.py +806 -0
- openaivec/_di.py +326 -0
- openaivec/_embeddings.py +203 -0
- openaivec/{log.py → _log.py} +2 -2
- openaivec/_model.py +113 -0
- openaivec/{prompt.py → _prompt.py} +95 -28
- openaivec/_provider.py +207 -0
- openaivec/_responses.py +511 -0
- openaivec/_schema/__init__.py +9 -0
- openaivec/_schema/infer.py +340 -0
- openaivec/_schema/spec.py +350 -0
- openaivec/_serialize.py +234 -0
- openaivec/{util.py → _util.py} +25 -85
- openaivec/pandas_ext.py +1635 -425
- openaivec/spark.py +604 -335
- openaivec/task/__init__.py +27 -29
- openaivec/task/customer_support/__init__.py +9 -15
- openaivec/task/customer_support/customer_sentiment.py +51 -41
- openaivec/task/customer_support/inquiry_classification.py +86 -61
- openaivec/task/customer_support/inquiry_summary.py +44 -45
- openaivec/task/customer_support/intent_analysis.py +56 -41
- openaivec/task/customer_support/response_suggestion.py +49 -43
- openaivec/task/customer_support/urgency_analysis.py +76 -71
- openaivec/task/nlp/__init__.py +4 -4
- openaivec/task/nlp/dependency_parsing.py +19 -20
- openaivec/task/nlp/keyword_extraction.py +22 -24
- openaivec/task/nlp/morphological_analysis.py +25 -25
- openaivec/task/nlp/named_entity_recognition.py +26 -28
- openaivec/task/nlp/sentiment_analysis.py +29 -21
- openaivec/task/nlp/translation.py +24 -30
- openaivec/task/table/__init__.py +3 -0
- openaivec/task/table/fillna.py +183 -0
- openaivec-1.0.10.dist-info/METADATA +399 -0
- openaivec-1.0.10.dist-info/RECORD +39 -0
- {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
- openaivec/embeddings.py +0 -172
- openaivec/responses.py +0 -392
- openaivec/serialize.py +0 -225
- openaivec/task/model.py +0 -84
- openaivec-0.10.0.dist-info/METADATA +0 -546
- openaivec-0.10.0.dist-info/RECORD +0 -29
- {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,26 +5,26 @@ inquiries to help prioritize support queue and response times.
|
|
|
5
5
|
|
|
6
6
|
Example:
|
|
7
7
|
Basic usage with default settings:
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
```python
|
|
10
10
|
from openai import OpenAI
|
|
11
|
-
from openaivec
|
|
11
|
+
from openaivec import BatchResponses
|
|
12
12
|
from openaivec.task import customer_support
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
client = OpenAI()
|
|
15
15
|
analyzer = BatchResponses.of_task(
|
|
16
16
|
client=client,
|
|
17
|
-
model_name="gpt-
|
|
17
|
+
model_name="gpt-4.1-mini",
|
|
18
18
|
task=customer_support.urgency_analysis()
|
|
19
19
|
)
|
|
20
|
-
|
|
20
|
+
|
|
21
21
|
inquiries = [
|
|
22
22
|
"URGENT: My website is down and I'm losing customers!",
|
|
23
23
|
"Can you help me understand how to use the new feature?",
|
|
24
24
|
"I haven't received my order from last week"
|
|
25
25
|
]
|
|
26
26
|
analyses = analyzer.parse(inquiries)
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
for analysis in analyses:
|
|
29
29
|
print(f"Urgency Level: {analysis.urgency_level}")
|
|
30
30
|
print(f"Score: {analysis.urgency_score}")
|
|
@@ -33,10 +33,10 @@ Example:
|
|
|
33
33
|
```
|
|
34
34
|
|
|
35
35
|
Customized for SaaS platform with business hours:
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
```python
|
|
38
38
|
from openaivec.task import customer_support
|
|
39
|
-
|
|
39
|
+
|
|
40
40
|
# SaaS-specific urgency levels
|
|
41
41
|
saas_urgency_levels = {
|
|
42
42
|
"critical": "Service outages, security breaches, data loss",
|
|
@@ -44,15 +44,15 @@ Example:
|
|
|
44
44
|
"medium": "Feature bugs, performance issues, billing questions",
|
|
45
45
|
"low": "Feature requests, documentation questions, general feedback"
|
|
46
46
|
}
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
# Custom response times based on SLA
|
|
49
49
|
saas_response_times = {
|
|
50
50
|
"critical": "immediate",
|
|
51
51
|
"high": "within_1_hour",
|
|
52
|
-
"medium": "within_4_hours",
|
|
52
|
+
"medium": "within_4_hours",
|
|
53
53
|
"low": "within_24_hours"
|
|
54
54
|
}
|
|
55
|
-
|
|
55
|
+
|
|
56
56
|
# Enterprise customer tier gets priority
|
|
57
57
|
enterprise_customer_tiers = {
|
|
58
58
|
"enterprise": "Priority support, dedicated account manager",
|
|
@@ -60,7 +60,7 @@ Example:
|
|
|
60
60
|
"professional": "Professional plan support",
|
|
61
61
|
"starter": "Basic support"
|
|
62
62
|
}
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
task = customer_support.urgency_analysis(
|
|
65
65
|
urgency_levels=saas_urgency_levels,
|
|
66
66
|
response_times=saas_response_times,
|
|
@@ -68,168 +68,174 @@ Example:
|
|
|
68
68
|
business_context="SaaS platform",
|
|
69
69
|
business_hours="9 AM - 5 PM EST, Monday-Friday"
|
|
70
70
|
)
|
|
71
|
-
|
|
71
|
+
|
|
72
72
|
analyzer = BatchResponses.of_task(
|
|
73
73
|
client=client,
|
|
74
|
-
model_name="gpt-
|
|
74
|
+
model_name="gpt-4.1-mini",
|
|
75
75
|
task=task
|
|
76
76
|
)
|
|
77
77
|
```
|
|
78
78
|
|
|
79
79
|
With pandas integration:
|
|
80
|
-
|
|
80
|
+
|
|
81
81
|
```python
|
|
82
82
|
import pandas as pd
|
|
83
83
|
from openaivec import pandas_ext # Required for .ai accessor
|
|
84
84
|
from openaivec.task import customer_support
|
|
85
|
-
|
|
85
|
+
|
|
86
86
|
df = pd.DataFrame({"inquiry": [
|
|
87
87
|
"URGENT: My website is down and I'm losing customers!",
|
|
88
88
|
"Can you help me understand how to use the new feature?",
|
|
89
89
|
"I haven't received my order from last week"
|
|
90
90
|
]})
|
|
91
91
|
df["urgency"] = df["inquiry"].ai.task(customer_support.urgency_analysis())
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
# Extract urgency components
|
|
94
94
|
extracted_df = df.ai.extract("urgency")
|
|
95
95
|
print(extracted_df[["inquiry", "urgency_urgency_level", "urgency_urgency_score", "urgency_response_time"]])
|
|
96
96
|
```
|
|
97
97
|
"""
|
|
98
98
|
|
|
99
|
-
from typing import
|
|
99
|
+
from typing import Dict, Literal
|
|
100
|
+
|
|
100
101
|
from pydantic import BaseModel, Field
|
|
101
102
|
|
|
102
|
-
from
|
|
103
|
+
from openaivec._model import PreparedTask
|
|
103
104
|
|
|
104
105
|
__all__ = ["urgency_analysis"]
|
|
105
106
|
|
|
106
107
|
|
|
107
108
|
class UrgencyAnalysis(BaseModel):
|
|
108
|
-
urgency_level: Literal["critical", "high", "medium", "low"] = Field(
|
|
109
|
+
urgency_level: Literal["critical", "high", "medium", "low"] = Field(
|
|
110
|
+
description="Urgency level from configured levels (critical, high, medium, low)"
|
|
111
|
+
)
|
|
109
112
|
urgency_score: float = Field(description="Urgency score from 0.0 (not urgent) to 1.0 (extremely urgent)")
|
|
110
|
-
response_time: Literal["immediate", "within_1_hour", "within_4_hours", "within_24_hours"] = Field(
|
|
113
|
+
response_time: Literal["immediate", "within_1_hour", "within_4_hours", "within_24_hours"] = Field(
|
|
114
|
+
description="Recommended response time from configured times "
|
|
115
|
+
"(immediate, within_1_hour, within_4_hours, within_24_hours)"
|
|
116
|
+
)
|
|
111
117
|
escalation_required: bool = Field(description="Whether this inquiry requires escalation to management")
|
|
112
|
-
urgency_indicators:
|
|
113
|
-
business_impact: Literal["none", "low", "medium", "high", "critical"] = Field(
|
|
114
|
-
|
|
118
|
+
urgency_indicators: list[str] = Field(description="Specific words or phrases that indicate urgency")
|
|
119
|
+
business_impact: Literal["none", "low", "medium", "high", "critical"] = Field(
|
|
120
|
+
description="Potential business impact (none, low, medium, high, critical)"
|
|
121
|
+
)
|
|
122
|
+
customer_tier: Literal["enterprise", "premium", "standard", "basic"] = Field(
|
|
123
|
+
description="Inferred customer tier from configured tiers (enterprise, premium, standard, basic)"
|
|
124
|
+
)
|
|
115
125
|
reasoning: str = Field(description="Brief explanation of urgency assessment")
|
|
116
126
|
sla_compliance: bool = Field(description="Whether response time aligns with SLA requirements")
|
|
117
127
|
|
|
118
128
|
|
|
119
129
|
def urgency_analysis(
|
|
120
|
-
urgency_levels:
|
|
121
|
-
response_times:
|
|
122
|
-
customer_tiers:
|
|
123
|
-
escalation_rules:
|
|
124
|
-
urgency_keywords:
|
|
130
|
+
urgency_levels: Dict[str, str] | None = None,
|
|
131
|
+
response_times: Dict[str, str] | None = None,
|
|
132
|
+
customer_tiers: Dict[str, str] | None = None,
|
|
133
|
+
escalation_rules: Dict[str, str] | None = None,
|
|
134
|
+
urgency_keywords: Dict[str, list[str]] | None = None,
|
|
125
135
|
business_context: str = "general customer support",
|
|
126
136
|
business_hours: str = "24/7 support",
|
|
127
|
-
sla_rules:
|
|
128
|
-
temperature: float = 0.0,
|
|
129
|
-
top_p: float = 1.0
|
|
137
|
+
sla_rules: Dict[str, str] | None = None,
|
|
130
138
|
) -> PreparedTask:
|
|
131
139
|
"""Create a configurable urgency analysis task.
|
|
132
|
-
|
|
140
|
+
|
|
133
141
|
Args:
|
|
134
|
-
urgency_levels: Dictionary mapping urgency levels to descriptions.
|
|
135
|
-
response_times: Dictionary mapping urgency levels to response times.
|
|
136
|
-
customer_tiers: Dictionary mapping tier names to descriptions.
|
|
137
|
-
escalation_rules: Dictionary mapping conditions to escalation actions.
|
|
138
|
-
urgency_keywords: Dictionary mapping urgency levels to indicator keywords.
|
|
139
|
-
business_context: Description of the business context.
|
|
140
|
-
business_hours: Description of business hours for response time calculation.
|
|
141
|
-
sla_rules: Dictionary mapping customer tiers to SLA requirements.
|
|
142
|
-
|
|
143
|
-
top_p: Nucleus sampling parameter (0.0-1.0).
|
|
144
|
-
|
|
142
|
+
urgency_levels (dict[str, str] | None): Dictionary mapping urgency levels to descriptions.
|
|
143
|
+
response_times (dict[str, str] | None): Dictionary mapping urgency levels to response times.
|
|
144
|
+
customer_tiers (dict[str, str] | None): Dictionary mapping tier names to descriptions.
|
|
145
|
+
escalation_rules (dict[str, str] | None): Dictionary mapping conditions to escalation actions.
|
|
146
|
+
urgency_keywords (dict[str, list[str]] | None): Dictionary mapping urgency levels to indicator keywords.
|
|
147
|
+
business_context (str): Description of the business context.
|
|
148
|
+
business_hours (str): Description of business hours for response time calculation.
|
|
149
|
+
sla_rules (dict[str, str] | None): Dictionary mapping customer tiers to SLA requirements.
|
|
150
|
+
|
|
145
151
|
Returns:
|
|
146
152
|
PreparedTask configured for urgency analysis.
|
|
147
153
|
"""
|
|
148
|
-
|
|
154
|
+
|
|
149
155
|
# Default urgency levels
|
|
150
156
|
if urgency_levels is None:
|
|
151
157
|
urgency_levels = {
|
|
152
158
|
"critical": "Service outages, security breaches, data loss, system failures affecting business operations",
|
|
153
159
|
"high": "Account locked, payment failures, urgent deadlines, angry customers, revenue-impacting issues",
|
|
154
160
|
"medium": "Feature not working, delivery delays, billing questions, moderate customer frustration",
|
|
155
|
-
"low": "General questions, feature requests, feedback, compliments, minor issues"
|
|
161
|
+
"low": "General questions, feature requests, feedback, compliments, minor issues",
|
|
156
162
|
}
|
|
157
|
-
|
|
163
|
+
|
|
158
164
|
# Default response times
|
|
159
165
|
if response_times is None:
|
|
160
166
|
response_times = {
|
|
161
167
|
"critical": "immediate",
|
|
162
168
|
"high": "within_1_hour",
|
|
163
169
|
"medium": "within_4_hours",
|
|
164
|
-
"low": "within_24_hours"
|
|
170
|
+
"low": "within_24_hours",
|
|
165
171
|
}
|
|
166
|
-
|
|
172
|
+
|
|
167
173
|
# Default customer tiers
|
|
168
174
|
if customer_tiers is None:
|
|
169
175
|
customer_tiers = {
|
|
170
176
|
"enterprise": "Large contracts, multiple users, business-critical usage",
|
|
171
177
|
"premium": "Paid plans, professional use, higher expectations",
|
|
172
178
|
"standard": "Regular paid users, normal expectations",
|
|
173
|
-
"basic": "Free users, casual usage, lower priority"
|
|
179
|
+
"basic": "Free users, casual usage, lower priority",
|
|
174
180
|
}
|
|
175
|
-
|
|
181
|
+
|
|
176
182
|
# Default escalation rules
|
|
177
183
|
if escalation_rules is None:
|
|
178
184
|
escalation_rules = {
|
|
179
185
|
"immediate": "Critical issues, security breaches, service outages",
|
|
180
186
|
"within_1_hour": "High urgency with customer tier enterprise or premium",
|
|
181
187
|
"manager_review": "Threats to cancel, legal language, compliance issues",
|
|
182
|
-
"no_escalation": "Standard support can handle"
|
|
188
|
+
"no_escalation": "Standard support can handle",
|
|
183
189
|
}
|
|
184
|
-
|
|
190
|
+
|
|
185
191
|
# Default urgency keywords
|
|
186
192
|
if urgency_keywords is None:
|
|
187
193
|
urgency_keywords = {
|
|
188
194
|
"critical": ["urgent", "emergency", "critical", "down", "outage", "security", "breach", "immediate"],
|
|
189
195
|
"high": ["ASAP", "urgent", "problem", "issue", "error", "bug", "frustrated", "angry"],
|
|
190
196
|
"medium": ["question", "help", "support", "feedback", "concern", "delayed"],
|
|
191
|
-
"low": ["information", "thank", "compliment", "suggestion", "general", "when convenient"]
|
|
197
|
+
"low": ["information", "thank", "compliment", "suggestion", "general", "when convenient"],
|
|
192
198
|
}
|
|
193
|
-
|
|
199
|
+
|
|
194
200
|
# Default SLA rules
|
|
195
201
|
if sla_rules is None:
|
|
196
202
|
sla_rules = {
|
|
197
203
|
"enterprise": "Critical: 15min, High: 1hr, Medium: 4hr, Low: 24hr",
|
|
198
204
|
"premium": "Critical: 30min, High: 2hr, Medium: 8hr, Low: 48hr",
|
|
199
205
|
"standard": "Critical: 1hr, High: 4hr, Medium: 24hr, Low: 72hr",
|
|
200
|
-
"basic": "Critical: 4hr, High: 24hr, Medium: 72hr, Low: 1week"
|
|
206
|
+
"basic": "Critical: 4hr, High: 24hr, Medium: 72hr, Low: 1week",
|
|
201
207
|
}
|
|
202
|
-
|
|
208
|
+
|
|
203
209
|
# Build urgency levels section
|
|
204
210
|
urgency_text = "Urgency Levels:\n"
|
|
205
211
|
for level, description in urgency_levels.items():
|
|
206
212
|
urgency_text += f"- {level}: {description}\n"
|
|
207
|
-
|
|
213
|
+
|
|
208
214
|
# Build response times section
|
|
209
215
|
response_text = "Response Times:\n"
|
|
210
216
|
for level, time in response_times.items():
|
|
211
217
|
response_text += f"- {level}: {time}\n"
|
|
212
|
-
|
|
218
|
+
|
|
213
219
|
# Build customer tiers section
|
|
214
220
|
tiers_text = "Customer Tiers:\n"
|
|
215
221
|
for tier, description in customer_tiers.items():
|
|
216
222
|
tiers_text += f"- {tier}: {description}\n"
|
|
217
|
-
|
|
223
|
+
|
|
218
224
|
# Build escalation rules section
|
|
219
225
|
escalation_text = "Escalation Rules:\n"
|
|
220
226
|
for condition, action in escalation_rules.items():
|
|
221
227
|
escalation_text += f"- {condition}: {action}\n"
|
|
222
|
-
|
|
228
|
+
|
|
223
229
|
# Build urgency keywords section
|
|
224
230
|
keywords_text = "Urgency Keywords:\n"
|
|
225
231
|
for level, keywords in urgency_keywords.items():
|
|
226
232
|
keywords_text += f"- {level}: {', '.join(keywords)}\n"
|
|
227
|
-
|
|
233
|
+
|
|
228
234
|
# Build SLA rules section
|
|
229
235
|
sla_text = "SLA Rules:\n"
|
|
230
236
|
for tier, sla in sla_rules.items():
|
|
231
237
|
sla_text += f"- {tier}: {sla}\n"
|
|
232
|
-
|
|
238
|
+
|
|
233
239
|
instructions = f"""Analyze the urgency level of the customer inquiry based on language, content, and context.
|
|
234
240
|
|
|
235
241
|
Business Context: {business_context}
|
|
@@ -269,17 +275,16 @@ Consider:
|
|
|
269
275
|
- Revenue or operational impact
|
|
270
276
|
- Compliance or legal implications
|
|
271
277
|
|
|
272
|
-
IMPORTANT: Provide analysis responses in the same language as the input text, except for the
|
|
278
|
+
IMPORTANT: Provide analysis responses in the same language as the input text, except for the
|
|
279
|
+
predefined categorical fields (urgency_level, response_time, business_impact, customer_tier)
|
|
280
|
+
which must use the exact English values specified above. For example, if the input is in French,
|
|
281
|
+
provide urgency_indicators and reasoning in French, but use English values like "critical" for
|
|
282
|
+
urgency_level.
|
|
273
283
|
|
|
274
284
|
Provide detailed analysis with clear reasoning for urgency level and response time recommendations."""
|
|
275
285
|
|
|
276
|
-
return PreparedTask(
|
|
277
|
-
instructions=instructions,
|
|
278
|
-
response_format=UrgencyAnalysis,
|
|
279
|
-
temperature=temperature,
|
|
280
|
-
top_p=top_p
|
|
281
|
-
)
|
|
286
|
+
return PreparedTask(instructions=instructions, response_format=UrgencyAnalysis)
|
|
282
287
|
|
|
283
288
|
|
|
284
289
|
# Backward compatibility - default configuration
|
|
285
|
-
URGENCY_ANALYSIS = urgency_analysis()
|
|
290
|
+
URGENCY_ANALYSIS = urgency_analysis()
|
openaivec/task/nlp/__init__.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from .
|
|
1
|
+
from .dependency_parsing import DEPENDENCY_PARSING
|
|
2
|
+
from .keyword_extraction import KEYWORD_EXTRACTION
|
|
2
3
|
from .morphological_analysis import MORPHOLOGICAL_ANALYSIS
|
|
3
4
|
from .named_entity_recognition import NAMED_ENTITY_RECOGNITION
|
|
4
5
|
from .sentiment_analysis import SENTIMENT_ANALYSIS
|
|
5
|
-
from .
|
|
6
|
-
from .keyword_extraction import KEYWORD_EXTRACTION
|
|
6
|
+
from .translation import MULTILINGUAL_TRANSLATION
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
9
9
|
"MULTILINGUAL_TRANSLATION",
|
|
@@ -12,4 +12,4 @@ __all__ = [
|
|
|
12
12
|
"SENTIMENT_ANALYSIS",
|
|
13
13
|
"DEPENDENCY_PARSING",
|
|
14
14
|
"KEYWORD_EXTRACTION",
|
|
15
|
-
]
|
|
15
|
+
]
|
|
@@ -5,22 +5,22 @@ syntactic dependencies between words in sentences using OpenAI's language models
|
|
|
5
5
|
|
|
6
6
|
Example:
|
|
7
7
|
Basic usage with BatchResponses:
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
```python
|
|
10
10
|
from openai import OpenAI
|
|
11
|
-
from openaivec
|
|
11
|
+
from openaivec import BatchResponses
|
|
12
12
|
from openaivec.task import nlp
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
client = OpenAI()
|
|
15
15
|
analyzer = BatchResponses.of_task(
|
|
16
16
|
client=client,
|
|
17
|
-
model_name="gpt-
|
|
17
|
+
model_name="gpt-4.1-mini",
|
|
18
18
|
task=nlp.DEPENDENCY_PARSING
|
|
19
19
|
)
|
|
20
|
-
|
|
20
|
+
|
|
21
21
|
texts = ["The cat sat on the mat.", "She quickly ran to the store."]
|
|
22
22
|
analyses = analyzer.parse(texts)
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
for analysis in analyses:
|
|
25
25
|
print(f"Tokens: {analysis.tokens}")
|
|
26
26
|
print(f"Dependencies: {analysis.dependencies}")
|
|
@@ -28,30 +28,29 @@ Example:
|
|
|
28
28
|
```
|
|
29
29
|
|
|
30
30
|
With pandas integration:
|
|
31
|
-
|
|
31
|
+
|
|
32
32
|
```python
|
|
33
33
|
import pandas as pd
|
|
34
34
|
from openaivec import pandas_ext # Required for .ai accessor
|
|
35
35
|
from openaivec.task import nlp
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
df = pd.DataFrame({"text": ["The cat sat on the mat.", "She quickly ran to the store."]})
|
|
38
38
|
df["parsing"] = df["text"].ai.task(nlp.DEPENDENCY_PARSING)
|
|
39
|
-
|
|
39
|
+
|
|
40
40
|
# Extract parsing components
|
|
41
41
|
extracted_df = df.ai.extract("parsing")
|
|
42
42
|
print(extracted_df[["text", "parsing_tokens", "parsing_root_word", "parsing_syntactic_structure"]])
|
|
43
43
|
```
|
|
44
44
|
|
|
45
45
|
Attributes:
|
|
46
|
-
DEPENDENCY_PARSING (PreparedTask): A prepared task instance
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
DEPENDENCY_PARSING (PreparedTask): A prepared task instance configured for dependency
|
|
47
|
+
parsing. Provide ``temperature=0.0`` and ``top_p=1.0`` when calling the API for
|
|
48
|
+
deterministic output.
|
|
49
49
|
"""
|
|
50
50
|
|
|
51
|
-
from typing import List
|
|
52
51
|
from pydantic import BaseModel, Field
|
|
53
52
|
|
|
54
|
-
from
|
|
53
|
+
from openaivec._model import PreparedTask
|
|
55
54
|
|
|
56
55
|
__all__ = ["DEPENDENCY_PARSING"]
|
|
57
56
|
|
|
@@ -65,15 +64,15 @@ class DependencyRelation(BaseModel):
|
|
|
65
64
|
|
|
66
65
|
|
|
67
66
|
class DependencyParsing(BaseModel):
|
|
68
|
-
tokens:
|
|
69
|
-
dependencies:
|
|
67
|
+
tokens: list[str] = Field(description="List of tokens in the sentence")
|
|
68
|
+
dependencies: list[DependencyRelation] = Field(description="Dependency relations between tokens")
|
|
70
69
|
root_word: str = Field(description="Root word of the sentence")
|
|
71
70
|
syntactic_structure: str = Field(description="Tree representation of the syntactic structure")
|
|
72
71
|
|
|
73
72
|
|
|
74
73
|
DEPENDENCY_PARSING = PreparedTask(
|
|
75
|
-
instructions="Parse the syntactic dependencies in the following text. Identify dependency
|
|
74
|
+
instructions="Parse the syntactic dependencies in the following text. Identify dependency "
|
|
75
|
+
"relations between words, determine the root word, and provide a tree representation of the "
|
|
76
|
+
"syntactic structure.",
|
|
76
77
|
response_format=DependencyParsing,
|
|
77
|
-
|
|
78
|
-
top_p=1.0
|
|
79
|
-
)
|
|
78
|
+
)
|
|
@@ -5,23 +5,23 @@ important keywords and phrases from text using OpenAI's language models.
|
|
|
5
5
|
|
|
6
6
|
Example:
|
|
7
7
|
Basic usage with BatchResponses:
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
```python
|
|
10
10
|
from openai import OpenAI
|
|
11
|
-
from openaivec
|
|
11
|
+
from openaivec import BatchResponses
|
|
12
12
|
from openaivec.task import nlp
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
client = OpenAI()
|
|
15
15
|
analyzer = BatchResponses.of_task(
|
|
16
16
|
client=client,
|
|
17
|
-
model_name="gpt-
|
|
17
|
+
model_name="gpt-4.1-mini",
|
|
18
18
|
task=nlp.KEYWORD_EXTRACTION
|
|
19
19
|
)
|
|
20
|
-
|
|
21
|
-
texts = ["Machine learning is transforming the technology industry.",
|
|
20
|
+
|
|
21
|
+
texts = ["Machine learning is transforming the technology industry.",
|
|
22
22
|
"Climate change affects global weather patterns."]
|
|
23
23
|
analyses = analyzer.parse(texts)
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
for analysis in analyses:
|
|
26
26
|
print(f"Keywords: {analysis.keywords}")
|
|
27
27
|
print(f"Key phrases: {analysis.keyphrases}")
|
|
@@ -29,31 +29,30 @@ Example:
|
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
With pandas integration:
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
```python
|
|
34
34
|
import pandas as pd
|
|
35
35
|
from openaivec import pandas_ext # Required for .ai accessor
|
|
36
36
|
from openaivec.task import nlp
|
|
37
|
-
|
|
38
|
-
df = pd.DataFrame({"text": ["Machine learning is transforming the technology industry.",
|
|
37
|
+
|
|
38
|
+
df = pd.DataFrame({"text": ["Machine learning is transforming the technology industry.",
|
|
39
39
|
"Climate change affects global weather patterns."]})
|
|
40
40
|
df["keywords"] = df["text"].ai.task(nlp.KEYWORD_EXTRACTION)
|
|
41
|
-
|
|
41
|
+
|
|
42
42
|
# Extract keyword components
|
|
43
43
|
extracted_df = df.ai.extract("keywords")
|
|
44
44
|
print(extracted_df[["text", "keywords_keywords", "keywords_topics", "keywords_summary"]])
|
|
45
45
|
```
|
|
46
46
|
|
|
47
47
|
Attributes:
|
|
48
|
-
KEYWORD_EXTRACTION (PreparedTask): A prepared task instance
|
|
49
|
-
|
|
50
|
-
|
|
48
|
+
KEYWORD_EXTRACTION (PreparedTask): A prepared task instance configured for keyword
|
|
49
|
+
extraction. Provide ``temperature=0.0`` and ``top_p=1.0`` when calling the API
|
|
50
|
+
for deterministic output.
|
|
51
51
|
"""
|
|
52
52
|
|
|
53
|
-
from typing import List, Optional
|
|
54
53
|
from pydantic import BaseModel, Field
|
|
55
54
|
|
|
56
|
-
from
|
|
55
|
+
from openaivec._model import PreparedTask
|
|
57
56
|
|
|
58
57
|
__all__ = ["KEYWORD_EXTRACTION"]
|
|
59
58
|
|
|
@@ -62,19 +61,18 @@ class Keyword(BaseModel):
|
|
|
62
61
|
text: str = Field(description="The keyword or phrase")
|
|
63
62
|
score: float = Field(description="Importance score (0.0-1.0)")
|
|
64
63
|
frequency: int = Field(description="Frequency of occurrence in the text")
|
|
65
|
-
context:
|
|
64
|
+
context: str | None = Field(description="Context where the keyword appears")
|
|
66
65
|
|
|
67
66
|
|
|
68
67
|
class KeywordExtraction(BaseModel):
|
|
69
|
-
keywords:
|
|
70
|
-
keyphrases:
|
|
71
|
-
topics:
|
|
68
|
+
keywords: list[Keyword] = Field(description="Extracted keywords ranked by importance")
|
|
69
|
+
keyphrases: list[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
|
|
70
|
+
topics: list[str] = Field(description="Identified main topics in the text")
|
|
72
71
|
summary: str = Field(description="Brief summary of the text content")
|
|
73
72
|
|
|
74
73
|
|
|
75
74
|
KEYWORD_EXTRACTION = PreparedTask(
|
|
76
|
-
instructions="Extract important keywords and phrases from the following text. Rank them
|
|
75
|
+
instructions="Extract important keywords and phrases from the following text. Rank them "
|
|
76
|
+
"by importance, provide frequency counts, identify main topics, and generate a brief summary.",
|
|
77
77
|
response_format=KeywordExtraction,
|
|
78
|
-
|
|
79
|
-
top_p=1.0
|
|
80
|
-
)
|
|
78
|
+
)
|
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
"""Morphological analysis task for OpenAI API.
|
|
2
2
|
|
|
3
3
|
This module provides a predefined task for morphological analysis including
|
|
4
|
-
tokenization, part-of-speech tagging, and lemmatization using OpenAI's
|
|
4
|
+
tokenization, part-of-speech tagging, and lemmatization using OpenAI's
|
|
5
5
|
language models.
|
|
6
6
|
|
|
7
7
|
Example:
|
|
8
8
|
Basic usage with BatchResponses:
|
|
9
|
-
|
|
9
|
+
|
|
10
10
|
```python
|
|
11
11
|
from openai import OpenAI
|
|
12
|
-
from openaivec
|
|
12
|
+
from openaivec import BatchResponses
|
|
13
13
|
from openaivec.task import nlp
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
client = OpenAI()
|
|
16
16
|
analyzer = BatchResponses.of_task(
|
|
17
17
|
client=client,
|
|
18
|
-
model_name="gpt-
|
|
18
|
+
model_name="gpt-4.1-mini",
|
|
19
19
|
task=nlp.MORPHOLOGICAL_ANALYSIS
|
|
20
20
|
)
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
texts = ["Running quickly", "The cats are sleeping"]
|
|
23
23
|
analyses = analyzer.parse(texts)
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
for analysis in analyses:
|
|
26
26
|
print(f"Tokens: {analysis.tokens}")
|
|
27
27
|
print(f"POS Tags: {analysis.pos_tags}")
|
|
@@ -29,45 +29,45 @@ Example:
|
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
With pandas integration:
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
```python
|
|
34
34
|
import pandas as pd
|
|
35
35
|
from openaivec import pandas_ext # Required for .ai accessor
|
|
36
36
|
from openaivec.task import nlp
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
df = pd.DataFrame({"text": ["Running quickly", "The cats are sleeping"]})
|
|
39
39
|
df["analysis"] = df["text"].ai.task(nlp.MORPHOLOGICAL_ANALYSIS)
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
# Extract analysis components
|
|
42
42
|
extracted_df = df.ai.extract("analysis")
|
|
43
43
|
print(extracted_df[["text", "analysis_tokens", "analysis_pos_tags", "analysis_lemmas"]])
|
|
44
44
|
```
|
|
45
45
|
|
|
46
46
|
Attributes:
|
|
47
|
-
MORPHOLOGICAL_ANALYSIS (PreparedTask): A prepared task instance
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
MORPHOLOGICAL_ANALYSIS (PreparedTask): A prepared task instance configured
|
|
48
|
+
for morphological analysis. Provide ``temperature=0.0`` and ``top_p=1.0`` to
|
|
49
|
+
API calls for deterministic output.
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
|
-
from
|
|
53
|
-
from pydantic import BaseModel
|
|
54
|
-
from pydantic import Field
|
|
52
|
+
from pydantic import BaseModel, Field
|
|
55
53
|
|
|
56
|
-
from
|
|
54
|
+
from openaivec._model import PreparedTask
|
|
57
55
|
|
|
58
56
|
__all__ = ["MORPHOLOGICAL_ANALYSIS"]
|
|
59
57
|
|
|
60
58
|
|
|
61
59
|
class MorphologicalAnalysis(BaseModel):
|
|
62
|
-
tokens:
|
|
63
|
-
pos_tags:
|
|
64
|
-
lemmas:
|
|
65
|
-
morphological_features:
|
|
60
|
+
tokens: list[str] = Field(description="List of tokens in the text")
|
|
61
|
+
pos_tags: list[str] = Field(description="Part-of-speech tags for each token")
|
|
62
|
+
lemmas: list[str] = Field(description="Lemmatized form of each token")
|
|
63
|
+
morphological_features: list[str] = Field(
|
|
64
|
+
description="Morphological features for each token (e.g., tense, number, case)"
|
|
65
|
+
)
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
MORPHOLOGICAL_ANALYSIS = PreparedTask(
|
|
69
|
-
instructions="Perform morphological analysis on the following text. Break it down into tokens,
|
|
69
|
+
instructions="Perform morphological analysis on the following text. Break it down into tokens, "
|
|
70
|
+
"identify part-of-speech tags, provide lemmatized forms, and extract morphological features "
|
|
71
|
+
"for each token.",
|
|
70
72
|
response_format=MorphologicalAnalysis,
|
|
71
|
-
|
|
72
|
-
top_p=1.0
|
|
73
|
-
)
|
|
73
|
+
)
|