openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/_di.py +326 -0
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1635 -425
  18. openaivec/spark.py +604 -335
  19. openaivec/task/__init__.py +27 -29
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +51 -41
  22. openaivec/task/customer_support/inquiry_classification.py +86 -61
  23. openaivec/task/customer_support/inquiry_summary.py +44 -45
  24. openaivec/task/customer_support/intent_analysis.py +56 -41
  25. openaivec/task/customer_support/response_suggestion.py +49 -43
  26. openaivec/task/customer_support/urgency_analysis.py +76 -71
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +19 -20
  29. openaivec/task/nlp/keyword_extraction.py +22 -24
  30. openaivec/task/nlp/morphological_analysis.py +25 -25
  31. openaivec/task/nlp/named_entity_recognition.py +26 -28
  32. openaivec/task/nlp/sentiment_analysis.py +29 -21
  33. openaivec/task/nlp/translation.py +24 -30
  34. openaivec/task/table/__init__.py +3 -0
  35. openaivec/task/table/fillna.py +183 -0
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/responses.py +0 -392
  41. openaivec/serialize.py +0 -225
  42. openaivec/task/model.py +0 -84
  43. openaivec-0.10.0.dist-info/METADATA +0 -546
  44. openaivec-0.10.0.dist-info/RECORD +0 -29
  45. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
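The per-file hunks below repeat a single migration pattern: internal modules move behind underscore prefixes (`model` → `_model`, with `responses.py`, `embeddings.py`, and `serialize.py` replaced by `_responses.py`, `_embeddings.py`, and `_serialize.py`), `BatchResponses` is re-exported from the package root, `typing.List`/`Optional` give way to builtin `list` and `X | None`, example model names change from `gpt-4o-mini` to `gpt-4.1-mini`, and `temperature`/`top_p` disappear from `PreparedTask`. A minimal before/after sketch of the import-level changes, reconstructed from the hunks below rather than from any packaged documentation:

```python
# 0.10.0 style (removed lines in the hunks below):
#   from openaivec.responses import BatchResponses
#   from typing import List, Optional

# 1.0.10 style (added lines):
from openai import OpenAI
from openaivec import BatchResponses          # now re-exported at the package root
from openaivec.task import customer_support

client = OpenAI()
analyzer = BatchResponses.of_task(
    client=client,
    model_name="gpt-4.1-mini",                 # examples now use gpt-4.1-mini
    task=customer_support.urgency_analysis(),  # factories no longer accept temperature/top_p
)
analyses = analyzer.parse(["URGENT: My website is down and I'm losing customers!"])
```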
openaivec/task/customer_support/urgency_analysis.py

@@ -5,26 +5,26 @@ inquiries to help prioritize support queue and response times.
 
 Example:
     Basic usage with default settings:
-
+
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import customer_support
-
+
     client = OpenAI()
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=customer_support.urgency_analysis()
     )
-
+
     inquiries = [
         "URGENT: My website is down and I'm losing customers!",
         "Can you help me understand how to use the new feature?",
         "I haven't received my order from last week"
     ]
     analyses = analyzer.parse(inquiries)
-
+
     for analysis in analyses:
         print(f"Urgency Level: {analysis.urgency_level}")
         print(f"Score: {analysis.urgency_score}")
@@ -33,10 +33,10 @@ Example:
     ```
 
     Customized for SaaS platform with business hours:
-
+
     ```python
     from openaivec.task import customer_support
-
+
     # SaaS-specific urgency levels
     saas_urgency_levels = {
         "critical": "Service outages, security breaches, data loss",
@@ -44,15 +44,15 @@ Example:
         "medium": "Feature bugs, performance issues, billing questions",
         "low": "Feature requests, documentation questions, general feedback"
     }
-
+
     # Custom response times based on SLA
     saas_response_times = {
         "critical": "immediate",
         "high": "within_1_hour",
-        "medium": "within_4_hours", 
+        "medium": "within_4_hours",
         "low": "within_24_hours"
     }
-
+
     # Enterprise customer tier gets priority
     enterprise_customer_tiers = {
         "enterprise": "Priority support, dedicated account manager",
@@ -60,7 +60,7 @@ Example:
         "professional": "Professional plan support",
         "starter": "Basic support"
     }
-
+
     task = customer_support.urgency_analysis(
         urgency_levels=saas_urgency_levels,
         response_times=saas_response_times,
@@ -68,168 +68,174 @@ Example:
         business_context="SaaS platform",
         business_hours="9 AM - 5 PM EST, Monday-Friday"
     )
-
+
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=task
     )
     ```
 
     With pandas integration:
-
+
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import customer_support
-
+
     df = pd.DataFrame({"inquiry": [
         "URGENT: My website is down and I'm losing customers!",
         "Can you help me understand how to use the new feature?",
         "I haven't received my order from last week"
     ]})
     df["urgency"] = df["inquiry"].ai.task(customer_support.urgency_analysis())
-
+
     # Extract urgency components
     extracted_df = df.ai.extract("urgency")
     print(extracted_df[["inquiry", "urgency_urgency_level", "urgency_urgency_score", "urgency_response_time"]])
     ```
 """
 
-from typing import List, Dict, Optional, Literal
+from typing import Dict, Literal
+
 from pydantic import BaseModel, Field
 
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 
 __all__ = ["urgency_analysis"]
 
 
 class UrgencyAnalysis(BaseModel):
-    urgency_level: Literal["critical", "high", "medium", "low"] = Field(description="Urgency level from configured levels (critical, high, medium, low)")
+    urgency_level: Literal["critical", "high", "medium", "low"] = Field(
+        description="Urgency level from configured levels (critical, high, medium, low)"
+    )
     urgency_score: float = Field(description="Urgency score from 0.0 (not urgent) to 1.0 (extremely urgent)")
-    response_time: Literal["immediate", "within_1_hour", "within_4_hours", "within_24_hours"] = Field(description="Recommended response time from configured times (immediate, within_1_hour, within_4_hours, within_24_hours)")
+    response_time: Literal["immediate", "within_1_hour", "within_4_hours", "within_24_hours"] = Field(
+        description="Recommended response time from configured times "
+        "(immediate, within_1_hour, within_4_hours, within_24_hours)"
+    )
     escalation_required: bool = Field(description="Whether this inquiry requires escalation to management")
-    urgency_indicators: List[str] = Field(description="Specific words or phrases that indicate urgency")
-    business_impact: Literal["none", "low", "medium", "high", "critical"] = Field(description="Potential business impact (none, low, medium, high, critical)")
-    customer_tier: Literal["enterprise", "premium", "standard", "basic"] = Field(description="Inferred customer tier from configured tiers (enterprise, premium, standard, basic)")
+    urgency_indicators: list[str] = Field(description="Specific words or phrases that indicate urgency")
+    business_impact: Literal["none", "low", "medium", "high", "critical"] = Field(
+        description="Potential business impact (none, low, medium, high, critical)"
+    )
+    customer_tier: Literal["enterprise", "premium", "standard", "basic"] = Field(
+        description="Inferred customer tier from configured tiers (enterprise, premium, standard, basic)"
+    )
     reasoning: str = Field(description="Brief explanation of urgency assessment")
     sla_compliance: bool = Field(description="Whether response time aligns with SLA requirements")
 
 
 def urgency_analysis(
-    urgency_levels: Optional[Dict[str, str]] = None,
-    response_times: Optional[Dict[str, str]] = None,
-    customer_tiers: Optional[Dict[str, str]] = None,
-    escalation_rules: Optional[Dict[str, str]] = None,
-    urgency_keywords: Optional[Dict[str, List[str]]] = None,
+    urgency_levels: Dict[str, str] | None = None,
+    response_times: Dict[str, str] | None = None,
+    customer_tiers: Dict[str, str] | None = None,
+    escalation_rules: Dict[str, str] | None = None,
+    urgency_keywords: Dict[str, list[str]] | None = None,
     business_context: str = "general customer support",
    business_hours: str = "24/7 support",
-    sla_rules: Optional[Dict[str, str]] = None,
-    temperature: float = 0.0,
-    top_p: float = 1.0
+    sla_rules: Dict[str, str] | None = None,
 ) -> PreparedTask:
     """Create a configurable urgency analysis task.
-
+
     Args:
-        urgency_levels: Dictionary mapping urgency levels to descriptions.
-        response_times: Dictionary mapping urgency levels to response times.
-        customer_tiers: Dictionary mapping tier names to descriptions.
-        escalation_rules: Dictionary mapping conditions to escalation actions.
-        urgency_keywords: Dictionary mapping urgency levels to indicator keywords.
-        business_context: Description of the business context.
-        business_hours: Description of business hours for response time calculation.
-        sla_rules: Dictionary mapping customer tiers to SLA requirements.
-        temperature: Sampling temperature (0.0-1.0).
-        top_p: Nucleus sampling parameter (0.0-1.0).
-
+        urgency_levels (dict[str, str] | None): Dictionary mapping urgency levels to descriptions.
+        response_times (dict[str, str] | None): Dictionary mapping urgency levels to response times.
+        customer_tiers (dict[str, str] | None): Dictionary mapping tier names to descriptions.
+        escalation_rules (dict[str, str] | None): Dictionary mapping conditions to escalation actions.
+        urgency_keywords (dict[str, list[str]] | None): Dictionary mapping urgency levels to indicator keywords.
+        business_context (str): Description of the business context.
+        business_hours (str): Description of business hours for response time calculation.
+        sla_rules (dict[str, str] | None): Dictionary mapping customer tiers to SLA requirements.
+
     Returns:
         PreparedTask configured for urgency analysis.
     """
-
+
     # Default urgency levels
     if urgency_levels is None:
         urgency_levels = {
             "critical": "Service outages, security breaches, data loss, system failures affecting business operations",
             "high": "Account locked, payment failures, urgent deadlines, angry customers, revenue-impacting issues",
             "medium": "Feature not working, delivery delays, billing questions, moderate customer frustration",
-            "low": "General questions, feature requests, feedback, compliments, minor issues"
+            "low": "General questions, feature requests, feedback, compliments, minor issues",
         }
-
+
     # Default response times
     if response_times is None:
         response_times = {
             "critical": "immediate",
             "high": "within_1_hour",
             "medium": "within_4_hours",
-            "low": "within_24_hours"
+            "low": "within_24_hours",
         }
-
+
     # Default customer tiers
     if customer_tiers is None:
         customer_tiers = {
             "enterprise": "Large contracts, multiple users, business-critical usage",
             "premium": "Paid plans, professional use, higher expectations",
             "standard": "Regular paid users, normal expectations",
-            "basic": "Free users, casual usage, lower priority"
+            "basic": "Free users, casual usage, lower priority",
         }
-
+
     # Default escalation rules
     if escalation_rules is None:
         escalation_rules = {
             "immediate": "Critical issues, security breaches, service outages",
             "within_1_hour": "High urgency with customer tier enterprise or premium",
             "manager_review": "Threats to cancel, legal language, compliance issues",
-            "no_escalation": "Standard support can handle"
+            "no_escalation": "Standard support can handle",
         }
-
+
     # Default urgency keywords
     if urgency_keywords is None:
         urgency_keywords = {
             "critical": ["urgent", "emergency", "critical", "down", "outage", "security", "breach", "immediate"],
             "high": ["ASAP", "urgent", "problem", "issue", "error", "bug", "frustrated", "angry"],
             "medium": ["question", "help", "support", "feedback", "concern", "delayed"],
-            "low": ["information", "thank", "compliment", "suggestion", "general", "when convenient"]
+            "low": ["information", "thank", "compliment", "suggestion", "general", "when convenient"],
        }
-
+
     # Default SLA rules
     if sla_rules is None:
         sla_rules = {
             "enterprise": "Critical: 15min, High: 1hr, Medium: 4hr, Low: 24hr",
             "premium": "Critical: 30min, High: 2hr, Medium: 8hr, Low: 48hr",
             "standard": "Critical: 1hr, High: 4hr, Medium: 24hr, Low: 72hr",
-            "basic": "Critical: 4hr, High: 24hr, Medium: 72hr, Low: 1week"
+            "basic": "Critical: 4hr, High: 24hr, Medium: 72hr, Low: 1week",
        }
-
+
     # Build urgency levels section
     urgency_text = "Urgency Levels:\n"
     for level, description in urgency_levels.items():
         urgency_text += f"- {level}: {description}\n"
-
+
     # Build response times section
     response_text = "Response Times:\n"
     for level, time in response_times.items():
         response_text += f"- {level}: {time}\n"
-
+
     # Build customer tiers section
     tiers_text = "Customer Tiers:\n"
     for tier, description in customer_tiers.items():
         tiers_text += f"- {tier}: {description}\n"
-
+
     # Build escalation rules section
     escalation_text = "Escalation Rules:\n"
     for condition, action in escalation_rules.items():
         escalation_text += f"- {condition}: {action}\n"
-
+
     # Build urgency keywords section
     keywords_text = "Urgency Keywords:\n"
     for level, keywords in urgency_keywords.items():
         keywords_text += f"- {level}: {', '.join(keywords)}\n"
-
+
     # Build SLA rules section
     sla_text = "SLA Rules:\n"
     for tier, sla in sla_rules.items():
         sla_text += f"- {tier}: {sla}\n"
-
+
     instructions = f"""Analyze the urgency level of the customer inquiry based on language, content, and context.
 
 Business Context: {business_context}
@@ -269,17 +275,16 @@ Consider:
 - Revenue or operational impact
 - Compliance or legal implications
 
-IMPORTANT: Provide analysis responses in the same language as the input text, except for the predefined categorical fields (urgency_level, response_time, business_impact, customer_tier) which must use the exact English values specified above. For example, if the input is in French, provide urgency_indicators and reasoning in French, but use English values like "critical" for urgency_level.
+IMPORTANT: Provide analysis responses in the same language as the input text, except for the
+predefined categorical fields (urgency_level, response_time, business_impact, customer_tier)
+which must use the exact English values specified above. For example, if the input is in French,
+provide urgency_indicators and reasoning in French, but use English values like "critical" for
+urgency_level.
 
 Provide detailed analysis with clear reasoning for urgency level and response time recommendations."""
 
-    return PreparedTask(
-        instructions=instructions,
-        response_format=UrgencyAnalysis,
-        temperature=temperature,
-        top_p=top_p
-    )
+    return PreparedTask(instructions=instructions, response_format=UrgencyAnalysis)
 
 
 # Backward compatibility - default configuration
-URGENCY_ANALYSIS = urgency_analysis()
+URGENCY_ANALYSIS = urgency_analysis()
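The final hunk above drops `temperature` and `top_p` from both the `urgency_analysis()` signature and the `PreparedTask` call, leaving instructions plus a Pydantic response format as the whole task definition. A sketch of defining a custom task under the new shape; `TicketTriage` and its fields are hypothetical, and only the `PreparedTask(instructions=..., response_format=...)` call and the `openaivec._model` import path are taken from the diff:

```python
from pydantic import BaseModel, Field

from openaivec._model import PreparedTask  # import path per the hunk above


class TicketTriage(BaseModel):
    # Hypothetical response format, for illustration only.
    queue: str = Field(description="Support queue the ticket should be routed to")
    needs_human: bool = Field(description="Whether a human agent must review the ticket")


TICKET_TRIAGE = PreparedTask(
    instructions="Route each support ticket to a queue and flag tickets that need human review.",
    response_format=TicketTriage,
    # temperature/top_p are no longer PreparedTask fields in 1.0.10; per the updated
    # docstrings, deterministic sampling settings are supplied when calling the API.
)
```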
openaivec/task/nlp/__init__.py

@@ -1,9 +1,9 @@
-from .translation import MULTILINGUAL_TRANSLATION
+from .dependency_parsing import DEPENDENCY_PARSING
+from .keyword_extraction import KEYWORD_EXTRACTION
 from .morphological_analysis import MORPHOLOGICAL_ANALYSIS
 from .named_entity_recognition import NAMED_ENTITY_RECOGNITION
 from .sentiment_analysis import SENTIMENT_ANALYSIS
-from .dependency_parsing import DEPENDENCY_PARSING
-from .keyword_extraction import KEYWORD_EXTRACTION
+from .translation import MULTILINGUAL_TRANSLATION
 
 __all__ = [
     "MULTILINGUAL_TRANSLATION",
@@ -12,4 +12,4 @@ __all__ = [
     "SENTIMENT_ANALYSIS",
     "DEPENDENCY_PARSING",
     "KEYWORD_EXTRACTION",
-]
+]
openaivec/task/nlp/dependency_parsing.py

@@ -5,22 +5,22 @@ syntactic dependencies between words in sentences using OpenAI's language models
 
 Example:
     Basic usage with BatchResponses:
-
+
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import nlp
-
+
     client = OpenAI()
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=nlp.DEPENDENCY_PARSING
     )
-
+
     texts = ["The cat sat on the mat.", "She quickly ran to the store."]
     analyses = analyzer.parse(texts)
-
+
     for analysis in analyses:
         print(f"Tokens: {analysis.tokens}")
         print(f"Dependencies: {analysis.dependencies}")
@@ -28,30 +28,29 @@ Example:
     ```
 
     With pandas integration:
-
+
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import nlp
-
+
     df = pd.DataFrame({"text": ["The cat sat on the mat.", "She quickly ran to the store."]})
     df["parsing"] = df["text"].ai.task(nlp.DEPENDENCY_PARSING)
-
+
     # Extract parsing components
     extracted_df = df.ai.extract("parsing")
     print(extracted_df[["text", "parsing_tokens", "parsing_root_word", "parsing_syntactic_structure"]])
     ```
 
 Attributes:
-    DEPENDENCY_PARSING (PreparedTask): A prepared task instance
-        configured for dependency parsing with temperature=0.0 and
-        top_p=1.0 for deterministic output.
+    DEPENDENCY_PARSING (PreparedTask): A prepared task instance configured for dependency
+        parsing. Provide ``temperature=0.0`` and ``top_p=1.0`` when calling the API for
+        deterministic output.
 """
 
-from typing import List
 from pydantic import BaseModel, Field
 
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 
 __all__ = ["DEPENDENCY_PARSING"]
 
@@ -65,15 +64,15 @@ class DependencyRelation(BaseModel):
 
 
 class DependencyParsing(BaseModel):
-    tokens: List[str] = Field(description="List of tokens in the sentence")
-    dependencies: List[DependencyRelation] = Field(description="Dependency relations between tokens")
+    tokens: list[str] = Field(description="List of tokens in the sentence")
+    dependencies: list[DependencyRelation] = Field(description="Dependency relations between tokens")
     root_word: str = Field(description="Root word of the sentence")
     syntactic_structure: str = Field(description="Tree representation of the syntactic structure")
 
 
 DEPENDENCY_PARSING = PreparedTask(
-    instructions="Parse the syntactic dependencies in the following text. Identify dependency relations between words, determine the root word, and provide a tree representation of the syntactic structure.",
+    instructions="Parse the syntactic dependencies in the following text. Identify dependency "
+    "relations between words, determine the root word, and provide a tree representation of the "
+    "syntactic structure.",
     response_format=DependencyParsing,
-    temperature=0.0,
-    top_p=1.0
-)
+)
openaivec/task/nlp/keyword_extraction.py

@@ -5,23 +5,23 @@ important keywords and phrases from text using OpenAI's language models.
 
 Example:
     Basic usage with BatchResponses:
-
+
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import nlp
-
+
     client = OpenAI()
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=nlp.KEYWORD_EXTRACTION
     )
-
-    texts = ["Machine learning is transforming the technology industry.", 
+
+    texts = ["Machine learning is transforming the technology industry.",
             "Climate change affects global weather patterns."]
     analyses = analyzer.parse(texts)
-
+
     for analysis in analyses:
         print(f"Keywords: {analysis.keywords}")
         print(f"Key phrases: {analysis.keyphrases}")
@@ -29,31 +29,30 @@ Example:
     ```
 
     With pandas integration:
-
+
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import nlp
-
-    df = pd.DataFrame({"text": ["Machine learning is transforming the technology industry.", 
+
+    df = pd.DataFrame({"text": ["Machine learning is transforming the technology industry.",
                                "Climate change affects global weather patterns."]})
     df["keywords"] = df["text"].ai.task(nlp.KEYWORD_EXTRACTION)
-
+
     # Extract keyword components
     extracted_df = df.ai.extract("keywords")
     print(extracted_df[["text", "keywords_keywords", "keywords_topics", "keywords_summary"]])
     ```
 
 Attributes:
-    KEYWORD_EXTRACTION (PreparedTask): A prepared task instance
-        configured for keyword extraction with temperature=0.0 and
-        top_p=1.0 for deterministic output.
+    KEYWORD_EXTRACTION (PreparedTask): A prepared task instance configured for keyword
+        extraction. Provide ``temperature=0.0`` and ``top_p=1.0`` when calling the API
+        for deterministic output.
 """
 
-from typing import List, Optional
 from pydantic import BaseModel, Field
 
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 
 __all__ = ["KEYWORD_EXTRACTION"]
 
@@ -62,19 +61,18 @@ class Keyword(BaseModel):
     text: str = Field(description="The keyword or phrase")
     score: float = Field(description="Importance score (0.0-1.0)")
     frequency: int = Field(description="Frequency of occurrence in the text")
-    context: Optional[str] = Field(description="Context where the keyword appears")
+    context: str | None = Field(description="Context where the keyword appears")
 
 
 class KeywordExtraction(BaseModel):
-    keywords: List[Keyword] = Field(description="Extracted keywords ranked by importance")
-    keyphrases: List[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
-    topics: List[str] = Field(description="Identified main topics in the text")
+    keywords: list[Keyword] = Field(description="Extracted keywords ranked by importance")
+    keyphrases: list[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
+    topics: list[str] = Field(description="Identified main topics in the text")
     summary: str = Field(description="Brief summary of the text content")
 
 
 KEYWORD_EXTRACTION = PreparedTask(
-    instructions="Extract important keywords and phrases from the following text. Rank them by importance, provide frequency counts, identify main topics, and generate a brief summary.",
+    instructions="Extract important keywords and phrases from the following text. Rank them "
+    "by importance, provide frequency counts, identify main topics, and generate a brief summary.",
     response_format=KeywordExtraction,
-    temperature=0.0,
-    top_p=1.0
-)
+)
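Since `KeywordExtraction.keywords` and `keyphrases` are lists of `Keyword` models (see the hunk above), each hit exposes `text`, `score`, `frequency`, and an optional `context`. A short sketch of consuming one parsed result field by field, assuming `analyzer` was built as in the docstring example above:

```python
# Assumes `analyzer` is the BatchResponses instance from the docstring example above.
analyses = analyzer.parse(["Machine learning is transforming the technology industry."])

for analysis in analyses:
    # Sort keywords by the model-assigned importance score (0.0-1.0).
    for kw in sorted(analysis.keywords, key=lambda k: k.score, reverse=True):
        context = kw.context or "n/a"  # Optional[str] in 0.10.0, str | None in 1.0.10
        print(f"{kw.text}: score={kw.score:.2f}, freq={kw.frequency}, context={context}")
```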
openaivec/task/nlp/morphological_analysis.py

@@ -1,27 +1,27 @@
 """Morphological analysis task for OpenAI API.
 
 This module provides a predefined task for morphological analysis including
-tokenization, part-of-speech tagging, and lemmatization using OpenAI's 
+tokenization, part-of-speech tagging, and lemmatization using OpenAI's
 language models.
 
 Example:
     Basic usage with BatchResponses:
-
+
     ```python
     from openai import OpenAI
-    from openaivec.responses import BatchResponses
+    from openaivec import BatchResponses
     from openaivec.task import nlp
-
+
     client = OpenAI()
     analyzer = BatchResponses.of_task(
         client=client,
-        model_name="gpt-4o-mini",
+        model_name="gpt-4.1-mini",
         task=nlp.MORPHOLOGICAL_ANALYSIS
     )
-
+
     texts = ["Running quickly", "The cats are sleeping"]
     analyses = analyzer.parse(texts)
-
+
     for analysis in analyses:
         print(f"Tokens: {analysis.tokens}")
         print(f"POS Tags: {analysis.pos_tags}")
@@ -29,45 +29,45 @@ Example:
     ```
 
     With pandas integration:
-
+
     ```python
     import pandas as pd
     from openaivec import pandas_ext  # Required for .ai accessor
     from openaivec.task import nlp
-
+
     df = pd.DataFrame({"text": ["Running quickly", "The cats are sleeping"]})
     df["analysis"] = df["text"].ai.task(nlp.MORPHOLOGICAL_ANALYSIS)
-
+
     # Extract analysis components
     extracted_df = df.ai.extract("analysis")
     print(extracted_df[["text", "analysis_tokens", "analysis_pos_tags", "analysis_lemmas"]])
     ```
 
 Attributes:
-    MORPHOLOGICAL_ANALYSIS (PreparedTask): A prepared task instance
-        configured for morphological analysis with temperature=0.0 and
-        top_p=1.0 for deterministic output.
+    MORPHOLOGICAL_ANALYSIS (PreparedTask): A prepared task instance configured
+        for morphological analysis. Provide ``temperature=0.0`` and ``top_p=1.0`` to
+        API calls for deterministic output.
 """
 
-from typing import List
-from pydantic import BaseModel
-from pydantic import Field
+from pydantic import BaseModel, Field
 
-from ..model import PreparedTask
+from openaivec._model import PreparedTask
 
 __all__ = ["MORPHOLOGICAL_ANALYSIS"]
 
 
 class MorphologicalAnalysis(BaseModel):
-    tokens: List[str] = Field(description="List of tokens in the text")
-    pos_tags: List[str] = Field(description="Part-of-speech tags for each token")
-    lemmas: List[str] = Field(description="Lemmatized form of each token")
-    morphological_features: List[str] = Field(description="Morphological features for each token (e.g., tense, number, case)")
+    tokens: list[str] = Field(description="List of tokens in the text")
+    pos_tags: list[str] = Field(description="Part-of-speech tags for each token")
+    lemmas: list[str] = Field(description="Lemmatized form of each token")
+    morphological_features: list[str] = Field(
+        description="Morphological features for each token (e.g., tense, number, case)"
+    )
 
 
 MORPHOLOGICAL_ANALYSIS = PreparedTask(
-    instructions="Perform morphological analysis on the following text. Break it down into tokens, identify part-of-speech tags, provide lemmatized forms, and extract morphological features for each token.",
+    instructions="Perform morphological analysis on the following text. Break it down into tokens, "
+    "identify part-of-speech tags, provide lemmatized forms, and extract morphological features "
+    "for each token.",
     response_format=MorphologicalAnalysis,
-    temperature=0.0,
-    top_p=1.0
-)
+)