openaivec 0.12.5__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/{di.py → _di.py} +36 -12
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1496 -318
  18. openaivec/spark.py +485 -183
  19. openaivec/task/__init__.py +9 -7
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +17 -15
  22. openaivec/task/customer_support/inquiry_classification.py +23 -22
  23. openaivec/task/customer_support/inquiry_summary.py +14 -13
  24. openaivec/task/customer_support/intent_analysis.py +21 -19
  25. openaivec/task/customer_support/response_suggestion.py +16 -16
  26. openaivec/task/customer_support/urgency_analysis.py +24 -25
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +10 -12
  29. openaivec/task/nlp/keyword_extraction.py +11 -14
  30. openaivec/task/nlp/morphological_analysis.py +12 -14
  31. openaivec/task/nlp/named_entity_recognition.py +16 -18
  32. openaivec/task/nlp/sentiment_analysis.py +14 -11
  33. openaivec/task/nlp/translation.py +6 -9
  34. openaivec/task/table/__init__.py +2 -2
  35. openaivec/task/table/fillna.py +11 -11
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/model.py +0 -67
  41. openaivec/provider.py +0 -45
  42. openaivec/responses.py +0 -393
  43. openaivec/serialize.py +0 -225
  44. openaivec-0.12.5.dist-info/METADATA +0 -696
  45. openaivec-0.12.5.dist-info/RECORD +0 -33
  46. {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,9 @@
1
- from .translation import MULTILINGUAL_TRANSLATION
1
+ from .dependency_parsing import DEPENDENCY_PARSING
2
+ from .keyword_extraction import KEYWORD_EXTRACTION
2
3
  from .morphological_analysis import MORPHOLOGICAL_ANALYSIS
3
4
  from .named_entity_recognition import NAMED_ENTITY_RECOGNITION
4
5
  from .sentiment_analysis import SENTIMENT_ANALYSIS
5
- from .dependency_parsing import DEPENDENCY_PARSING
6
- from .keyword_extraction import KEYWORD_EXTRACTION
6
+ from .translation import MULTILINGUAL_TRANSLATION
7
7
 
8
8
  __all__ = [
9
9
  "MULTILINGUAL_TRANSLATION",
@@ -12,4 +12,4 @@ __all__ = [
12
12
  "SENTIMENT_ANALYSIS",
13
13
  "DEPENDENCY_PARSING",
14
14
  "KEYWORD_EXTRACTION",
15
- ]
15
+ ]
@@ -8,7 +8,7 @@ Example:
8
8
 
9
9
  ```python
10
10
  from openai import OpenAI
11
- from openaivec.responses import BatchResponses
11
+ from openaivec import BatchResponses
12
12
  from openaivec.task import nlp
13
13
 
14
14
  client = OpenAI()
@@ -43,16 +43,14 @@ Example:
43
43
  ```
44
44
 
45
45
  Attributes:
46
- DEPENDENCY_PARSING (PreparedTask): A prepared task instance
47
- configured for dependency parsing with temperature=0.0 and
48
- top_p=1.0 for deterministic output.
46
+ DEPENDENCY_PARSING (PreparedTask): A prepared task instance configured for dependency
47
+ parsing. Provide ``temperature=0.0`` and ``top_p=1.0`` when calling the API for
48
+ deterministic output.
49
49
  """
50
50
 
51
- from typing import List
52
-
53
51
  from pydantic import BaseModel, Field
54
52
 
55
- from ...model import PreparedTask
53
+ from openaivec._model import PreparedTask
56
54
 
57
55
  __all__ = ["DEPENDENCY_PARSING"]
58
56
 
@@ -66,15 +64,15 @@ class DependencyRelation(BaseModel):
66
64
 
67
65
 
68
66
  class DependencyParsing(BaseModel):
69
- tokens: List[str] = Field(description="List of tokens in the sentence")
70
- dependencies: List[DependencyRelation] = Field(description="Dependency relations between tokens")
67
+ tokens: list[str] = Field(description="List of tokens in the sentence")
68
+ dependencies: list[DependencyRelation] = Field(description="Dependency relations between tokens")
71
69
  root_word: str = Field(description="Root word of the sentence")
72
70
  syntactic_structure: str = Field(description="Tree representation of the syntactic structure")
73
71
 
74
72
 
75
73
  DEPENDENCY_PARSING = PreparedTask(
76
- instructions="Parse the syntactic dependencies in the following text. Identify dependency relations between words, determine the root word, and provide a tree representation of the syntactic structure.",
74
+ instructions="Parse the syntactic dependencies in the following text. Identify dependency "
75
+ "relations between words, determine the root word, and provide a tree representation of the "
76
+ "syntactic structure.",
77
77
  response_format=DependencyParsing,
78
- temperature=0.0,
79
- top_p=1.0,
80
78
  )
@@ -8,7 +8,7 @@ Example:
8
8
 
9
9
  ```python
10
10
  from openai import OpenAI
11
- from openaivec.responses import BatchResponses
11
+ from openaivec import BatchResponses
12
12
  from openaivec.task import nlp
13
13
 
14
14
  client = OpenAI()
@@ -45,16 +45,14 @@ Example:
45
45
  ```
46
46
 
47
47
  Attributes:
48
- KEYWORD_EXTRACTION (PreparedTask): A prepared task instance
49
- configured for keyword extraction with temperature=0.0 and
50
- top_p=1.0 for deterministic output.
48
+ KEYWORD_EXTRACTION (PreparedTask): A prepared task instance configured for keyword
49
+ extraction. Provide ``temperature=0.0`` and ``top_p=1.0`` when calling the API
50
+ for deterministic output.
51
51
  """
52
52
 
53
- from typing import List, Optional
54
-
55
53
  from pydantic import BaseModel, Field
56
54
 
57
- from ...model import PreparedTask
55
+ from openaivec._model import PreparedTask
58
56
 
59
57
  __all__ = ["KEYWORD_EXTRACTION"]
60
58
 
@@ -63,19 +61,18 @@ class Keyword(BaseModel):
63
61
  text: str = Field(description="The keyword or phrase")
64
62
  score: float = Field(description="Importance score (0.0-1.0)")
65
63
  frequency: int = Field(description="Frequency of occurrence in the text")
66
- context: Optional[str] = Field(description="Context where the keyword appears")
64
+ context: str | None = Field(description="Context where the keyword appears")
67
65
 
68
66
 
69
67
  class KeywordExtraction(BaseModel):
70
- keywords: List[Keyword] = Field(description="Extracted keywords ranked by importance")
71
- keyphrases: List[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
72
- topics: List[str] = Field(description="Identified main topics in the text")
68
+ keywords: list[Keyword] = Field(description="Extracted keywords ranked by importance")
69
+ keyphrases: list[Keyword] = Field(description="Extracted multi-word phrases ranked by importance")
70
+ topics: list[str] = Field(description="Identified main topics in the text")
73
71
  summary: str = Field(description="Brief summary of the text content")
74
72
 
75
73
 
76
74
  KEYWORD_EXTRACTION = PreparedTask(
77
- instructions="Extract important keywords and phrases from the following text. Rank them by importance, provide frequency counts, identify main topics, and generate a brief summary.",
75
+ instructions="Extract important keywords and phrases from the following text. Rank them "
76
+ "by importance, provide frequency counts, identify main topics, and generate a brief summary.",
78
77
  response_format=KeywordExtraction,
79
- temperature=0.0,
80
- top_p=1.0,
81
78
  )
@@ -9,7 +9,7 @@ Example:
9
9
 
10
10
  ```python
11
11
  from openai import OpenAI
12
- from openaivec.responses import BatchResponses
12
+ from openaivec import BatchResponses
13
13
  from openaivec.task import nlp
14
14
 
15
15
  client = OpenAI()
@@ -44,32 +44,30 @@ Example:
44
44
  ```
45
45
 
46
46
  Attributes:
47
- MORPHOLOGICAL_ANALYSIS (PreparedTask): A prepared task instance
48
- configured for morphological analysis with temperature=0.0 and
49
- top_p=1.0 for deterministic output.
47
+ MORPHOLOGICAL_ANALYSIS (PreparedTask): A prepared task instance configured
48
+ for morphological analysis. Provide ``temperature=0.0`` and ``top_p=1.0`` to
49
+ API calls for deterministic output.
50
50
  """
51
51
 
52
- from typing import List
53
-
54
52
  from pydantic import BaseModel, Field
55
53
 
56
- from ...model import PreparedTask
54
+ from openaivec._model import PreparedTask
57
55
 
58
56
  __all__ = ["MORPHOLOGICAL_ANALYSIS"]
59
57
 
60
58
 
61
59
  class MorphologicalAnalysis(BaseModel):
62
- tokens: List[str] = Field(description="List of tokens in the text")
63
- pos_tags: List[str] = Field(description="Part-of-speech tags for each token")
64
- lemmas: List[str] = Field(description="Lemmatized form of each token")
65
- morphological_features: List[str] = Field(
60
+ tokens: list[str] = Field(description="List of tokens in the text")
61
+ pos_tags: list[str] = Field(description="Part-of-speech tags for each token")
62
+ lemmas: list[str] = Field(description="Lemmatized form of each token")
63
+ morphological_features: list[str] = Field(
66
64
  description="Morphological features for each token (e.g., tense, number, case)"
67
65
  )
68
66
 
69
67
 
70
68
  MORPHOLOGICAL_ANALYSIS = PreparedTask(
71
- instructions="Perform morphological analysis on the following text. Break it down into tokens, identify part-of-speech tags, provide lemmatized forms, and extract morphological features for each token.",
69
+ instructions="Perform morphological analysis on the following text. Break it down into tokens, "
70
+ "identify part-of-speech tags, provide lemmatized forms, and extract morphological features "
71
+ "for each token.",
72
72
  response_format=MorphologicalAnalysis,
73
- temperature=0.0,
74
- top_p=1.0,
75
73
  )
@@ -8,7 +8,7 @@ Example:
8
8
 
9
9
  ```python
10
10
  from openai import OpenAI
11
- from openaivec.responses import BatchResponses
11
+ from openaivec import BatchResponses
12
12
  from openaivec.task import nlp
13
13
 
14
14
  client = OpenAI()
@@ -43,16 +43,14 @@ Example:
43
43
  ```
44
44
 
45
45
  Attributes:
46
- NAMED_ENTITY_RECOGNITION (PreparedTask): A prepared task instance
47
- configured for named entity recognition with temperature=0.0 and
48
- top_p=1.0 for deterministic output.
46
+ NAMED_ENTITY_RECOGNITION (PreparedTask): A prepared task instance configured for named
47
+ entity recognition. Provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for
48
+ deterministic output.
49
49
  """
50
50
 
51
- from typing import List, Optional
52
-
53
51
  from pydantic import BaseModel, Field
54
52
 
55
- from ...model import PreparedTask
53
+ from openaivec._model import PreparedTask
56
54
 
57
55
  __all__ = ["NAMED_ENTITY_RECOGNITION"]
58
56
 
@@ -62,22 +60,22 @@ class NamedEntity(BaseModel):
62
60
  label: str = Field(description="Entity type label")
63
61
  start: int = Field(description="Start position in the original text")
64
62
  end: int = Field(description="End position in the original text")
65
- confidence: Optional[float] = Field(description="Confidence score (0.0-1.0)")
63
+ confidence: float | None = Field(description="Confidence score (0.0-1.0)")
66
64
 
67
65
 
68
66
  class NamedEntityRecognition(BaseModel):
69
- persons: List[NamedEntity] = Field(description="Person entities")
70
- organizations: List[NamedEntity] = Field(description="Organization entities")
71
- locations: List[NamedEntity] = Field(description="Location entities")
72
- dates: List[NamedEntity] = Field(description="Date and time entities")
73
- money: List[NamedEntity] = Field(description="Money and currency entities")
74
- percentages: List[NamedEntity] = Field(description="Percentage entities")
75
- miscellaneous: List[NamedEntity] = Field(description="Other named entities")
67
+ persons: list[NamedEntity] = Field(description="Person entities")
68
+ organizations: list[NamedEntity] = Field(description="Organization entities")
69
+ locations: list[NamedEntity] = Field(description="Location entities")
70
+ dates: list[NamedEntity] = Field(description="Date and time entities")
71
+ money: list[NamedEntity] = Field(description="Money and currency entities")
72
+ percentages: list[NamedEntity] = Field(description="Percentage entities")
73
+ miscellaneous: list[NamedEntity] = Field(description="Other named entities")
76
74
 
77
75
 
78
76
  NAMED_ENTITY_RECOGNITION = PreparedTask(
79
- instructions="Identify and classify named entities in the following text. Extract persons, organizations, locations, dates, money, percentages, and other miscellaneous entities with their positions and confidence scores.",
77
+ instructions="Identify and classify named entities in the following text. Extract persons, "
78
+ "organizations, locations, dates, money, percentages, and other miscellaneous entities "
79
+ "with their positions and confidence scores.",
80
80
  response_format=NamedEntityRecognition,
81
- temperature=0.0,
82
- top_p=1.0,
83
81
  )
@@ -8,7 +8,7 @@ Example:
8
8
 
9
9
  ```python
10
10
  from openai import OpenAI
11
- from openaivec.responses import BatchResponses
11
+ from openaivec import BatchResponses
12
12
  from openaivec.task import nlp
13
13
 
14
14
  client = OpenAI()
@@ -43,16 +43,16 @@ Example:
43
43
  ```
44
44
 
45
45
  Attributes:
46
- SENTIMENT_ANALYSIS (PreparedTask): A prepared task instance
47
- configured for sentiment analysis with temperature=0.0 and
48
- top_p=1.0 for deterministic output.
46
+ SENTIMENT_ANALYSIS (PreparedTask): A prepared task instance configured for sentiment
47
+ analysis. Provide ``temperature=0.0`` and ``top_p=1.0`` to API calls for
48
+ deterministic output.
49
49
  """
50
50
 
51
- from typing import List, Literal
51
+ from typing import Literal
52
52
 
53
53
  from pydantic import BaseModel, Field
54
54
 
55
- from ...model import PreparedTask
55
+ from openaivec._model import PreparedTask
56
56
 
57
57
  __all__ = ["SENTIMENT_ANALYSIS"]
58
58
 
@@ -62,17 +62,20 @@ class SentimentAnalysis(BaseModel):
62
62
  description="Overall sentiment (positive, negative, neutral)"
63
63
  )
64
64
  confidence: float = Field(description="Confidence score for sentiment (0.0-1.0)")
65
- emotions: List[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(
65
+ emotions: list[Literal["joy", "sadness", "anger", "fear", "surprise", "disgust"]] = Field(
66
66
  description="Detected emotions (joy, sadness, anger, fear, surprise, disgust)"
67
67
  )
68
- emotion_scores: List[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
68
+ emotion_scores: list[float] = Field(description="Confidence scores for each emotion (0.0-1.0)")
69
69
  polarity: float = Field(description="Polarity score from -1.0 (negative) to 1.0 (positive)")
70
70
  subjectivity: float = Field(description="Subjectivity score from 0.0 (objective) to 1.0 (subjective)")
71
71
 
72
72
 
73
73
  SENTIMENT_ANALYSIS = PreparedTask(
74
- instructions="Analyze the sentiment and emotions in the following text. Provide overall sentiment classification, confidence scores, detected emotions, polarity, and subjectivity measures.\n\nIMPORTANT: Provide all analysis in the same language as the input text, except for the predefined categorical fields (sentiment, emotions) which must use the exact English values specified (positive/negative/neutral for sentiment, and joy/sadness/anger/fear/surprise/disgust for emotions).",
74
+ instructions="Analyze the sentiment and emotions in the following text. Provide overall "
75
+ "sentiment classification, confidence scores, detected emotions, polarity, and subjectivity "
76
+ "measures.\n\nIMPORTANT: Provide all analysis in the same language as the input text, except "
77
+ "for the predefined categorical fields (sentiment, emotions) which must use the exact "
78
+ "English values specified (positive/negative/neutral for sentiment, and "
79
+ "joy/sadness/anger/fear/surprise/disgust for emotions).",
75
80
  response_format=SentimentAnalysis,
76
- temperature=0.0,
77
- top_p=1.0,
78
81
  )
@@ -13,7 +13,7 @@ Example:
13
13
 
14
14
  ```python
15
15
  from openai import OpenAI
16
- from openaivec.responses import BatchResponses
16
+ from openaivec import BatchResponses
17
17
  from openaivec.task import nlp
18
18
 
19
19
  client = OpenAI()
@@ -49,8 +49,8 @@ Example:
49
49
 
50
50
  Attributes:
51
51
  MULTILINGUAL_TRANSLATION (PreparedTask): A prepared task instance configured
52
- for multilingual translation with temperature=0.0 and top_p=1.0 for
53
- deterministic output.
52
+ for multilingual translation. Provide ``temperature=0.0`` and ``top_p=1.0``
53
+ to the calling API wrapper for deterministic output.
54
54
 
55
55
  Note:
56
56
  The translation covers 58 languages across major language families. All field
@@ -72,10 +72,9 @@ Note:
72
72
  - Other: Basque, Maltese
73
73
  """
74
74
 
75
- from openai import BaseModel
76
- from pydantic import Field
75
+ from pydantic import BaseModel, Field
77
76
 
78
- from ...model import PreparedTask
77
+ from openaivec._model import PreparedTask
79
78
 
80
79
  __all__ = ["MULTILINGUAL_TRANSLATION"]
81
80
 
@@ -157,6 +156,4 @@ class TranslatedString(BaseModel):
157
156
 
158
157
  instructions = "Translate the following text into multiple languages. "
159
158
 
160
- MULTILINGUAL_TRANSLATION = PreparedTask(
161
- instructions=instructions, response_format=TranslatedString, temperature=0.0, top_p=1.0
162
- )
159
+ MULTILINGUAL_TRANSLATION = PreparedTask(instructions=instructions, response_format=TranslatedString)
@@ -1,3 +1,3 @@
1
- from .fillna import fillna, FillNaResponse
1
+ from .fillna import FillNaResponse, fillna
2
2
 
3
- __all__ = ["fillna", "FillNaResponse"]
3
+ __all__ = ["fillna", "FillNaResponse"]
@@ -33,7 +33,7 @@ Example:
33
33
 
34
34
  ```python
35
35
  from openai import OpenAI
36
- from openaivec.responses import BatchResponses
36
+ from openaivec import BatchResponses
37
37
  from openaivec.task.table import fillna
38
38
 
39
39
  client = OpenAI()
@@ -65,21 +65,20 @@ Example:
65
65
  """
66
66
 
67
67
  import json
68
- from typing import Dict, List
69
68
 
70
69
  import pandas as pd
71
70
  from pydantic import BaseModel, Field
72
71
 
73
- from ...model import PreparedTask
74
- from ...prompt import FewShotPromptBuilder
72
+ from openaivec._model import PreparedTask
73
+ from openaivec._prompt import FewShotPromptBuilder
75
74
 
76
75
  __all__ = ["fillna", "FillNaResponse"]
77
76
 
78
77
 
79
- def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> List[Dict]:
80
- examples: List[Dict] = []
78
+ def get_examples(df: pd.DataFrame, target_column_name: str, max_examples: int) -> list[dict]:
79
+ examples: list[dict] = []
81
80
 
82
- samples: pd.DataFrame = df.sample(frac=1)
81
+ samples: pd.DataFrame = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
83
82
  samples = samples.dropna(subset=[target_column_name])
84
83
 
85
84
  for i, row in samples.head(max_examples).iterrows():
@@ -109,7 +108,7 @@ def get_instructions(df: pd.DataFrame, target_column_name: str, max_examples: in
109
108
  output_value=json.dumps({"index": row["index"], "output": row["output"]}, ensure_ascii=False),
110
109
  )
111
110
 
112
- return builder.build()
111
+ return builder.improve().build()
113
112
 
114
113
 
115
114
  class FillNaResponse(BaseModel):
@@ -121,7 +120,8 @@ class FillNaResponse(BaseModel):
121
120
 
122
121
  index: int = Field(description="Index of the row in the original DataFrame")
123
122
  output: int | float | str | bool | None = Field(
124
- description="Filled value for the target column. This value should be JSON-compatible and match the target column type in the original DataFrame."
123
+ description="Filled value for the target column. This value should be JSON-compatible "
124
+ "and match the target column type in the original DataFrame."
125
125
  )
126
126
 
127
127
 
@@ -146,7 +146,7 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
146
146
  PreparedTask configured for missing value imputation with:
147
147
  - Instructions based on DataFrame patterns
148
148
  - FillNaResponse format for structured output
149
- - Temperature=0.0 and top_p=1.0 for deterministic results
149
+ - Default deterministic settings (temperature=0.0, top_p=1.0)
150
150
 
151
151
  Raises:
152
152
  ValueError: If target_column_name doesn't exist in DataFrame,
@@ -180,4 +180,4 @@ def fillna(df: pd.DataFrame, target_column_name: str, max_examples: int = 500) -
180
180
  if df[target_column_name].notna().sum() == 0:
181
181
  raise ValueError(f"Column '{target_column_name}' contains no non-null values for training examples.")
182
182
  instructions = get_instructions(df, target_column_name, max_examples)
183
- return PreparedTask(instructions=instructions, response_format=FillNaResponse, temperature=0.0, top_p=1.0)
183
+ return PreparedTask(instructions=instructions, response_format=FillNaResponse)