openaivec 0.12.5__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openaivec/__init__.py +13 -4
- openaivec/_cache/__init__.py +12 -0
- openaivec/_cache/optimize.py +109 -0
- openaivec/_cache/proxy.py +806 -0
- openaivec/{di.py → _di.py} +36 -12
- openaivec/_embeddings.py +203 -0
- openaivec/{log.py → _log.py} +2 -2
- openaivec/_model.py +113 -0
- openaivec/{prompt.py → _prompt.py} +95 -28
- openaivec/_provider.py +207 -0
- openaivec/_responses.py +511 -0
- openaivec/_schema/__init__.py +9 -0
- openaivec/_schema/infer.py +340 -0
- openaivec/_schema/spec.py +350 -0
- openaivec/_serialize.py +234 -0
- openaivec/{util.py → _util.py} +25 -85
- openaivec/pandas_ext.py +1496 -318
- openaivec/spark.py +485 -183
- openaivec/task/__init__.py +9 -7
- openaivec/task/customer_support/__init__.py +9 -15
- openaivec/task/customer_support/customer_sentiment.py +17 -15
- openaivec/task/customer_support/inquiry_classification.py +23 -22
- openaivec/task/customer_support/inquiry_summary.py +14 -13
- openaivec/task/customer_support/intent_analysis.py +21 -19
- openaivec/task/customer_support/response_suggestion.py +16 -16
- openaivec/task/customer_support/urgency_analysis.py +24 -25
- openaivec/task/nlp/__init__.py +4 -4
- openaivec/task/nlp/dependency_parsing.py +10 -12
- openaivec/task/nlp/keyword_extraction.py +11 -14
- openaivec/task/nlp/morphological_analysis.py +12 -14
- openaivec/task/nlp/named_entity_recognition.py +16 -18
- openaivec/task/nlp/sentiment_analysis.py +14 -11
- openaivec/task/nlp/translation.py +6 -9
- openaivec/task/table/__init__.py +2 -2
- openaivec/task/table/fillna.py +11 -11
- openaivec-1.0.10.dist-info/METADATA +399 -0
- openaivec-1.0.10.dist-info/RECORD +39 -0
- {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
- openaivec/embeddings.py +0 -172
- openaivec/model.py +0 -67
- openaivec/provider.py +0 -45
- openaivec/responses.py +0 -393
- openaivec/serialize.py +0 -225
- openaivec-0.12.5.dist-info/METADATA +0 -696
- openaivec-0.12.5.dist-info/RECORD +0 -33
- {openaivec-0.12.5.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
openaivec/__init__.py
CHANGED
|
from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
from ._model import PreparedTask
from ._prompt import FewShotPrompt, FewShotPromptBuilder
from ._responses import AsyncBatchResponses, BatchResponses
from ._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer

# Public API of the top-level package. Kept alphabetically sorted so additions
# are easy to diff; previously "PreparedTask" and the SchemaInference* names
# were out of order.
__all__ = [
    "AsyncBatchEmbeddings",
    "AsyncBatchResponses",
    "BatchEmbeddings",
    "BatchResponses",
    "FewShotPrompt",
    "FewShotPromptBuilder",
    "PreparedTask",
    "SchemaInferenceInput",
    "SchemaInferenceOutput",
    "SchemaInferer",
]
|
"""Caching utilities used across OpenAIVec."""

from .optimize import BatchSizeSuggester, PerformanceMetric
from .proxy import AsyncBatchingMapProxy, BatchingMapProxy, ProxyBase

# Public re-exports: consumers should import these names from this package
# rather than from the private submodules above.
__all__ = [
    "AsyncBatchingMapProxy",
    "BatchSizeSuggester",
    "BatchingMapProxy",
    "PerformanceMetric",
    "ProxyBase",
]
|
import threading
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone

# The package __init__ (openaivec/_cache/__init__.py) re-exports both classes
# defined in this module, so advertise them for star-imports as well.
# Previously this was an empty list, which hid the module's public classes
# from `from ... import *` while the package still exposed them.
__all__ = ["BatchSizeSuggester", "PerformanceMetric"]
@dataclass(frozen=True)
class PerformanceMetric:
    """Immutable record of one batch execution, collected by BatchSizeSuggester.record()."""

    # Wall-clock seconds the batch took (measured with time.perf_counter).
    duration: float
    # Number of items in the batch.
    batch_size: int
    # UTC timestamp taken when the batch started.
    executed_at: datetime
    # Exception raised inside the batch, if any; None on success. Failed
    # batches are kept in history but excluded from sampling.
    exception: BaseException | None = None
@dataclass
class BatchSizeSuggester:
    """Adaptively tunes a batch size so that batches land in a target duration window.

    Successful batch timings are recorded via :meth:`record`; once
    ``sample_size`` samples have accumulated since the last size change,
    :meth:`suggest_batch_size` grows the batch by ``step_ratio`` when the
    average duration is below ``min_duration`` and shrinks it when above
    ``max_duration``. All mutable state is guarded by a re-entrant lock, so
    the class is safe to share between threads.
    """

    # Batch size currently handed out to callers; adjusted over time.
    current_batch_size: int = 10
    # Hard floor applied to every suggestion.
    min_batch_size: int = 10
    # Seconds; average durations below this trigger growth.
    min_duration: float = 30.0
    # Seconds; average durations above this trigger shrinkage.
    max_duration: float = 60.0
    # Fractional step applied on each adjustment (0.2 -> +/-20%).
    step_ratio: float = 0.2
    # Number of successful samples required before adjusting.
    sample_size: int = 4
    _history: list[PerformanceMetric] = field(default_factory=list)
    _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
    # Timestamp of the last size change; samples older than this are ignored.
    _batch_size_changed_at: datetime | None = field(default=None, init=False)

    def __post_init__(self) -> None:
        """Validate configuration invariants.

        Raises:
            ValueError: If any size, duration, or ratio parameter is out of range.
        """
        if self.min_batch_size <= 0:
            raise ValueError("min_batch_size must be > 0")
        if self.current_batch_size < self.min_batch_size:
            raise ValueError("current_batch_size must be >= min_batch_size")
        if self.sample_size <= 0:
            raise ValueError("sample_size must be > 0")
        if self.step_ratio <= 0:
            raise ValueError("step_ratio must be > 0")
        if self.min_duration <= 0 or self.max_duration <= 0:
            raise ValueError("min_duration and max_duration must be > 0")
        if self.min_duration >= self.max_duration:
            raise ValueError("min_duration must be < max_duration")

    @contextmanager
    def record(self, batch_size: int):
        """Context manager that times the enclosed batch and appends a PerformanceMetric.

        The metric is recorded even when the body raises; the exception is
        stored on the metric and re-raised to the caller.

        Args:
            batch_size: Number of items processed inside the ``with`` block.
        """
        start_time = time.perf_counter()
        executed_at = datetime.now(timezone.utc)
        caught_exception: BaseException | None = None
        try:
            yield
        except BaseException as e:
            caught_exception = e
            raise
        finally:
            duration = time.perf_counter() - start_time
            with self._lock:
                self._history.append(
                    PerformanceMetric(
                        duration=duration,
                        batch_size=batch_size,
                        executed_at=executed_at,
                        exception=caught_exception,
                    )
                )

    @property
    def samples(self) -> list[PerformanceMetric]:
        """Return up to ``sample_size`` most recent successful metrics, oldest first.

        Metrics that raised, or that predate the last batch-size change, are skipped.
        """
        with self._lock:
            selected: list[PerformanceMetric] = []
            # Walk newest-to-oldest so we can stop as soon as we have enough.
            for metric in reversed(self._history):
                if metric.exception is not None:
                    continue
                if self._batch_size_changed_at and metric.executed_at < self._batch_size_changed_at:
                    continue
                selected.append(metric)
                if len(selected) >= self.sample_size:
                    break
            return list(reversed(selected))

    def clear_history(self):
        """Discard all recorded metrics."""
        with self._lock:
            self._history.clear()

    def suggest_batch_size(self) -> int:
        """Return the batch size to use next, adjusting it when enough samples agree.

        Returns:
            The (possibly updated) current batch size, never below ``min_batch_size``.
        """
        # Hold the lock across the whole read-modify-write: previously the
        # current size was read under the lock, the decision made unlocked,
        # and the new size written back without re-validating, so two
        # concurrent callers could both apply a step (lost/duplicated
        # adjustments). The RLock is re-entrant, so the nested acquisition
        # inside the `samples` property is safe.
        with self._lock:
            selected = self.samples

            # Not enough fresh, successful samples yet: keep the current size.
            if len(selected) < self.sample_size:
                return self.current_batch_size

            average_duration = sum(m.duration for m in selected) / len(selected)

            if average_duration < self.min_duration:
                new_batch_size = int(self.current_batch_size * (1 + self.step_ratio))
            elif average_duration > self.max_duration:
                new_batch_size = int(self.current_batch_size * (1 - self.step_ratio))
            else:
                new_batch_size = self.current_batch_size

            new_batch_size = max(new_batch_size, self.min_batch_size)

            if new_batch_size != self.current_batch_size:
                # Invalidate older samples so the next adjustment is based
                # only on batches run at the new size.
                self._batch_size_changed_at = datetime.now(timezone.utc)
                self.current_batch_size = new_batch_size

            return self.current_batch_size