openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/_di.py +326 -0
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1635 -425
  18. openaivec/spark.py +604 -335
  19. openaivec/task/__init__.py +27 -29
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +51 -41
  22. openaivec/task/customer_support/inquiry_classification.py +86 -61
  23. openaivec/task/customer_support/inquiry_summary.py +44 -45
  24. openaivec/task/customer_support/intent_analysis.py +56 -41
  25. openaivec/task/customer_support/response_suggestion.py +49 -43
  26. openaivec/task/customer_support/urgency_analysis.py +76 -71
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +19 -20
  29. openaivec/task/nlp/keyword_extraction.py +22 -24
  30. openaivec/task/nlp/morphological_analysis.py +25 -25
  31. openaivec/task/nlp/named_entity_recognition.py +26 -28
  32. openaivec/task/nlp/sentiment_analysis.py +29 -21
  33. openaivec/task/nlp/translation.py +24 -30
  34. openaivec/task/table/__init__.py +3 -0
  35. openaivec/task/table/fillna.py +183 -0
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/responses.py +0 -392
  41. openaivec/serialize.py +0 -225
  42. openaivec/task/model.py +0 -84
  43. openaivec-0.10.0.dist-info/METADATA +0 -546
  44. openaivec-0.10.0.dist-info/RECORD +0 -29
  45. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
openaivec/__init__.py CHANGED
@@ -1,9 +1,18 @@
1
- from .embeddings import BatchEmbeddings, AsyncBatchEmbeddings
2
- from .responses import BatchResponses, AsyncBatchResponses
1
+ from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
2
+ from ._model import PreparedTask
3
+ from ._prompt import FewShotPrompt, FewShotPromptBuilder
4
+ from ._responses import AsyncBatchResponses, BatchResponses
5
+ from ._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
3
6
 
4
7
  __all__ = [
5
- "BatchResponses",
8
+ "AsyncBatchEmbeddings",
6
9
  "AsyncBatchResponses",
7
10
  "BatchEmbeddings",
8
- "AsyncBatchEmbeddings",
11
+ "BatchResponses",
12
+ "FewShotPrompt",
13
+ "FewShotPromptBuilder",
14
+ "SchemaInferenceOutput",
15
+ "PreparedTask",
16
+ "SchemaInferenceInput",
17
+ "SchemaInferer",
9
18
  ]
@@ -0,0 +1,12 @@
1
+ """Caching utilities used across OpenAIVec."""
2
+
3
+ from .optimize import BatchSizeSuggester, PerformanceMetric
4
+ from .proxy import AsyncBatchingMapProxy, BatchingMapProxy, ProxyBase
5
+
6
+ __all__ = [
7
+ "AsyncBatchingMapProxy",
8
+ "BatchSizeSuggester",
9
+ "BatchingMapProxy",
10
+ "PerformanceMetric",
11
+ "ProxyBase",
12
+ ]
@@ -0,0 +1,109 @@
1
+ import threading
2
+ import time
3
+ from contextlib import contextmanager
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timezone
6
+
7
+ __all__ = []
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class PerformanceMetric:
12
+ duration: float
13
+ batch_size: int
14
+ executed_at: datetime
15
+ exception: BaseException | None = None
16
+
17
+
18
+ @dataclass
19
+ class BatchSizeSuggester:
20
+ current_batch_size: int = 10
21
+ min_batch_size: int = 10
22
+ min_duration: float = 30.0
23
+ max_duration: float = 60.0
24
+ step_ratio: float = 0.2
25
+ sample_size: int = 4
26
+ _history: list[PerformanceMetric] = field(default_factory=list)
27
+ _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
28
+ _batch_size_changed_at: datetime | None = field(default=None, init=False)
29
+
30
+ def __post_init__(self) -> None:
31
+ if self.min_batch_size <= 0:
32
+ raise ValueError("min_batch_size must be > 0")
33
+ if self.current_batch_size < self.min_batch_size:
34
+ raise ValueError("current_batch_size must be >= min_batch_size")
35
+ if self.sample_size <= 0:
36
+ raise ValueError("sample_size must be > 0")
37
+ if self.step_ratio <= 0:
38
+ raise ValueError("step_ratio must be > 0")
39
+ if self.min_duration <= 0 or self.max_duration <= 0:
40
+ raise ValueError("min_duration and max_duration must be > 0")
41
+ if self.min_duration >= self.max_duration:
42
+ raise ValueError("min_duration must be < max_duration")
43
+
44
+ @contextmanager
45
+ def record(self, batch_size: int):
46
+ start_time = time.perf_counter()
47
+ executed_at = datetime.now(timezone.utc)
48
+ caught_exception: BaseException | None = None
49
+ try:
50
+ yield
51
+ except BaseException as e:
52
+ caught_exception = e
53
+ raise
54
+ finally:
55
+ duration = time.perf_counter() - start_time
56
+ with self._lock:
57
+ self._history.append(
58
+ PerformanceMetric(
59
+ duration=duration,
60
+ batch_size=batch_size,
61
+ executed_at=executed_at,
62
+ exception=caught_exception,
63
+ )
64
+ )
65
+
66
+ @property
67
+ def samples(self) -> list[PerformanceMetric]:
68
+ with self._lock:
69
+ selected: list[PerformanceMetric] = []
70
+ for metric in reversed(self._history):
71
+ if metric.exception is not None:
72
+ continue
73
+ if self._batch_size_changed_at and metric.executed_at < self._batch_size_changed_at:
74
+ continue
75
+ selected.append(metric)
76
+ if len(selected) >= self.sample_size:
77
+ break
78
+ return list(reversed(selected))
79
+
80
+ def clear_history(self):
81
+ with self._lock:
82
+ self._history.clear()
83
+
84
+ def suggest_batch_size(self) -> int:
85
+ selected = self.samples
86
+
87
+ if len(selected) < self.sample_size:
88
+ with self._lock:
89
+ return self.current_batch_size
90
+
91
+ average_duration = sum(m.duration for m in selected) / len(selected)
92
+
93
+ with self._lock:
94
+ current_size = self.current_batch_size
95
+
96
+ if average_duration < self.min_duration:
97
+ new_batch_size = int(current_size * (1 + self.step_ratio))
98
+ elif average_duration > self.max_duration:
99
+ new_batch_size = int(current_size * (1 - self.step_ratio))
100
+ else:
101
+ new_batch_size = current_size
102
+
103
+ new_batch_size = max(new_batch_size, self.min_batch_size)
104
+
105
+ if new_batch_size != self.current_batch_size:
106
+ self._batch_size_changed_at = datetime.now(timezone.utc)
107
+ self.current_batch_size = new_batch_size
108
+
109
+ return self.current_batch_size