openaivec 0.10.0__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. openaivec/__init__.py +13 -4
  2. openaivec/_cache/__init__.py +12 -0
  3. openaivec/_cache/optimize.py +109 -0
  4. openaivec/_cache/proxy.py +806 -0
  5. openaivec/_di.py +326 -0
  6. openaivec/_embeddings.py +203 -0
  7. openaivec/{log.py → _log.py} +2 -2
  8. openaivec/_model.py +113 -0
  9. openaivec/{prompt.py → _prompt.py} +95 -28
  10. openaivec/_provider.py +207 -0
  11. openaivec/_responses.py +511 -0
  12. openaivec/_schema/__init__.py +9 -0
  13. openaivec/_schema/infer.py +340 -0
  14. openaivec/_schema/spec.py +350 -0
  15. openaivec/_serialize.py +234 -0
  16. openaivec/{util.py → _util.py} +25 -85
  17. openaivec/pandas_ext.py +1635 -425
  18. openaivec/spark.py +604 -335
  19. openaivec/task/__init__.py +27 -29
  20. openaivec/task/customer_support/__init__.py +9 -15
  21. openaivec/task/customer_support/customer_sentiment.py +51 -41
  22. openaivec/task/customer_support/inquiry_classification.py +86 -61
  23. openaivec/task/customer_support/inquiry_summary.py +44 -45
  24. openaivec/task/customer_support/intent_analysis.py +56 -41
  25. openaivec/task/customer_support/response_suggestion.py +49 -43
  26. openaivec/task/customer_support/urgency_analysis.py +76 -71
  27. openaivec/task/nlp/__init__.py +4 -4
  28. openaivec/task/nlp/dependency_parsing.py +19 -20
  29. openaivec/task/nlp/keyword_extraction.py +22 -24
  30. openaivec/task/nlp/morphological_analysis.py +25 -25
  31. openaivec/task/nlp/named_entity_recognition.py +26 -28
  32. openaivec/task/nlp/sentiment_analysis.py +29 -21
  33. openaivec/task/nlp/translation.py +24 -30
  34. openaivec/task/table/__init__.py +3 -0
  35. openaivec/task/table/fillna.py +183 -0
  36. openaivec-1.0.10.dist-info/METADATA +399 -0
  37. openaivec-1.0.10.dist-info/RECORD +39 -0
  38. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/WHEEL +1 -1
  39. openaivec/embeddings.py +0 -172
  40. openaivec/responses.py +0 -392
  41. openaivec/serialize.py +0 -225
  42. openaivec/task/model.py +0 -84
  43. openaivec-0.10.0.dist-info/METADATA +0 -546
  44. openaivec-0.10.0.dist-info/RECORD +0 -29
  45. {openaivec-0.10.0.dist-info → openaivec-1.0.10.dist-info}/licenses/LICENSE +0 -0
openaivec/__init__.py CHANGED
@@ -1,9 +1,18 @@
1
- from .embeddings import BatchEmbeddings, AsyncBatchEmbeddings
2
- from .responses import BatchResponses, AsyncBatchResponses
1
+ from ._embeddings import AsyncBatchEmbeddings, BatchEmbeddings
2
+ from ._model import PreparedTask
3
+ from ._prompt import FewShotPrompt, FewShotPromptBuilder
4
+ from ._responses import AsyncBatchResponses, BatchResponses
5
+ from ._schema import SchemaInferenceInput, SchemaInferenceOutput, SchemaInferer
3
6
 
4
7
  __all__ = [
5
- "BatchResponses",
8
+ "AsyncBatchEmbeddings",
6
9
  "AsyncBatchResponses",
7
10
  "BatchEmbeddings",
8
- "AsyncBatchEmbeddings",
11
+ "BatchResponses",
12
+ "FewShotPrompt",
13
+ "FewShotPromptBuilder",
14
+ "SchemaInferenceOutput",
15
+ "PreparedTask",
16
+ "SchemaInferenceInput",
17
+ "SchemaInferer",
9
18
  ]
@@ -0,0 +1,12 @@
1
+ """Caching utilities used across OpenAIVec."""
2
+
3
+ from .optimize import BatchSizeSuggester, PerformanceMetric
4
+ from .proxy import AsyncBatchingMapProxy, BatchingMapProxy, ProxyBase
5
+
6
+ __all__ = [
7
+ "AsyncBatchingMapProxy",
8
+ "BatchSizeSuggester",
9
+ "BatchingMapProxy",
10
+ "PerformanceMetric",
11
+ "ProxyBase",
12
+ ]
@@ -0,0 +1,109 @@
1
+ import threading
2
+ import time
3
+ from contextlib import contextmanager
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timezone
6
+
7
+ __all__ = []
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class PerformanceMetric:
12
+ duration: float
13
+ batch_size: int
14
+ executed_at: datetime
15
+ exception: BaseException | None = None
16
+
17
+
18
+ @dataclass
19
+ class BatchSizeSuggester:
20
+ current_batch_size: int = 10
21
+ min_batch_size: int = 10
22
+ min_duration: float = 30.0
23
+ max_duration: float = 60.0
24
+ step_ratio: float = 0.2
25
+ sample_size: int = 4
26
+ _history: list[PerformanceMetric] = field(default_factory=list)
27
+ _lock: threading.RLock = field(default_factory=threading.RLock, repr=False)
28
+ _batch_size_changed_at: datetime | None = field(default=None, init=False)
29
+
30
+ def __post_init__(self) -> None:
31
+ if self.min_batch_size <= 0:
32
+ raise ValueError("min_batch_size must be > 0")
33
+ if self.current_batch_size < self.min_batch_size:
34
+ raise ValueError("current_batch_size must be >= min_batch_size")
35
+ if self.sample_size <= 0:
36
+ raise ValueError("sample_size must be > 0")
37
+ if self.step_ratio <= 0:
38
+ raise ValueError("step_ratio must be > 0")
39
+ if self.min_duration <= 0 or self.max_duration <= 0:
40
+ raise ValueError("min_duration and max_duration must be > 0")
41
+ if self.min_duration >= self.max_duration:
42
+ raise ValueError("min_duration must be < max_duration")
43
+
44
+ @contextmanager
45
+ def record(self, batch_size: int):
46
+ start_time = time.perf_counter()
47
+ executed_at = datetime.now(timezone.utc)
48
+ caught_exception: BaseException | None = None
49
+ try:
50
+ yield
51
+ except BaseException as e:
52
+ caught_exception = e
53
+ raise
54
+ finally:
55
+ duration = time.perf_counter() - start_time
56
+ with self._lock:
57
+ self._history.append(
58
+ PerformanceMetric(
59
+ duration=duration,
60
+ batch_size=batch_size,
61
+ executed_at=executed_at,
62
+ exception=caught_exception,
63
+ )
64
+ )
65
+
66
+ @property
67
+ def samples(self) -> list[PerformanceMetric]:
68
+ with self._lock:
69
+ selected: list[PerformanceMetric] = []
70
+ for metric in reversed(self._history):
71
+ if metric.exception is not None:
72
+ continue
73
+ if self._batch_size_changed_at and metric.executed_at < self._batch_size_changed_at:
74
+ continue
75
+ selected.append(metric)
76
+ if len(selected) >= self.sample_size:
77
+ break
78
+ return list(reversed(selected))
79
+
80
+ def clear_history(self):
81
+ with self._lock:
82
+ self._history.clear()
83
+
84
+ def suggest_batch_size(self) -> int:
85
+ selected = self.samples
86
+
87
+ if len(selected) < self.sample_size:
88
+ with self._lock:
89
+ return self.current_batch_size
90
+
91
+ average_duration = sum(m.duration for m in selected) / len(selected)
92
+
93
+ with self._lock:
94
+ current_size = self.current_batch_size
95
+
96
+ if average_duration < self.min_duration:
97
+ new_batch_size = int(current_size * (1 + self.step_ratio))
98
+ elif average_duration > self.max_duration:
99
+ new_batch_size = int(current_size * (1 - self.step_ratio))
100
+ else:
101
+ new_batch_size = current_size
102
+
103
+ new_batch_size = max(new_batch_size, self.min_batch_size)
104
+
105
+ if new_batch_size != self.current_batch_size:
106
+ self._batch_size_changed_at = datetime.now(timezone.utc)
107
+ self.current_batch_size = new_batch_size
108
+
109
+ return self.current_batch_size