semantis 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantis-0.1.0/MANIFEST.in +7 -0
- semantis-0.1.0/PKG-INFO +98 -0
- semantis-0.1.0/README.md +67 -0
- semantis-0.1.0/pyproject.toml +44 -0
- semantis-0.1.0/semantis/__init__.py +40 -0
- semantis-0.1.0/semantis/client.py +219 -0
- semantis-0.1.0/semantis/models.py +128 -0
- semantis-0.1.0/semantis.egg-info/PKG-INFO +98 -0
- semantis-0.1.0/semantis.egg-info/SOURCES.txt +11 -0
- semantis-0.1.0/semantis.egg-info/dependency_links.txt +1 -0
- semantis-0.1.0/semantis.egg-info/requires.txt +9 -0
- semantis-0.1.0/semantis.egg-info/top_level.txt +1 -0
- semantis-0.1.0/setup.cfg +4 -0
semantis-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: semantis
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for Semantis AI - Semantic LLM Cache
|
|
5
|
+
Author: Semantis AI
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://semantis.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.semantis.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/semantis-ai/semantis-python
|
|
10
|
+
Keywords: llm,cache,semantic,openai,gpt
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: httpx>=0.24.0
|
|
25
|
+
Provides-Extra: openai
|
|
26
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
30
|
+
Requires-Dist: respx; extra == "dev"
|
|
31
|
+
|
|
32
|
+
# Semantis Python SDK
|
|
33
|
+
|
|
34
|
+
Drop-in replacement for the OpenAI Python client with automatic semantic caching.
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install semantis
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For automatic OpenAI fallback when Semantis is unreachable:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install semantis[openai]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from semantis import SemantisCache
|
|
52
|
+
|
|
53
|
+
cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
|
|
54
|
+
|
|
55
|
+
# OpenAI-compatible interface
|
|
56
|
+
response = cache.chat.completions.create(
|
|
57
|
+
model="gpt-4o-mini",
|
|
58
|
+
messages=[{"role": "user", "content": "What is machine learning?"}],
|
|
59
|
+
)
|
|
60
|
+
print(response.choices[0].message.content)
|
|
61
|
+
print(f"Cache: {response.meta.hit} | Similarity: {response.meta.similarity}")
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Zero-Code Integration (Proxy Mode)
|
|
65
|
+
|
|
66
|
+
Point your existing OpenAI client at Semantis:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import openai
|
|
70
|
+
|
|
71
|
+
client = openai.OpenAI(
|
|
72
|
+
base_url="https://api.semantis.ai/v1",
|
|
73
|
+
api_key="sc-myorg-xxxxxxxx",
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Existing code works unchanged - caching is transparent
|
|
77
|
+
response = client.chat.completions.create(
|
|
78
|
+
model="gpt-4o-mini",
|
|
79
|
+
messages=[{"role": "user", "content": "What is ML?"}],
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Self-Hosted
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
cache = SemantisCache(
|
|
87
|
+
api_key="sc-myorg-xxxxxxxx",
|
|
88
|
+
base_url="http://localhost:8000",
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Features
|
|
93
|
+
|
|
94
|
+
- **OpenAI-compatible**: Drop-in replacement, same interface
|
|
95
|
+
- **Automatic retry**: Exponential backoff on 429/5xx errors
|
|
96
|
+
- **Fallback**: If Semantis is unreachable, falls back to direct OpenAI (with `[openai]` extra)
|
|
97
|
+
- **Cache metadata**: Every response includes `meta.hit`, `meta.similarity`, `meta.latency_ms`
|
|
98
|
+
- **Context manager**: Use with `with` for automatic cleanup
|
semantis-0.1.0/README.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Semantis Python SDK
|
|
2
|
+
|
|
3
|
+
Drop-in replacement for the OpenAI Python client with automatic semantic caching.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install semantis
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For automatic OpenAI fallback when Semantis is unreachable:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install semantis[openai]
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from semantis import SemantisCache
|
|
21
|
+
|
|
22
|
+
cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
|
|
23
|
+
|
|
24
|
+
# OpenAI-compatible interface
|
|
25
|
+
response = cache.chat.completions.create(
|
|
26
|
+
model="gpt-4o-mini",
|
|
27
|
+
messages=[{"role": "user", "content": "What is machine learning?"}],
|
|
28
|
+
)
|
|
29
|
+
print(response.choices[0].message.content)
|
|
30
|
+
print(f"Cache: {response.meta.hit} | Similarity: {response.meta.similarity}")
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Zero-Code Integration (Proxy Mode)
|
|
34
|
+
|
|
35
|
+
Point your existing OpenAI client at Semantis:
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
import openai
|
|
39
|
+
|
|
40
|
+
client = openai.OpenAI(
|
|
41
|
+
base_url="https://api.semantis.ai/v1",
|
|
42
|
+
api_key="sc-myorg-xxxxxxxx",
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Existing code works unchanged - caching is transparent
|
|
46
|
+
response = client.chat.completions.create(
|
|
47
|
+
model="gpt-4o-mini",
|
|
48
|
+
messages=[{"role": "user", "content": "What is ML?"}],
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Self-Hosted
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
cache = SemantisCache(
|
|
56
|
+
api_key="sc-myorg-xxxxxxxx",
|
|
57
|
+
base_url="http://localhost:8000",
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- **OpenAI-compatible**: Drop-in replacement, same interface
|
|
64
|
+
- **Automatic retry**: Exponential backoff on 429/5xx errors
|
|
65
|
+
- **Fallback**: If Semantis is unreachable, falls back to direct OpenAI (with `[openai]` extra)
|
|
66
|
+
- **Cache metadata**: Every response includes `meta.hit`, `meta.similarity`, `meta.latency_ms`
|
|
67
|
+
- **Context manager**: Use with `with` for automatic cleanup
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "semantis"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python SDK for Semantis AI - Semantic LLM Cache"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Semantis AI"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["llm", "cache", "semantic", "openai", "gpt"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
"Programming Language :: Python :: 3.12",
|
|
26
|
+
"Topic :: Software Development :: Libraries",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"httpx>=0.24.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.optional-dependencies]
|
|
34
|
+
openai = ["openai>=1.0.0"]
|
|
35
|
+
dev = ["pytest", "pytest-asyncio", "respx"]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://semantis.ai"
|
|
39
|
+
Documentation = "https://docs.semantis.ai"
|
|
40
|
+
Repository = "https://github.com/semantis-ai/semantis-python"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["."]
|
|
44
|
+
include = ["semantis"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Semantis AI Python SDK
|
|
3
|
+
|
|
4
|
+
Drop-in replacement for OpenAI with automatic semantic caching.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from semantis import SemantisCache
|
|
8
|
+
|
|
9
|
+
cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
|
|
10
|
+
response = cache.chat.completions.create(
|
|
11
|
+
model="gpt-4o-mini",
|
|
12
|
+
messages=[{"role": "user", "content": "What is ML?"}],
|
|
13
|
+
)
|
|
14
|
+
print(response.choices[0].message.content)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from semantis.client import SemantisCache
|
|
18
|
+
from semantis.models import (
|
|
19
|
+
ChatCompletion,
|
|
20
|
+
ChatCompletionMessage,
|
|
21
|
+
ChatCompletionChoice,
|
|
22
|
+
ChatCompletionChunk,
|
|
23
|
+
ChatCompletionChunkDelta,
|
|
24
|
+
ChatCompletionChunkChoice,
|
|
25
|
+
CacheMeta,
|
|
26
|
+
Usage,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
__version__ = "0.1.0"
|
|
30
|
+
__all__ = [
|
|
31
|
+
"SemantisCache",
|
|
32
|
+
"ChatCompletion",
|
|
33
|
+
"ChatCompletionMessage",
|
|
34
|
+
"ChatCompletionChoice",
|
|
35
|
+
"ChatCompletionChunk",
|
|
36
|
+
"ChatCompletionChunkDelta",
|
|
37
|
+
"ChatCompletionChunkChoice",
|
|
38
|
+
"CacheMeta",
|
|
39
|
+
"Usage",
|
|
40
|
+
]
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Semantis AI Client - OpenAI-compatible interface with automatic semantic caching.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from typing import Generator, Iterator, Optional, List, Dict, Any, Union
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from semantis.models import ChatCompletion, ChatCompletionChunk
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
_DEFAULT_BASE_URL = "https://api.semantis.ai"
|
|
15
|
+
_DEFAULT_TIMEOUT = 60.0
|
|
16
|
+
_MAX_RETRIES = 3
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _Completions:
    """Mirrors the ``openai.chat.completions`` interface."""

    def __init__(self, client: "SemantisCache"):
        self._client = client

    def create(
        self,
        *,
        model: str = "gpt-4o-mini",
        messages: List[Dict[str, str]],
        temperature: float = 0.2,
        stream: bool = False,
        ttl_seconds: int = 604800,
        **kwargs,
    ) -> Union[ChatCompletion, Iterator["ChatCompletionChunk"]]:
        """Create a chat completion (with automatic semantic caching).

        Accepts the same parameters as ``openai.chat.completions.create``;
        extra keyword arguments are forwarded verbatim and, as with a plain
        ``dict.update``, may override the named parameters in the payload.
        When ``stream=True``, returns an iterator of ``ChatCompletionChunk``.
        """
        endpoint = "/v1/chat/completions"
        # Later keys win, so **kwargs overrides the named defaults exactly
        # like the update() the payload was previously built with.
        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "stream": stream,
            "ttl_seconds": ttl_seconds,
            **kwargs,
        }

        if stream:
            # Streaming: hand back the lazy SSE chunk iterator.
            return self._client._post_stream(endpoint, json=payload)

        return ChatCompletion.from_dict(self._client._post(endpoint, json=payload))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class _Chat:
    """Namespace object matching the ``openai.chat`` attribute layout."""

    def __init__(self, client: "SemantisCache"):
        # Expose .completions so cache.chat.completions.create(...) works
        # exactly like the OpenAI client.
        self.completions = _Completions(client)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class SemantisCache:
    """Semantis AI SDK client.

    Drop-in replacement for ``openai.OpenAI`` that routes requests through
    the Semantis semantic cache.

    Example::

        from semantis import SemantisCache

        cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
        resp = cache.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "What is ML?"}],
        )
        print(resp.choices[0].message.content)
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = _DEFAULT_BASE_URL,
        timeout: float = _DEFAULT_TIMEOUT,
        max_retries: int = _MAX_RETRIES,
    ):
        """Create a client.

        Args:
            api_key: Semantis API key, sent as a Bearer token.
            base_url: API root URL; trailing slashes are stripped.
            timeout: Per-request timeout in seconds.
            max_retries: Attempts per request for retryable failures.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ValueError("api_key is required")
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries
        self._http = httpx.Client(
            base_url=self.base_url,
            timeout=self.timeout,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
                "User-Agent": "semantis-python/0.1.0",
            },
        )
        # OpenAI-compatible namespace: cache.chat.completions.create(...)
        self.chat = _Chat(self)

    def _post(self, path: str, **kwargs) -> dict:
        """POST with retry + exponential backoff.

        Retryable failures (429, 5xx, connection errors, read timeouts) are
        retried up to ``max_retries`` times with ``min(2 ** attempt, 8)``
        seconds of backoff. Other 4xx responses raise immediately. If the
        final attempt still cannot reach Semantis at all, falls back to a
        direct OpenAI call (see ``_openai_fallback``).

        Raises:
            httpx.HTTPStatusError: Non-retryable 4xx, or 429/5xx once
                retries are exhausted.
        """
        last_exc: Optional[Exception] = None
        for attempt in range(self.max_retries):
            is_last = attempt == self.max_retries - 1
            backoff = min(2 ** attempt, 8)
            try:
                resp = self._http.post(path, **kwargs)
                # raise_for_status() raises for 429 as well, so the single
                # handler below covers both rate limits and server errors.
                resp.raise_for_status()
                return resp.json()
            except httpx.HTTPStatusError as e:
                status = e.response.status_code
                if status != 429 and status < 500:
                    raise  # plain client error: retrying cannot help
                last_exc = e
                if is_last:
                    raise  # out of retries: surface the status error
                time.sleep(backoff)
            except (httpx.ConnectError, httpx.ReadTimeout) as e:
                last_exc = e
                if is_last:
                    # Semantis unreachable: try OpenAI directly instead.
                    return self._openai_fallback(path, kwargs)
                time.sleep(backoff)
        # Only reachable when max_retries < 1; kept as a safety net.
        if last_exc:
            raise last_exc
        raise RuntimeError("Request failed after retries")

    def _post_stream(self, path: str, **kwargs) -> Generator[ChatCompletionChunk, None, None]:
        """POST with SSE streaming. Yields ChatCompletionChunk objects.

        Lines that are not ``data: ...`` events, or whose payload is not
        valid JSON, are skipped; the ``[DONE]`` sentinel ends the stream.
        """
        with self._http.stream("POST", path, **kwargs) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if not line or not line.startswith("data: "):
                    continue
                data_str = line[len("data: "):]
                if data_str.strip() == "[DONE]":
                    return
                try:
                    yield ChatCompletionChunk.from_dict(json.loads(data_str))
                except json.JSONDecodeError:
                    continue  # tolerate malformed events rather than abort

    def _openai_fallback(self, path: str, kwargs: dict) -> dict:
        """When Semantis is unreachable, fall back to direct OpenAI call.

        Requires the ``openai`` package (``pip install semantis[openai]``),
        which uses its own default credential discovery. The response is
        reshaped into the dict structure Semantis returns, with
        ``meta.hit`` set to ``"fallback"``.

        Raises:
            RuntimeError: If ``openai`` is not installed, or the fallback
                call itself fails.
        """
        try:
            import openai as _openai
        except ImportError:
            raise RuntimeError(
                "Semantis API unreachable and openai package not installed for fallback. "
                "Install with: pip install semantis[openai]"
            )
        try:
            payload = kwargs.get("json", {})
            # Remove Semantis-specific params that OpenAI doesn't accept
            openai_payload = {k: v for k, v in payload.items() if k not in ("ttl_seconds",)}
            resp = _openai.OpenAI().chat.completions.create(**openai_payload)
            return {
                "id": resp.id,
                "object": resp.object,
                "created": resp.created,
                "model": resp.model,
                "choices": [
                    {
                        "index": c.index,
                        "message": {"role": c.message.role, "content": c.message.content},
                        "finish_reason": c.finish_reason,
                    }
                    for c in resp.choices
                ],
                "usage": {
                    "prompt_tokens": resp.usage.prompt_tokens if resp.usage else None,
                    "completion_tokens": resp.usage.completion_tokens if resp.usage else None,
                    "total_tokens": resp.usage.total_tokens if resp.usage else None,
                },
                "meta": {"hit": "fallback", "similarity": 0.0, "latency_ms": 0, "strategy": "openai_fallback"},
            }
        except Exception as e:
            # Chain the cause so the underlying failure stays visible.
            raise RuntimeError(f"Both Semantis and OpenAI fallback failed: {e}") from e

    # ── Convenience methods ──

    def query(self, prompt: str, model: str = "gpt-4o-mini") -> dict:
        """Simple query interface (non-OpenAI-compatible)."""
        resp = self._http.get("/query", params={"prompt": prompt, "model": model})
        resp.raise_for_status()
        return resp.json()

    def health(self) -> dict:
        """Check Semantis API health."""
        resp = self._http.get("/health")
        resp.raise_for_status()
        return resp.json()

    def metrics(self) -> dict:
        """Get cache metrics for the authenticated tenant."""
        resp = self._http.get("/metrics")
        resp.raise_for_status()
        return resp.json()

    def close(self):
        """Close the underlying HTTP client."""
        self._http.close()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __del__(self):
        # Best-effort cleanup; __init__ may have raised before _http existed.
        try:
            self.close()
        except Exception:
            pass
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Response models mirroring OpenAI's API shape."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional, List, Dict, Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class ChatCompletionMessage:
    """One chat message: who said it (``role``) and what was said (``content``)."""

    role: str
    content: str

    def to_dict(self) -> dict:
        """Serialize to the plain-dict wire format used by the chat API."""
        return dict(role=self.role, content=self.content)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Usage:
    """Token accounting mirroring OpenAI's ``usage`` object.

    Every field defaults to ``None`` so a response that omits token
    counts can still be represented.
    """

    prompt_tokens: Optional[int] = None
    completion_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class CacheMeta:
    """Semantis cache metadata attached to a completion response.

    Defaults describe a cache miss: ``hit``/``strategy`` of ``"miss"``
    and zeroed similarity/latency.
    """

    hit: str = "miss"
    similarity: float = 0.0
    latency_ms: float = 0.0
    strategy: str = "miss"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ChatCompletionChoice:
    """A single completion alternative: its position, message, and stop reason."""

    index: int
    message: ChatCompletionMessage
    finish_reason: str = "stop"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ChatCompletion:
    """OpenAI-shaped chat completion response, plus Semantis ``meta``."""

    id: str
    object: str = "chat.completion"
    created: int = 0
    model: str = ""
    choices: List[ChatCompletionChoice] = field(default_factory=list)
    usage: Optional[Usage] = None
    meta: Optional[CacheMeta] = None

    @classmethod
    def from_dict(cls, data: dict) -> "ChatCompletion":
        """Build a ``ChatCompletion`` from a decoded JSON payload.

        Missing fields fall back to defaults; ``usage`` and ``meta`` stay
        ``None`` when absent (or empty) in the payload.
        """
        parsed_choices = []
        for raw_choice in data.get("choices", []):
            raw_msg = raw_choice.get("message", {})
            parsed_choices.append(
                ChatCompletionChoice(
                    index=raw_choice.get("index", 0),
                    message=ChatCompletionMessage(
                        role=raw_msg.get("role", "assistant"),
                        content=raw_msg.get("content", ""),
                    ),
                    finish_reason=raw_choice.get("finish_reason", "stop"),
                )
            )

        usage = None
        raw_usage = data.get("usage")
        if raw_usage:
            usage = Usage(
                prompt_tokens=raw_usage.get("prompt_tokens"),
                completion_tokens=raw_usage.get("completion_tokens"),
                total_tokens=raw_usage.get("total_tokens"),
            )

        meta = None
        raw_meta = data.get("meta")
        if raw_meta:
            meta = CacheMeta(
                hit=raw_meta.get("hit", "miss"),
                similarity=raw_meta.get("similarity", 0.0),
                latency_ms=raw_meta.get("latency_ms", 0.0),
                strategy=raw_meta.get("strategy", "miss"),
            )

        return cls(
            id=data.get("id", ""),
            object=data.get("object", "chat.completion"),
            created=data.get("created", 0),
            model=data.get("model", ""),
            choices=parsed_choices,
            usage=usage,
            meta=meta,
        )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
class ChatCompletionChunkDelta:
    """Incremental message fragment carried by one streaming chunk.

    A field is ``None`` whenever the chunk carries no update for it.
    """

    role: Optional[str] = None
    content: Optional[str] = None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class ChatCompletionChunkChoice:
    """One streamed choice: index, its delta, and the eventual finish reason."""

    index: int = 0
    delta: ChatCompletionChunkDelta = field(default_factory=ChatCompletionChunkDelta)
    finish_reason: Optional[str] = None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class ChatCompletionChunk:
    """One server-sent event of a streamed chat completion."""

    id: str = ""
    object: str = "chat.completion.chunk"
    created: int = 0
    model: str = ""
    choices: List[ChatCompletionChunkChoice] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict) -> "ChatCompletionChunk":
        """Build a ``ChatCompletionChunk`` from a decoded SSE JSON payload.

        Missing fields fall back to defaults; each choice's ``delta``
        keeps ``None`` for the pieces the event does not carry.
        """
        parsed_choices = []
        for raw_choice in data.get("choices", []):
            raw_delta = raw_choice.get("delta", {})
            parsed_choices.append(
                ChatCompletionChunkChoice(
                    index=raw_choice.get("index", 0),
                    delta=ChatCompletionChunkDelta(
                        role=raw_delta.get("role"),
                        content=raw_delta.get("content"),
                    ),
                    finish_reason=raw_choice.get("finish_reason"),
                )
            )
        return cls(
            id=data.get("id", ""),
            object=data.get("object", "chat.completion.chunk"),
            created=data.get("created", 0),
            model=data.get("model", ""),
            choices=parsed_choices,
        )
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: semantis
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for Semantis AI - Semantic LLM Cache
|
|
5
|
+
Author: Semantis AI
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://semantis.ai
|
|
8
|
+
Project-URL: Documentation, https://docs.semantis.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/semantis-ai/semantis-python
|
|
10
|
+
Keywords: llm,cache,semantic,openai,gpt
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: httpx>=0.24.0
|
|
25
|
+
Provides-Extra: openai
|
|
26
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
30
|
+
Requires-Dist: respx; extra == "dev"
|
|
31
|
+
|
|
32
|
+
# Semantis Python SDK
|
|
33
|
+
|
|
34
|
+
Drop-in replacement for the OpenAI Python client with automatic semantic caching.
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install semantis
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For automatic OpenAI fallback when Semantis is unreachable:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install semantis[openai]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from semantis import SemantisCache
|
|
52
|
+
|
|
53
|
+
cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
|
|
54
|
+
|
|
55
|
+
# OpenAI-compatible interface
|
|
56
|
+
response = cache.chat.completions.create(
|
|
57
|
+
model="gpt-4o-mini",
|
|
58
|
+
messages=[{"role": "user", "content": "What is machine learning?"}],
|
|
59
|
+
)
|
|
60
|
+
print(response.choices[0].message.content)
|
|
61
|
+
print(f"Cache: {response.meta.hit} | Similarity: {response.meta.similarity}")
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Zero-Code Integration (Proxy Mode)
|
|
65
|
+
|
|
66
|
+
Point your existing OpenAI client at Semantis:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import openai
|
|
70
|
+
|
|
71
|
+
client = openai.OpenAI(
|
|
72
|
+
base_url="https://api.semantis.ai/v1",
|
|
73
|
+
api_key="sc-myorg-xxxxxxxx",
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Existing code works unchanged - caching is transparent
|
|
77
|
+
response = client.chat.completions.create(
|
|
78
|
+
model="gpt-4o-mini",
|
|
79
|
+
messages=[{"role": "user", "content": "What is ML?"}],
|
|
80
|
+
)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Self-Hosted
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
cache = SemantisCache(
|
|
87
|
+
api_key="sc-myorg-xxxxxxxx",
|
|
88
|
+
base_url="http://localhost:8000",
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Features
|
|
93
|
+
|
|
94
|
+
- **OpenAI-compatible**: Drop-in replacement, same interface
|
|
95
|
+
- **Automatic retry**: Exponential backoff on 429/5xx errors
|
|
96
|
+
- **Fallback**: If Semantis is unreachable, falls back to direct OpenAI (with `[openai]` extra)
|
|
97
|
+
- **Cache metadata**: Every response includes `meta.hit`, `meta.similarity`, `meta.latency_ms`
|
|
98
|
+
- **Context manager**: Use with `with` for automatic cleanup
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
semantis/__init__.py
|
|
5
|
+
semantis/client.py
|
|
6
|
+
semantis/models.py
|
|
7
|
+
semantis.egg-info/PKG-INFO
|
|
8
|
+
semantis.egg-info/SOURCES.txt
|
|
9
|
+
semantis.egg-info/dependency_links.txt
|
|
10
|
+
semantis.egg-info/requires.txt
|
|
11
|
+
semantis.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
semantis
|
semantis-0.1.0/setup.cfg
ADDED