semantis 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ include README.md
2
+ include pyproject.toml
3
+ recursive-include semantis *.py
4
+ recursive-exclude semantis_ai_semantic_cache_api_client *
5
+ exclude test_local.py
6
+ prune semantis_ai_semantic_cache_api_client
7
+ prune .ruff_cache
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: semantis
3
+ Version: 0.1.0
4
+ Summary: Python SDK for Semantis AI - Semantic LLM Cache
5
+ Author: Semantis AI
6
+ License: MIT
7
+ Project-URL: Homepage, https://semantis.ai
8
+ Project-URL: Documentation, https://docs.semantis.ai
9
+ Project-URL: Repository, https://github.com/semantis-ai/semantis-python
10
+ Keywords: llm,cache,semantic,openai,gpt
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: httpx>=0.24.0
25
+ Provides-Extra: openai
26
+ Requires-Dist: openai>=1.0.0; extra == "openai"
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest; extra == "dev"
29
+ Requires-Dist: pytest-asyncio; extra == "dev"
30
+ Requires-Dist: respx; extra == "dev"
31
+
32
+ # Semantis Python SDK
33
+
34
+ Drop-in replacement for the OpenAI Python client with automatic semantic caching.
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install semantis
40
+ ```
41
+
42
+ For automatic OpenAI fallback when Semantis is unreachable:
43
+
44
+ ```bash
45
+ pip install semantis[openai]
46
+ ```
47
+
48
+ ## Quick Start
49
+
50
+ ```python
51
+ from semantis import SemantisCache
52
+
53
+ cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
54
+
55
+ # OpenAI-compatible interface
56
+ response = cache.chat.completions.create(
57
+ model="gpt-4o-mini",
58
+ messages=[{"role": "user", "content": "What is machine learning?"}],
59
+ )
60
+ print(response.choices[0].message.content)
61
+ print(f"Cache: {response.meta.hit} | Similarity: {response.meta.similarity}")
62
+ ```
63
+
64
+ ## Zero-Code Integration (Proxy Mode)
65
+
66
+ Point your existing OpenAI client at Semantis:
67
+
68
+ ```python
69
+ import openai
70
+
71
+ client = openai.OpenAI(
72
+ base_url="https://api.semantis.ai/v1",
73
+ api_key="sc-myorg-xxxxxxxx",
74
+ )
75
+
76
+ # Existing code works unchanged - caching is transparent
77
+ response = client.chat.completions.create(
78
+ model="gpt-4o-mini",
79
+ messages=[{"role": "user", "content": "What is ML?"}],
80
+ )
81
+ ```
82
+
83
+ ## Self-Hosted
84
+
85
+ ```python
86
+ cache = SemantisCache(
87
+ api_key="sc-myorg-xxxxxxxx",
88
+ base_url="http://localhost:8000",
89
+ )
90
+ ```
91
+
92
+ ## Features
93
+
94
+ - **OpenAI-compatible**: Drop-in replacement, same interface
95
+ - **Automatic retry**: Exponential backoff on 429/5xx errors
96
+ - **Fallback**: If Semantis is unreachable, falls back to direct OpenAI (with `[openai]` extra)
97
+ - **Cache metadata**: Every response includes `meta.hit`, `meta.similarity`, `meta.latency_ms`
98
+ - **Context manager**: Use with `with` for automatic cleanup
@@ -0,0 +1,67 @@
1
+ # Semantis Python SDK
2
+
3
+ Drop-in replacement for the OpenAI Python client with automatic semantic caching.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install semantis
9
+ ```
10
+
11
+ For automatic OpenAI fallback when Semantis is unreachable:
12
+
13
+ ```bash
14
+ pip install "semantis[openai]"
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ```python
20
+ from semantis import SemantisCache
21
+
22
+ cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
23
+
24
+ # OpenAI-compatible interface
25
+ response = cache.chat.completions.create(
26
+ model="gpt-4o-mini",
27
+ messages=[{"role": "user", "content": "What is machine learning?"}],
28
+ )
29
+ print(response.choices[0].message.content)
30
+ print(f"Cache: {response.meta.hit} | Similarity: {response.meta.similarity}")
31
+ ```
32
+
33
+ ## Zero-Code Integration (Proxy Mode)
34
+
35
+ Point your existing OpenAI client at Semantis:
36
+
37
+ ```python
38
+ import openai
39
+
40
+ client = openai.OpenAI(
41
+ base_url="https://api.semantis.ai/v1",
42
+ api_key="sc-myorg-xxxxxxxx",
43
+ )
44
+
45
+ # Existing code works unchanged - caching is transparent
46
+ response = client.chat.completions.create(
47
+ model="gpt-4o-mini",
48
+ messages=[{"role": "user", "content": "What is ML?"}],
49
+ )
50
+ ```
51
+
52
+ ## Self-Hosted
53
+
54
+ ```python
55
+ cache = SemantisCache(
56
+ api_key="sc-myorg-xxxxxxxx",
57
+ base_url="http://localhost:8000",
58
+ )
59
+ ```
60
+
61
+ ## Features
62
+
63
+ - **OpenAI-compatible**: Drop-in replacement, same interface
64
+ - **Automatic retry**: Exponential backoff on 429/5xx errors
65
+ - **Fallback**: If Semantis is unreachable, falls back to direct OpenAI (with `[openai]` extra)
66
+ - **Cache metadata**: Every response includes `meta.hit`, `meta.similarity`, `meta.latency_ms`
67
+ - **Context manager**: Use with `with` for automatic cleanup
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "semantis"
7
+ version = "0.1.0"
8
+ description = "Python SDK for Semantis AI - Semantic LLM Cache"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ {name = "Semantis AI"},
14
+ ]
15
+ keywords = ["llm", "cache", "semantic", "openai", "gpt"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Topic :: Software Development :: Libraries",
27
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
28
+ ]
29
+ dependencies = [
30
+ "httpx>=0.24.0",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ openai = ["openai>=1.0.0"]
35
+ dev = ["pytest", "pytest-asyncio", "respx"]
36
+
37
+ [project.urls]
38
+ Homepage = "https://semantis.ai"
39
+ Documentation = "https://docs.semantis.ai"
40
+ Repository = "https://github.com/semantis-ai/semantis-python"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["."]
44
+ include = ["semantis"]
@@ -0,0 +1,40 @@
1
+ """
2
+ Semantis AI Python SDK
3
+
4
+ Drop-in replacement for OpenAI with automatic semantic caching.
5
+
6
+ Usage:
7
+ from semantis import SemantisCache
8
+
9
+ cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
10
+ response = cache.chat.completions.create(
11
+ model="gpt-4o-mini",
12
+ messages=[{"role": "user", "content": "What is ML?"}],
13
+ )
14
+ print(response.choices[0].message.content)
15
+ """
16
+
17
+ from semantis.client import SemantisCache
18
+ from semantis.models import (
19
+ ChatCompletion,
20
+ ChatCompletionMessage,
21
+ ChatCompletionChoice,
22
+ ChatCompletionChunk,
23
+ ChatCompletionChunkDelta,
24
+ ChatCompletionChunkChoice,
25
+ CacheMeta,
26
+ Usage,
27
+ )
28
+
29
+ __version__ = "0.1.0"
30
+ __all__ = [
31
+ "SemantisCache",
32
+ "ChatCompletion",
33
+ "ChatCompletionMessage",
34
+ "ChatCompletionChoice",
35
+ "ChatCompletionChunk",
36
+ "ChatCompletionChunkDelta",
37
+ "ChatCompletionChunkChoice",
38
+ "CacheMeta",
39
+ "Usage",
40
+ ]
@@ -0,0 +1,219 @@
1
+ """
2
+ Semantis AI Client - OpenAI-compatible interface with automatic semantic caching.
3
+ """
4
+
5
+ import json
6
+ import time
7
+ from typing import Generator, Iterator, Optional, List, Dict, Any, Union
8
+
9
+ import httpx
10
+
11
+ from semantis.models import ChatCompletion, ChatCompletionChunk
12
+
13
+
14
+ _DEFAULT_BASE_URL = "https://api.semantis.ai"
15
+ _DEFAULT_TIMEOUT = 60.0
16
+ _MAX_RETRIES = 3
17
+
18
+
19
class _Completions:
    """Mirrors the ``openai.chat.completions`` interface."""

    def __init__(self, client: "SemantisCache"):
        # Back-reference to the owning SemantisCache, which does the HTTP work.
        self._client = client

    def create(
        self,
        *,
        model: str = "gpt-4o-mini",
        messages: List[Dict[str, str]],
        temperature: float = 0.2,
        stream: bool = False,
        ttl_seconds: int = 604800,
        **kwargs,
    ) -> Union[ChatCompletion, Iterator["ChatCompletionChunk"]]:
        """Create a chat completion (with automatic semantic caching).

        Accepts the same parameters as ``openai.chat.completions.create``;
        extra keyword arguments are forwarded verbatim in the request body.
        ``ttl_seconds`` is Semantis-specific (cache entry lifetime).
        When ``stream=True``, returns an iterator of ``ChatCompletionChunk``.
        """
        request: Dict[str, Any] = dict(
            model=model,
            messages=messages,
            temperature=temperature,
            stream=stream,
            ttl_seconds=ttl_seconds,
        )
        # Unknown keywords ride along so new server parameters work without
        # an SDK update (and may override the named ones above).
        request.update(kwargs)

        if not stream:
            body = self._client._post("/v1/chat/completions", json=request)
            return ChatCompletion.from_dict(body)
        return self._client._post_stream("/v1/chat/completions", json=request)
54
+
55
+
56
class _Chat:
    """Mirrors the ``openai.chat`` namespace."""

    def __init__(self, client: "SemantisCache") -> None:
        # Only the completions sub-resource exists for now.
        self.completions = _Completions(client)
61
+
62
+
63
class SemantisCache:
    """Semantis AI SDK client.

    Drop-in replacement for ``openai.OpenAI`` that routes requests through
    the Semantis semantic cache.

    Example::

        from semantis import SemantisCache

        cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
        resp = cache.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "What is ML?"}],
        )
        print(resp.choices[0].message.content)
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = _DEFAULT_BASE_URL,
        timeout: float = _DEFAULT_TIMEOUT,
        max_retries: int = _MAX_RETRIES,
    ):
        """Create a client.

        Args:
            api_key: Semantis API key. Required and non-empty.
            base_url: API root; a trailing slash is stripped.
            timeout: Per-request timeout in seconds.
            max_retries: Attempts for retryable failures (429/5xx/transport).

        Raises:
            ValueError: If ``api_key`` is falsy.
        """
        if not api_key:
            raise ValueError("api_key is required")
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries
        self._http = httpx.Client(
            base_url=self.base_url,
            timeout=self.timeout,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
                "User-Agent": "semantis-python/0.1.0",
            },
        )
        self.chat = _Chat(self)

    def _backoff(self, attempt: int) -> None:
        """Sleep with capped exponential backoff: 1, 2, 4, then 8 seconds."""
        time.sleep(min(2 ** attempt, 8))

    def _post(self, path: str, **kwargs) -> dict:
        """POST with retry + exponential backoff.

        Retries 429 and 5xx responses up to ``max_retries`` times. On the
        final transport-level failure (connection errors, timeouts) it falls
        back to a direct OpenAI call via :meth:`_openai_fallback`.

        Raises:
            httpx.HTTPStatusError: For non-retryable 4xx responses, or when
                retries on 429/5xx are exhausted.
        """
        last_exc: Optional[Exception] = None
        for attempt in range(self.max_retries):
            is_last = attempt == self.max_retries - 1
            try:
                resp = self._http.post(path, **kwargs)
                if resp.status_code == 429:
                    # Rate limited: remember an HTTPStatusError so that
                    # exhausting retries surfaces the same exception type as
                    # any other HTTP failure (not a bare RuntimeError).
                    last_exc = httpx.HTTPStatusError(
                        f"429 Too Many Requests for {path}",
                        request=resp.request,
                        response=resp,
                    )
                    if not is_last:  # no point sleeping before giving up
                        self._backoff(attempt)
                    continue
                resp.raise_for_status()
                return resp.json()
            except httpx.HTTPStatusError as e:
                if e.response.status_code >= 500:
                    last_exc = e
                    if not is_last:
                        self._backoff(attempt)
                    continue
                raise  # other 4xx: caller error, do not retry
            except httpx.TransportError as e:
                # TransportError covers httpx.ConnectError plus every timeout
                # variant (ConnectTimeout, ReadTimeout, WriteTimeout, ...);
                # the previous (ConnectError, ReadTimeout) pair silently
                # skipped the fallback for the other transport failures.
                last_exc = e
                if is_last:
                    return self._openai_fallback(path, kwargs)
                self._backoff(attempt)
        if last_exc:
            raise last_exc
        raise RuntimeError("Request failed after retries")

    def _post_stream(self, path: str, **kwargs) -> Generator[ChatCompletionChunk, None, None]:
        """POST with SSE streaming. Yields ChatCompletionChunk objects.

        Parses ``data: <json>`` lines; stops on the ``[DONE]`` sentinel and
        silently skips lines that are not valid JSON. No retry/fallback is
        applied to streaming requests.
        """
        with self._http.stream("POST", path, **kwargs) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if not line or not line.startswith("data: "):
                    continue
                data_str = line[6:]  # strip the "data: " prefix
                if data_str.strip() == "[DONE]":
                    return
                try:
                    data = json.loads(data_str)
                    yield ChatCompletionChunk.from_dict(data)
                except json.JSONDecodeError:
                    continue

    def _openai_fallback(self, path: str, kwargs: dict) -> dict:
        """When Semantis is unreachable, fall back to a direct OpenAI call.

        Requires the ``openai`` extra. Uses ``openai.OpenAI()`` with its
        default configuration (e.g. the ``OPENAI_API_KEY`` environment
        variable — the Semantis key is NOT forwarded). The OpenAI response is
        reshaped into the Semantis wire format with ``meta.hit == "fallback"``.

        Raises:
            RuntimeError: If ``openai`` is not installed, or the fallback
                call itself fails.
        """
        try:
            import openai as _openai
        except ImportError as exc:
            raise RuntimeError(
                "Semantis API unreachable and openai package not installed for fallback. "
                "Install with: pip install semantis[openai]"
            ) from exc

        try:
            payload = kwargs.get("json", {})
            # Remove Semantis-specific params that OpenAI doesn't accept
            openai_payload = {k: v for k, v in payload.items() if k not in ("ttl_seconds",)}
            client = _openai.OpenAI()
            resp = client.chat.completions.create(**openai_payload)
            return {
                "id": resp.id,
                "object": resp.object,
                "created": resp.created,
                "model": resp.model,
                "choices": [
                    {
                        "index": c.index,
                        "message": {"role": c.message.role, "content": c.message.content},
                        "finish_reason": c.finish_reason,
                    }
                    for c in resp.choices
                ],
                "usage": {
                    "prompt_tokens": resp.usage.prompt_tokens if resp.usage else None,
                    "completion_tokens": resp.usage.completion_tokens if resp.usage else None,
                    "total_tokens": resp.usage.total_tokens if resp.usage else None,
                },
                "meta": {"hit": "fallback", "similarity": 0.0, "latency_ms": 0, "strategy": "openai_fallback"},
            }
        except Exception as e:
            # Chain the cause so the original OpenAI error is not lost.
            raise RuntimeError(f"Both Semantis and OpenAI fallback failed: {e}") from e

    # ── Convenience methods ──

    def query(self, prompt: str, model: str = "gpt-4o-mini") -> dict:
        """Simple query interface (non-OpenAI-compatible).

        GET /query with ``prompt`` and ``model``; no retry/fallback applied.
        """
        resp = self._http.get("/query", params={"prompt": prompt, "model": model})
        resp.raise_for_status()
        return resp.json()

    def health(self) -> dict:
        """Check Semantis API health (GET /health)."""
        resp = self._http.get("/health")
        resp.raise_for_status()
        return resp.json()

    def metrics(self) -> dict:
        """Get cache metrics for the authenticated tenant (GET /metrics)."""
        resp = self._http.get("/metrics")
        resp.raise_for_status()
        return resp.json()

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._http.close()

    def __enter__(self) -> "SemantisCache":
        return self

    def __exit__(self, *args) -> None:
        self.close()

    def __del__(self):
        # Best-effort cleanup; guard against partially-initialized instances
        # (e.g. __init__ raised before self._http was assigned).
        try:
            if getattr(self, "_http", None) is not None:
                self._http.close()
        except Exception:
            pass
@@ -0,0 +1,128 @@
1
+ """Response models mirroring OpenAI's API shape."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional, List, Dict, Any
5
+
6
+
7
@dataclass
class ChatCompletionMessage:
    """A single chat message in OpenAI's wire shape."""

    role: str  # e.g. "user" / "assistant"; not validated here
    content: str

    def to_dict(self) -> dict:
        """Serialize back to the ``{"role": ..., "content": ...}`` wire form."""
        return dict(role=self.role, content=self.content)
14
+
15
+
16
@dataclass
class Usage:
    """Token usage counts, mirroring OpenAI's ``usage`` object.

    Every field defaults to ``None`` when the server omits the value.
    """

    prompt_tokens: Optional[int] = None
    completion_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
21
+
22
+
23
@dataclass
class CacheMeta:
    """Semantis cache metadata attached to a completion response.

    Note that ``hit`` is a string label, not a boolean — the client's
    OpenAI fallback path sets it to ``"fallback"``, and the default is
    ``"miss"``. Other label values come from the server.
    """

    hit: str = "miss"
    similarity: float = 0.0  # presumably the semantic match score — server-defined
    latency_ms: float = 0.0
    strategy: str = "miss"
29
+
30
+
31
@dataclass
class ChatCompletionChoice:
    """One entry of a completion's ``choices`` list (OpenAI-compatible)."""

    index: int  # position within the choices array
    message: ChatCompletionMessage
    finish_reason: str = "stop"  # server-provided; defaults to "stop" when absent
36
+
37
+
38
@dataclass
class ChatCompletion:
    """OpenAI-compatible chat completion response plus Semantis ``meta``."""

    id: str
    object: str = "chat.completion"
    created: int = 0
    model: str = ""
    choices: List[ChatCompletionChoice] = field(default_factory=list)
    usage: Optional[Usage] = None
    meta: Optional[CacheMeta] = None

    @classmethod
    def from_dict(cls, data: dict) -> "ChatCompletion":
        """Build a ``ChatCompletion`` from a decoded JSON payload.

        Missing keys fall back to defaults; absent or empty ``usage`` /
        ``meta`` objects become ``None``.
        """

        def _parse_choice(raw: dict) -> ChatCompletionChoice:
            # One element of the "choices" array.
            msg = raw.get("message", {})
            return ChatCompletionChoice(
                index=raw.get("index", 0),
                message=ChatCompletionMessage(
                    role=msg.get("role", "assistant"),
                    content=msg.get("content", ""),
                ),
                finish_reason=raw.get("finish_reason", "stop"),
            )

        usage_raw = data.get("usage")
        usage: Optional[Usage] = None
        if usage_raw:
            usage = Usage(
                prompt_tokens=usage_raw.get("prompt_tokens"),
                completion_tokens=usage_raw.get("completion_tokens"),
                total_tokens=usage_raw.get("total_tokens"),
            )

        meta_raw = data.get("meta")
        meta: Optional[CacheMeta] = None
        if meta_raw:
            meta = CacheMeta(
                hit=meta_raw.get("hit", "miss"),
                similarity=meta_raw.get("similarity", 0.0),
                latency_ms=meta_raw.get("latency_ms", 0.0),
                strategy=meta_raw.get("strategy", "miss"),
            )

        return cls(
            id=data.get("id", ""),
            object=data.get("object", "chat.completion"),
            created=data.get("created", 0),
            model=data.get("model", ""),
            choices=[_parse_choice(c) for c in data.get("choices", [])],
            usage=usage,
            meta=meta,
        )
86
+
87
+
88
@dataclass
class ChatCompletionChunkDelta:
    """Incremental message fragment inside a streaming chunk.

    Either field is ``None`` when the server's ``delta`` omits it.
    """

    role: Optional[str] = None
    content: Optional[str] = None
92
+
93
+
94
@dataclass
class ChatCompletionChunkChoice:
    """One entry of a streaming chunk's ``choices`` list."""

    index: int = 0
    delta: ChatCompletionChunkDelta = field(default_factory=ChatCompletionChunkDelta)
    finish_reason: Optional[str] = None  # None until the stream's final chunk? — server-defined
99
+
100
+
101
@dataclass
class ChatCompletionChunk:
    """One SSE streaming chunk, mirroring OpenAI's ``chat.completion.chunk``."""

    id: str = ""
    object: str = "chat.completion.chunk"
    created: int = 0
    model: str = ""
    choices: List[ChatCompletionChunkChoice] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict) -> "ChatCompletionChunk":
        """Build a chunk from a decoded SSE JSON payload; missing keys get defaults."""

        def _parse_choice(raw: dict) -> ChatCompletionChunkChoice:
            delta_raw = raw.get("delta", {})
            return ChatCompletionChunkChoice(
                index=raw.get("index", 0),
                delta=ChatCompletionChunkDelta(
                    role=delta_raw.get("role"),
                    content=delta_raw.get("content"),
                ),
                finish_reason=raw.get("finish_reason"),
            )

        return cls(
            id=data.get("id", ""),
            object=data.get("object", "chat.completion.chunk"),
            created=data.get("created", 0),
            model=data.get("model", ""),
            choices=[_parse_choice(c) for c in data.get("choices", [])],
        )
@@ -0,0 +1,98 @@
1
+ Metadata-Version: 2.4
2
+ Name: semantis
3
+ Version: 0.1.0
4
+ Summary: Python SDK for Semantis AI - Semantic LLM Cache
5
+ Author: Semantis AI
6
+ License: MIT
7
+ Project-URL: Homepage, https://semantis.ai
8
+ Project-URL: Documentation, https://docs.semantis.ai
9
+ Project-URL: Repository, https://github.com/semantis-ai/semantis-python
10
+ Keywords: llm,cache,semantic,openai,gpt
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: httpx>=0.24.0
25
+ Provides-Extra: openai
26
+ Requires-Dist: openai>=1.0.0; extra == "openai"
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest; extra == "dev"
29
+ Requires-Dist: pytest-asyncio; extra == "dev"
30
+ Requires-Dist: respx; extra == "dev"
31
+
32
+ # Semantis Python SDK
33
+
34
+ Drop-in replacement for the OpenAI Python client with automatic semantic caching.
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install semantis
40
+ ```
41
+
42
+ For automatic OpenAI fallback when Semantis is unreachable:
43
+
44
+ ```bash
45
+ pip install semantis[openai]
46
+ ```
47
+
48
+ ## Quick Start
49
+
50
+ ```python
51
+ from semantis import SemantisCache
52
+
53
+ cache = SemantisCache(api_key="sc-myorg-xxxxxxxx")
54
+
55
+ # OpenAI-compatible interface
56
+ response = cache.chat.completions.create(
57
+ model="gpt-4o-mini",
58
+ messages=[{"role": "user", "content": "What is machine learning?"}],
59
+ )
60
+ print(response.choices[0].message.content)
61
+ print(f"Cache: {response.meta.hit} | Similarity: {response.meta.similarity}")
62
+ ```
63
+
64
+ ## Zero-Code Integration (Proxy Mode)
65
+
66
+ Point your existing OpenAI client at Semantis:
67
+
68
+ ```python
69
+ import openai
70
+
71
+ client = openai.OpenAI(
72
+ base_url="https://api.semantis.ai/v1",
73
+ api_key="sc-myorg-xxxxxxxx",
74
+ )
75
+
76
+ # Existing code works unchanged - caching is transparent
77
+ response = client.chat.completions.create(
78
+ model="gpt-4o-mini",
79
+ messages=[{"role": "user", "content": "What is ML?"}],
80
+ )
81
+ ```
82
+
83
+ ## Self-Hosted
84
+
85
+ ```python
86
+ cache = SemantisCache(
87
+ api_key="sc-myorg-xxxxxxxx",
88
+ base_url="http://localhost:8000",
89
+ )
90
+ ```
91
+
92
+ ## Features
93
+
94
+ - **OpenAI-compatible**: Drop-in replacement, same interface
95
+ - **Automatic retry**: Exponential backoff on 429/5xx errors
96
+ - **Fallback**: If Semantis is unreachable, falls back to direct OpenAI (with `[openai]` extra)
97
+ - **Cache metadata**: Every response includes `meta.hit`, `meta.similarity`, `meta.latency_ms`
98
+ - **Context manager**: Use with `with` for automatic cleanup
@@ -0,0 +1,11 @@
1
+ MANIFEST.in
2
+ README.md
3
+ pyproject.toml
4
+ semantis/__init__.py
5
+ semantis/client.py
6
+ semantis/models.py
7
+ semantis.egg-info/PKG-INFO
8
+ semantis.egg-info/SOURCES.txt
9
+ semantis.egg-info/dependency_links.txt
10
+ semantis.egg-info/requires.txt
11
+ semantis.egg-info/top_level.txt
@@ -0,0 +1,9 @@
1
+ httpx>=0.24.0
2
+
3
+ [dev]
4
+ pytest
5
+ pytest-asyncio
6
+ respx
7
+
8
+ [openai]
9
+ openai>=1.0.0
@@ -0,0 +1 @@
1
+ semantis
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+