offline-intelligence 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""
|
|
2
|
+
offline_intelligence - Python bindings for Offline Intelligence Library
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
|
|
5
|
+
Pure-Python HTTP client that communicates with the Offline Intelligence
|
|
6
|
+
server (default port 9999). Supports streaming via Server-Sent Events.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import json
|
|
13
|
+
from typing import Any, Dict, Generator, Iterator, Optional
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import requests
|
|
17
|
+
from requests import Response
|
|
18
|
+
except ImportError as e:
|
|
19
|
+
raise ImportError(
|
|
20
|
+
"The 'requests' package is required. Install it with: pip install requests"
|
|
21
|
+
) from e
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.3"
|
|
24
|
+
__all__ = ["Config", "OfflineIntelligence", "OfflineIntelligenceException"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Exception
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
class OfflineIntelligenceException(Exception):
    """Error raised when communication with the Offline Intelligence server fails.

    Carries the HTTP status code when one is available; ``status_code`` is
    ``None`` for transport-level failures (timeouts, connection errors).
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        # Keep the optional HTTP status around for callers that branch on it.
        self.status_code = status_code
        super().__init__(message)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
# Config
|
|
40
|
+
# ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
class Config:
    """Configuration for the Offline Intelligence HTTP client.

    Defaults mirror the server's out-of-the-box settings; use
    :meth:`from_env` to override them from environment variables.
    """

    # Declarative environment-variable -> attribute maps used by from_env().
    # Listing them here keeps from_env() free of a long repetitive if-chain
    # and makes it obvious which settings are env-configurable.
    _ENV_STR = {
        "MODEL_PATH": "model_path",
        "LLAMA_BIN": "llama_bin",
        "LLAMA_HOST": "llama_host",
        "BACKEND_URL": "backend_url",
        "OPENROUTER_API_KEY": "openrouter_api_key",
        "API_HOST": "api_host",
    }
    _ENV_INT = {
        "LLAMA_PORT": "llama_port",
        "API_PORT": "api_port",
        "CTX_SIZE": "ctx_size",
        "GPU_LAYERS": "gpu_layers",
        # Previously not settable via environment; added for consistency with
        # the other numeric engine knobs.
        "BATCH_SIZE": "batch_size",
        "THREADS": "threads",
    }

    def __init__(self):
        # llama.cpp backend process settings
        self.model_path: str = "default.gguf"
        self.llama_bin: str = "llama-server"
        self.llama_host: str = "127.0.0.1"
        self.llama_port: int = 8081
        self.backend_url: str = "http://127.0.0.1:8081"
        self.openrouter_api_key: str = ""
        # Inference engine tuning
        self.ctx_size: int = 8192
        self.batch_size: int = 256
        self.threads: int = 6
        self.gpu_layers: int = 20
        # Lifecycle / health timeouts (seconds)
        self.health_timeout_seconds: int = 60
        self.hot_swap_grace_seconds: int = 25
        self.max_concurrent_streams: int = 4
        self.prometheus_port: int = 9000
        # Public API endpoint (the server this client talks to)
        self.api_host: str = "127.0.0.1"
        self.api_port: int = 9999
        self.requests_per_second: int = 24
        # Request timeouts (seconds)
        self.generate_timeout_seconds: int = 300
        self.stream_timeout_seconds: int = 600
        self.health_check_timeout_seconds: int = 90
        # Server-side queueing
        self.queue_size: int = 100
        self.queue_timeout_seconds: int = 30

    @classmethod
    def from_env(cls) -> "Config":
        """Create a Config populated from environment variables.

        Unset or empty variables leave the corresponding defaults untouched.

        Raises:
            ValueError: if an integer variable holds a non-numeric value.
        """
        cfg = cls()
        for env_name, attr in cls._ENV_STR.items():
            if v := os.getenv(env_name):
                setattr(cfg, attr, v)
        for env_name, attr in cls._ENV_INT.items():
            if v := os.getenv(env_name):
                setattr(cfg, attr, int(v))
        return cfg

    def __repr__(self) -> str:
        return (
            f"Config(api_host={self.api_host!r}, api_port={self.api_port}, "
            f"model_path={self.model_path!r})"
        )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
# Main Client
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
|
|
106
|
+
class OfflineIntelligence:
    """
    HTTP client for the Offline Intelligence server.

    Usage::

        from offline_intelligence import Config, OfflineIntelligence

        cfg = Config.from_env()
        client = OfflineIntelligence(cfg)

        print(client.health_check())
        print(client.generate("Hello, world!"))
    """

    def __init__(self, config: Optional[Config] = None):
        self.config = config or Config()
        self.base_url = f"http://{self.config.api_host}:{self.config.api_port}"
        self._session = requests.Session()
        self._session.headers.update({"Content-Type": "application/json"})

    # ── Internal request plumbing ──────────────────────────────────────────

    @staticmethod
    def _status_of(e: "requests.HTTPError") -> Optional[int]:
        # BUG FIX: requests.Response.__bool__ returns False for 4xx/5xx, so
        # the previous `if e.response` check always discarded the status code
        # on exactly the errors that have one. Compare against None instead.
        return e.response.status_code if e.response is not None else None

    def _request(
        self,
        method: str,
        path: str,
        body: Optional[Dict] = None,
        timeout: Optional[int] = None,
    ) -> Any:
        """Issue one HTTP request and return the decoded JSON response body.

        Shared by :meth:`_get`, :meth:`_post`, and :meth:`_delete` so the
        error-translation logic lives in exactly one place.

        Raises:
            OfflineIntelligenceException: on HTTP errors (with status code)
                or transport failures (status code ``None``).
        """
        url = f"{self.base_url}{path}"
        try:
            # json=None sends no body, so GET/DELETE behave as before.
            resp = self._session.request(method, url, json=body, timeout=timeout)
            resp.raise_for_status()
            return resp.json()
        except requests.HTTPError as e:
            raise OfflineIntelligenceException(str(e), self._status_of(e)) from e
        except requests.RequestException as e:
            raise OfflineIntelligenceException(str(e)) from e

    def _get(self, path: str, timeout: Optional[int] = None) -> Any:
        return self._request(
            "GET", path, timeout=timeout or self.config.health_timeout_seconds
        )

    def _post(self, path: str, body: Optional[Dict] = None, timeout: Optional[int] = None) -> Any:
        return self._request(
            "POST", path, body, timeout or self.config.generate_timeout_seconds
        )

    def _delete(self, path: str, timeout: Optional[int] = None) -> Any:
        return self._request(
            "DELETE", path, timeout=timeout or self.config.health_timeout_seconds
        )

    # ── Health & Status ────────────────────────────────────────────────────

    def health_check(self) -> Dict:
        """GET /healthz — returns server health status."""
        return self._get("/healthz")

    def get_status(self) -> Dict:
        """GET /admin/status — returns engine/model status."""
        return self._get("/admin/status")

    # ── Model Management ───────────────────────────────────────────────────

    def load_model(self, model_path: str) -> Dict:
        """POST /admin/load — load a model by path."""
        return self._post("/admin/load", {"model_path": model_path})

    def stop_model(self) -> Dict:
        """POST /admin/stop — stop the running model."""
        return self._post("/admin/stop")

    # ── Generation ─────────────────────────────────────────────────────────

    def generate(self, prompt: str, **options: Any) -> Dict:
        """POST /generate — generate a response (non-streaming)."""
        body = {"prompt": prompt, **options}
        return self._post("/generate", body, timeout=self.config.generate_timeout_seconds)

    def generate_stream(self, prompt: str, **options: Any) -> Generator[str, None, None]:
        """
        POST /generate/stream — stream a response via Server-Sent Events.

        Yields each text chunk as a string.

        Example::

            for chunk in client.generate_stream("Tell me a story"):
                print(chunk, end="", flush=True)

        Raises:
            OfflineIntelligenceException: on HTTP or transport errors.
        """
        url = f"{self.base_url}/generate/stream"
        body = {"prompt": prompt, **options}
        try:
            with self._session.post(
                url,
                json=body,
                stream=True,
                timeout=self.config.stream_timeout_seconds,
            ) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines(decode_unicode=True):
                    if not line:
                        continue
                    if line.startswith("data: "):
                        data = line[6:]
                        if data.strip() == "[DONE]":
                            return
                        try:
                            payload = json.loads(data)
                            # OpenAI-compatible format
                            if "choices" in payload:
                                delta = payload["choices"][0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
                                    yield content
                            elif "text" in payload:
                                yield payload["text"]
                        except json.JSONDecodeError:
                            # Not JSON — pass the raw SSE payload through.
                            yield data
        except requests.HTTPError as e:
            raise OfflineIntelligenceException(str(e), self._status_of(e)) from e
        except requests.RequestException as e:
            raise OfflineIntelligenceException(str(e)) from e

    # ── Conversations ──────────────────────────────────────────────────────

    def get_conversations(self) -> Dict:
        """GET /conversations — list all conversations."""
        return self._get("/conversations")

    def get_conversation(self, conversation_id: str) -> Dict:
        """GET /conversations/{id} — get a single conversation."""
        return self._get(f"/conversations/{conversation_id}")

    def delete_conversation(self, conversation_id: str) -> Dict:
        """DELETE /conversations/{id} — delete a conversation."""
        return self._delete(f"/conversations/{conversation_id}")

    def get_conversation_title(self, conversation_id: str) -> Dict:
        """GET /conversations/{id}/title — get conversation title."""
        return self._get(f"/conversations/{conversation_id}/title")

    def generate_title(self, session_id: str, first_message: str) -> Dict:
        """POST /generate/title — generate a title for a conversation."""
        return self._post("/generate/title", {
            "session_id": session_id,
            "first_message": first_message,
        })

    # ── Memory ─────────────────────────────────────────────────────────────

    def get_memory_stats(self, session_id: str) -> Dict:
        """GET /memory/stats/{session_id} — get memory statistics."""
        return self._get(f"/memory/stats/{session_id}")

    def optimize_memory(self) -> Dict:
        """POST /memory/optimize — trigger memory optimization."""
        return self._post("/memory/optimize")

    def cleanup_memory(self) -> Dict:
        """POST /memory/cleanup — clean up stale memory entries."""
        return self._post("/memory/cleanup")

    # ── Version ────────────────────────────────────────────────────────────

    @staticmethod
    def version() -> str:
        return __version__

    def __repr__(self) -> str:
        return f"OfflineIntelligence(base_url={self.base_url!r})"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: offline-intelligence
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Python bindings for Offline Intelligence Library
|
|
5
|
+
Home-page: https://github.com/offline-intelligence/offline-intelligence
|
|
6
|
+
Author: Offline Intelligence Team
|
|
7
|
+
Author-email: team@offlineintelligence.com
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/offline-intelligence/offline-intelligence/issues
|
|
9
|
+
Project-URL: Source, https://github.com/offline-intelligence/offline-intelligence
|
|
10
|
+
Keywords: llm,ai,offline,intelligence,local-ai,http-client
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Requires-Dist: requests>=2.28.0
|
|
26
|
+
Provides-Extra: streaming
|
|
27
|
+
Requires-Dist: sseclient-ng>=1.0.0; extra == "streaming"
|
|
28
|
+
Dynamic: author
|
|
29
|
+
Dynamic: author-email
|
|
30
|
+
Dynamic: classifier
|
|
31
|
+
Dynamic: description
|
|
32
|
+
Dynamic: description-content-type
|
|
33
|
+
Dynamic: home-page
|
|
34
|
+
Dynamic: keywords
|
|
35
|
+
Dynamic: project-url
|
|
36
|
+
Dynamic: provides-extra
|
|
37
|
+
Dynamic: requires-dist
|
|
38
|
+
Dynamic: requires-python
|
|
39
|
+
Dynamic: summary
|
|
40
|
+
|
|
41
|
+
# Offline Intelligence Python Bindings
|
|
42
|
+
|
|
43
|
+
Python bindings for the Offline Intelligence Library - High-performance LLM inference engine with memory management capabilities.
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install offline-intelligence
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quick Start
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from offline_intelligence import Config, OfflineIntelligence
|
|
55
|
+
|
|
56
|
+
# Configure the client
|
|
57
|
+
config = Config.from_env()
|
|
58
|
+
|
|
59
|
+
# Connect to the server and check its health
|
|
60
|
+
client = OfflineIntelligence(config)
print(client.health_check())
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Features
|
|
64
|
+
|
|
65
|
+
- **Core LLM Integration**: Direct access to LLM engine functionality
|
|
66
|
+
- **Memory Management**: Base memory operations and database access
|
|
67
|
+
- **Configuration**: Flexible configuration system
|
|
68
|
+
- **Metrics**: Performance monitoring and telemetry
|
|
69
|
+
- **Proxy Interface**: Stream generation and API proxy functionality
|
|
70
|
+
|
|
71
|
+
## Architecture
|
|
72
|
+
|
|
73
|
+
This package provides bindings to the core open-source components (80%) of the Offline Intelligence system. Proprietary extensions are available separately.
|
|
74
|
+
|
|
75
|
+
## Platform Support
|
|
76
|
+
|
|
77
|
+
- Windows (x64)
|
|
78
|
+
- macOS (Intel/Apple Silicon)
|
|
79
|
+
- Linux (x64, ARM64)
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
Apache 2.0
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
offline_intelligence/__init__.py,sha256=VuNZsY84gVhJVQhmmhZXwlzgx5T1zaEPo9Rrd83b4dE,11747
|
|
2
|
+
offline_intelligence-0.1.3.dist-info/METADATA,sha256=a7DOxp64KA9AayFTfrU_VeTJEXAPyjsuNatyUxm2-sk,2688
|
|
3
|
+
offline_intelligence-0.1.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
4
|
+
offline_intelligence-0.1.3.dist-info/top_level.txt,sha256=qQOPMXLBQPADgwsI0Ta0hYwYNqTnD7kFSTqaW5tIzDs,21
|
|
5
|
+
offline_intelligence-0.1.3.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
offline_intelligence
|