offline-intelligence 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,278 @@
1
+ """
2
+ offline_intelligence - Python bindings for Offline Intelligence Library
3
+ Version: 0.1.3
4
+
5
+ Pure-Python HTTP client that communicates with the Offline Intelligence
6
+ server (default port 9999). Supports streaming via Server-Sent Events.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import json
13
+ from typing import Any, Dict, Generator, Iterator, Optional
14
+
15
+ try:
16
+ import requests
17
+ from requests import Response
18
+ except ImportError as e:
19
+ raise ImportError(
20
+ "The 'requests' package is required. Install it with: pip install requests"
21
+ ) from e
22
+
23
+ __version__ = "0.1.3"
24
+ __all__ = ["Config", "OfflineIntelligence", "OfflineIntelligenceException"]
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Exception
29
+ # ---------------------------------------------------------------------------
30
+
31
class OfflineIntelligenceException(Exception):
    """Error raised for any failure reported by the Offline Intelligence server.

    ``status_code`` carries the HTTP status code when the failure came from an
    HTTP error response; it is ``None`` for transport-level failures (timeouts,
    connection refused, etc.).
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        super().__init__(message)
        # None when the failure never produced an HTTP response.
        self.status_code: Optional[int] = status_code
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Config
40
+ # ---------------------------------------------------------------------------
41
+
42
class Config:
    """Runtime settings for the Offline Intelligence HTTP client.

    All attributes carry localhost-oriented defaults; :meth:`from_env`
    overrides a subset of them from environment variables.
    """

    def __init__(self):
        # Model / backend process settings.
        self.model_path: str = "default.gguf"
        self.llama_bin: str = "llama-server"
        self.llama_host: str = "127.0.0.1"
        self.llama_port: int = 8081
        self.backend_url: str = "http://127.0.0.1:8081"
        self.openrouter_api_key: str = ""
        # Inference tuning.
        self.ctx_size: int = 8192
        self.batch_size: int = 256
        self.threads: int = 6
        self.gpu_layers: int = 20
        # Lifecycle / resource limits.
        self.health_timeout_seconds: int = 60
        self.hot_swap_grace_seconds: int = 25
        self.max_concurrent_streams: int = 4
        self.prometheus_port: int = 9000
        # Public API endpoint served on port 9999 by default.
        self.api_host: str = "127.0.0.1"
        self.api_port: int = 9999
        self.requests_per_second: int = 24
        # Request timeouts and queueing.
        self.generate_timeout_seconds: int = 300
        self.stream_timeout_seconds: int = 600
        self.health_check_timeout_seconds: int = 90
        self.queue_size: int = 100
        self.queue_timeout_seconds: int = 30

    @classmethod
    def from_env(cls) -> "Config":
        """Build a Config, overriding defaults from environment variables.

        Empty/unset variables are ignored (a set-but-empty variable does NOT
        override the default, matching the original truthiness check).
        """
        cfg = cls()
        # env var -> attribute, copied verbatim.
        string_settings = {
            "MODEL_PATH": "model_path",
            "LLAMA_BIN": "llama_bin",
            "LLAMA_HOST": "llama_host",
            "BACKEND_URL": "backend_url",
            "OPENROUTER_API_KEY": "openrouter_api_key",
            "API_HOST": "api_host",
        }
        # env var -> attribute, parsed with int().
        integer_settings = {
            "LLAMA_PORT": "llama_port",
            "API_PORT": "api_port",
            "CTX_SIZE": "ctx_size",
            "GPU_LAYERS": "gpu_layers",
        }
        for env_name, attr in string_settings.items():
            value = os.getenv(env_name)
            if value:
                setattr(cfg, attr, value)
        for env_name, attr in integer_settings.items():
            value = os.getenv(env_name)
            if value:
                setattr(cfg, attr, int(value))
        return cfg

    def __repr__(self) -> str:
        return (
            f"Config(api_host={self.api_host!r}, api_port={self.api_port}, "
            f"model_path={self.model_path!r})"
        )
100
+
101
+
102
+ # ---------------------------------------------------------------------------
103
+ # Main Client
104
+ # ---------------------------------------------------------------------------
105
+
106
class OfflineIntelligence:
    """
    HTTP client for the Offline Intelligence server.

    All failures are surfaced as :class:`OfflineIntelligenceException`;
    HTTP-level failures carry the server's status code in ``status_code``,
    transport failures carry ``None``.

    Usage::

        from offline_intelligence import Config, OfflineIntelligence

        cfg = Config.from_env()
        client = OfflineIntelligence(cfg)

        print(client.health_check())
        print(client.generate("Hello, world!"))
    """

    def __init__(self, config: Optional[Config] = None):
        """Create a client; uses default Config (127.0.0.1:9999) when omitted."""
        self.config = config or Config()
        self.base_url = f"http://{self.config.api_host}:{self.config.api_port}"
        # One Session for connection pooling; every request sends/receives JSON.
        self._session = requests.Session()
        self._session.headers.update({"Content-Type": "application/json"})

    # ── Internal HTTP plumbing ─────────────────────────────────────────────

    @staticmethod
    def _http_status(e: "requests.HTTPError") -> Optional[int]:
        """Extract the status code from an HTTPError, if a response exists.

        Bug fix: ``requests.Response.__bool__`` returns ``response.ok``,
        which is False for every 4xx/5xx — exactly the responses seen here —
        so the original truthiness test (``if e.response``) always discarded
        the status code. Compare against ``None`` instead.
        """
        return e.response.status_code if e.response is not None else None

    def _request(
        self,
        method: str,
        path: str,
        body: Optional[Dict] = None,
        timeout: Optional[int] = None,
    ) -> Any:
        """Issue *method* against *path* and return the decoded JSON body.

        Shared error handling for all non-streaming endpoints (previously
        triplicated across _get/_post/_delete).

        Raises:
            OfflineIntelligenceException: on any HTTP or transport error.
        """
        url = f"{self.base_url}{path}"
        try:
            # requests omits the body entirely when json=None, so GET/DELETE
            # calls through here behave exactly like session.get()/delete().
            resp = self._session.request(method, url, json=body, timeout=timeout)
            resp.raise_for_status()
            return resp.json()
        except requests.HTTPError as e:
            raise OfflineIntelligenceException(str(e), self._http_status(e)) from e
        except requests.RequestException as e:
            raise OfflineIntelligenceException(str(e)) from e

    def _get(self, path: str, timeout: Optional[int] = None) -> Any:
        """GET helper; defaults to the health-check timeout."""
        return self._request(
            "GET", path, timeout=timeout or self.config.health_timeout_seconds
        )

    def _post(self, path: str, body: Optional[Dict] = None, timeout: Optional[int] = None) -> Any:
        """POST helper; defaults to the generation timeout."""
        return self._request(
            "POST", path, body=body,
            timeout=timeout or self.config.generate_timeout_seconds,
        )

    def _delete(self, path: str, timeout: Optional[int] = None) -> Any:
        """DELETE helper; defaults to the health-check timeout."""
        return self._request(
            "DELETE", path, timeout=timeout or self.config.health_timeout_seconds
        )

    # ── Health & Status ────────────────────────────────────────────────────

    def health_check(self) -> Dict:
        """GET /healthz — returns server health status."""
        return self._get("/healthz")

    def get_status(self) -> Dict:
        """GET /admin/status — returns engine/model status."""
        return self._get("/admin/status")

    # ── Model Management ───────────────────────────────────────────────────

    def load_model(self, model_path: str) -> Dict:
        """POST /admin/load — load a model by path."""
        return self._post("/admin/load", {"model_path": model_path})

    def stop_model(self) -> Dict:
        """POST /admin/stop — stop the running model."""
        return self._post("/admin/stop")

    # ── Generation ─────────────────────────────────────────────────────────

    def generate(self, prompt: str, **options: Any) -> Dict:
        """POST /generate — generate a response (non-streaming).

        Extra keyword arguments are merged into the request body verbatim.
        """
        body = {"prompt": prompt, **options}
        return self._post("/generate", body, timeout=self.config.generate_timeout_seconds)

    def generate_stream(self, prompt: str, **options: Any) -> Generator[str, None, None]:
        """
        POST /generate/stream — stream a response via Server-Sent Events.

        Yields each text chunk as a string. Understands both the
        OpenAI-compatible ``choices[0].delta.content`` shape and a plain
        ``{"text": ...}`` payload; non-JSON data lines are yielded raw.

        Raises:
            OfflineIntelligenceException: on any HTTP or transport error.

        Example::

            for chunk in client.generate_stream("Tell me a story"):
                print(chunk, end="", flush=True)
        """
        url = f"{self.base_url}/generate/stream"
        body = {"prompt": prompt, **options}
        try:
            with self._session.post(
                url,
                json=body,
                stream=True,
                timeout=self.config.stream_timeout_seconds,
            ) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines(decode_unicode=True):
                    # SSE frames: skip keep-alive blanks and non-data fields.
                    if not line or not line.startswith("data: "):
                        continue
                    data = line[6:]
                    if data.strip() == "[DONE]":
                        return
                    try:
                        payload = json.loads(data)
                    except json.JSONDecodeError:
                        # Not JSON — pass the raw text through unchanged.
                        yield data
                        continue
                    if "choices" in payload:
                        # OpenAI-compatible delta format.
                        content = payload["choices"][0].get("delta", {}).get("content", "")
                        if content:
                            yield content
                    elif "text" in payload:
                        yield payload["text"]
        except requests.HTTPError as e:
            raise OfflineIntelligenceException(str(e), self._http_status(e)) from e
        except requests.RequestException as e:
            raise OfflineIntelligenceException(str(e)) from e

    # ── Conversations ──────────────────────────────────────────────────────

    def get_conversations(self) -> Dict:
        """GET /conversations — list all conversations."""
        return self._get("/conversations")

    def get_conversation(self, conversation_id: str) -> Dict:
        """GET /conversations/{id} — get a single conversation."""
        return self._get(f"/conversations/{conversation_id}")

    def delete_conversation(self, conversation_id: str) -> Dict:
        """DELETE /conversations/{id} — delete a conversation."""
        return self._delete(f"/conversations/{conversation_id}")

    def get_conversation_title(self, conversation_id: str) -> Dict:
        """GET /conversations/{id}/title — get conversation title."""
        return self._get(f"/conversations/{conversation_id}/title")

    def generate_title(self, session_id: str, first_message: str) -> Dict:
        """POST /generate/title — generate a title for a conversation."""
        return self._post("/generate/title", {
            "session_id": session_id,
            "first_message": first_message,
        })

    # ── Memory ─────────────────────────────────────────────────────────────

    def get_memory_stats(self, session_id: str) -> Dict:
        """GET /memory/stats/{session_id} — get memory statistics."""
        return self._get(f"/memory/stats/{session_id}")

    def optimize_memory(self) -> Dict:
        """POST /memory/optimize — trigger memory optimization."""
        return self._post("/memory/optimize")

    def cleanup_memory(self) -> Dict:
        """POST /memory/cleanup — clean up stale memory entries."""
        return self._post("/memory/cleanup")

    # ── Version ────────────────────────────────────────────────────────────

    @staticmethod
    def version() -> str:
        """Return the package version string."""
        return __version__

    def __repr__(self) -> str:
        return f"OfflineIntelligence(base_url={self.base_url!r})"
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.4
2
+ Name: offline-intelligence
3
+ Version: 0.1.3
4
+ Summary: Python bindings for Offline Intelligence Library
5
+ Home-page: https://github.com/offline-intelligence/offline-intelligence
6
+ Author: Offline Intelligence Team
7
+ Author-email: team@offlineintelligence.com
8
+ Project-URL: Bug Tracker, https://github.com/offline-intelligence/offline-intelligence/issues
9
+ Project-URL: Source, https://github.com/offline-intelligence/offline-intelligence
10
+ Keywords: llm,ai,offline,intelligence,local-ai,http-client
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ Requires-Dist: requests>=2.28.0
26
+ Provides-Extra: streaming
27
+ Requires-Dist: sseclient-ng>=1.0.0; extra == "streaming"
28
+ Dynamic: author
29
+ Dynamic: author-email
30
+ Dynamic: classifier
31
+ Dynamic: description
32
+ Dynamic: description-content-type
33
+ Dynamic: home-page
34
+ Dynamic: keywords
35
+ Dynamic: project-url
36
+ Dynamic: provides-extra
37
+ Dynamic: requires-dist
38
+ Dynamic: requires-python
39
+ Dynamic: summary
40
+
41
+ # Offline Intelligence Python Bindings
42
+
43
+ Python bindings for the Offline Intelligence Library - High-performance LLM inference engine with memory management capabilities.
44
+
45
+ ## Installation
46
+
47
+ ```bash
48
+ pip install offline-intelligence
49
+ ```
50
+
51
+ ## Quick Start
52
+
53
+ ```python
54
+ from offline_intelligence import Config, OfflineIntelligence
55
+
56
+ # Configure the client
57
+ config = Config.from_env()
58
+
59
+ # Talk to a running Offline Intelligence server
60
+ client = OfflineIntelligence(config)
+ print(client.generate("Hello, world!"))
61
+ ```
62
+
63
+ ## Features
64
+
65
+ - **Core LLM Integration**: Direct access to LLM engine functionality
66
+ - **Memory Management**: Base memory operations and database access
67
+ - **Configuration**: Flexible configuration system
68
+ - **Metrics**: Performance monitoring and telemetry
69
+ - **Proxy Interface**: Stream generation and API proxy functionality
70
+
71
+ ## Architecture
72
+
73
+ This package provides bindings to the core open-source components (80%) of the Offline Intelligence system. Proprietary extensions are available separately.
74
+
75
+ ## Platform Support
76
+
77
+ - Windows (x64)
78
+ - macOS (Intel/Apple Silicon)
79
+ - Linux (x64, ARM64)
80
+
81
+ ## License
82
+
83
+ Apache 2.0
@@ -0,0 +1,5 @@
1
+ offline_intelligence/__init__.py,sha256=VuNZsY84gVhJVQhmmhZXwlzgx5T1zaEPo9Rrd83b4dE,11747
2
+ offline_intelligence-0.1.3.dist-info/METADATA,sha256=a7DOxp64KA9AayFTfrU_VeTJEXAPyjsuNatyUxm2-sk,2688
3
+ offline_intelligence-0.1.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
4
+ offline_intelligence-0.1.3.dist-info/top_level.txt,sha256=qQOPMXLBQPADgwsI0Ta0hYwYNqTnD7kFSTqaW5tIzDs,21
5
+ offline_intelligence-0.1.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ offline_intelligence