create-tether-app 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -0
- package/dist/index.js +729 -0
- package/package.json +59 -0
- package/template/.env.example +18 -0
- package/template/README.md.template +123 -0
- package/template/backend/app/__init__.py.template +5 -0
- package/template/backend/app/main.py +66 -0
- package/template/backend/app/routes/__init__.py +3 -0
- package/template/backend/app/routes/chat.py +151 -0
- package/template/backend/app/routes/health.py +28 -0
- package/template/backend/app/routes/models.py +126 -0
- package/template/backend/app/services/__init__.py +3 -0
- package/template/backend/app/services/llm.py +526 -0
- package/template/backend/pyproject.toml.template +34 -0
- package/template/backend/scripts/build.py +112 -0
- package/template/frontend/App.css +58 -0
- package/template/frontend/App.tsx +62 -0
- package/template/frontend/components/Chat.css +220 -0
- package/template/frontend/components/Chat.tsx +284 -0
- package/template/frontend/components/ChatMessage.css +206 -0
- package/template/frontend/components/ChatMessage.tsx +62 -0
- package/template/frontend/components/ModelStatus.css +62 -0
- package/template/frontend/components/ModelStatus.tsx +103 -0
- package/template/frontend/hooks/useApi.ts +334 -0
- package/template/frontend/index.css +92 -0
- package/template/frontend/main.tsx +10 -0
- package/template/frontend/vite-env.d.ts +1 -0
- package/template/index.html.template +13 -0
- package/template/package.json.template +33 -0
- package/template/postcss.config.js.template +6 -0
- package/template/public/tether.svg +15 -0
- package/template/src-tauri/.cargo/config.toml +66 -0
- package/template/src-tauri/Cargo.lock +4764 -0
- package/template/src-tauri/Cargo.toml +24 -0
- package/template/src-tauri/build.rs +3 -0
- package/template/src-tauri/capabilities/default.json +40 -0
- package/template/src-tauri/icons/128x128.png +0 -0
- package/template/src-tauri/icons/128x128@2x.png +0 -0
- package/template/src-tauri/icons/32x32.png +0 -0
- package/template/src-tauri/icons/icon.icns +0 -0
- package/template/src-tauri/icons/icon.ico +0 -0
- package/template/src-tauri/src/main.rs +65 -0
- package/template/src-tauri/src/sidecar.rs +110 -0
- package/template/src-tauri/tauri.conf.json.template +44 -0
- package/template/tailwind.config.js.template +19 -0
- package/template/tsconfig.json +21 -0
- package/template/tsconfig.node.json +11 -0
- package/template/vite.config.ts +27 -0

package/template/backend/app/services/llm.py
@@ -0,0 +1,526 @@
"""
LLM service abstraction.
"""

import asyncio
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import lru_cache
from typing import Literal, Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


def get_ollama_base_url() -> str:
    """Get Ollama base URL from OLLAMA_HOST env var or default."""
    ollama_host = os.environ.get("OLLAMA_HOST")
    if ollama_host:
        if not ollama_host.startswith("http"):
            ollama_host = f"http://{ollama_host}"
        return ollama_host.rstrip("/")
    return "http://localhost:11434"


@dataclass
class OllamaDiscoveryResult:
    """Result of Ollama discovery."""

    available: bool
    base_url: str
    models: list[str]
    error: Optional[str] = None


class LLMSettings(BaseSettings):
    """LLM configuration from environment."""

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    tether_llm_backend: Literal["local", "ollama", "openai", "mock"] = "ollama"
    tether_model_path: Optional[str] = None
    openai_api_key: Optional[str] = None
    tether_openai_model: str = "gpt-4o-mini"
    tether_context_length: int = 4096
    # Ollama settings - model can be empty to auto-select
    tether_ollama_model: Optional[str] = None
    tether_ollama_base_url: Optional[str] = None  # Uses OLLAMA_HOST or default


@lru_cache
def get_settings() -> LLMSettings:
    return LLMSettings()


class LLMService(ABC):
    """Abstract base class for LLM services."""

    service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
    model_name: str = "unknown"

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the service."""
        pass

    @abstractmethod
    async def cleanup(self) -> None:
        """Cleanup resources."""
        pass

    @abstractmethod
    def is_ready(self) -> bool:
        """Check if service is ready."""
        pass

    @abstractmethod
    async def complete(
        self,
        prompt: str,
        *,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
    ) -> str:
        """Generate a completion."""
        pass


class MockLLMService(LLMService):
    """Mock LLM service for testing."""

    service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
    model_name = "mock"

    def __init__(self):
        self._ready = False

    async def initialize(self) -> None:
        self._ready = True

    async def cleanup(self) -> None:
        self._ready = False

    def is_ready(self) -> bool:
        return self._ready

    async def complete(
        self,
        prompt: str,
        *,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
    ) -> str:
        return f"This is a mock response to: {prompt[:50]}..."


class OpenAIService(LLMService):
    """OpenAI API service."""

    service_type: Literal["local", "ollama", "openai", "mock"] = "openai"

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ):
        settings = get_settings()
        self._api_key = api_key or settings.openai_api_key
        self._model = model or settings.tether_openai_model
        self._client = None
        self._ready = False

    @property
    def model_name(self) -> str:
        return self._model

    async def initialize(self) -> None:
        try:
            from openai import AsyncOpenAI

            self._client = AsyncOpenAI(api_key=self._api_key)
            self._ready = True
        except ImportError:
            raise ImportError("openai package not installed")

    async def cleanup(self) -> None:
        if self._client:
            await self._client.close()
        self._ready = False

    def is_ready(self) -> bool:
        return self._ready and self._client is not None

    async def complete(
        self,
        prompt: str,
        *,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
    ) -> str:
        if not self._client:
            raise RuntimeError("OpenAI client not initialized")

        response = await self._client.chat.completions.create(
            model=self._model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
        )

        return response.choices[0].message.content or ""


async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResult:
    """Discover Ollama instance and available models."""
    import httpx

    url = base_url or get_ollama_base_url()

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{url}/api/tags")
            response.raise_for_status()
            tags = response.json()
            models = [m["name"] for m in tags.get("models", [])]
            return OllamaDiscoveryResult(available=True, base_url=url, models=models)
    except httpx.ConnectError:
        return OllamaDiscoveryResult(
            available=False, base_url=url, models=[],
            error=f"Cannot connect to Ollama at {url}. Make sure Ollama is running (run 'ollama serve' or start the Ollama app).",
        )
    except httpx.TimeoutException:
        return OllamaDiscoveryResult(
            available=False, base_url=url, models=[],
            error=f"Connection to Ollama at {url} timed out. The server may be busy or unresponsive.",
        )
    except httpx.HTTPStatusError as e:
        return OllamaDiscoveryResult(
            available=False, base_url=url, models=[],
            error=f"Ollama returned an error (HTTP {e.response.status_code}). Check if Ollama is working correctly.",
        )
    except Exception as e:
        return OllamaDiscoveryResult(
            available=False, base_url=url, models=[],
            error=f"Unexpected error connecting to Ollama: {str(e)}",
        )


class OllamaService(LLMService):
    """Ollama LLM service."""

    service_type: Literal["local", "ollama", "openai", "mock"] = "ollama"

    def __init__(
        self,
        model: Optional[str] = None,
        base_url: Optional[str] = None,
    ):
        settings = get_settings()
        self._model = model or settings.tether_ollama_model  # Can be None
        base = base_url or settings.tether_ollama_base_url
        self._base_url = (base or get_ollama_base_url()).rstrip("/")
        self._client = None
        self._ready = False
        self._available_models: list[str] = []

    @property
    def model_name(self) -> str:
        return self._model or "not-set"

    @property
    def available_models(self) -> list[str]:
        """List of available models (populated after initialize)."""
        return self._available_models

    async def initialize(self) -> None:
        try:
            import httpx

            self._client = httpx.AsyncClient(
                base_url=self._base_url,
                timeout=httpx.Timeout(120.0),
            )

            # Discover available models
            discovery = await discover_ollama(self._base_url)

            if not discovery.available:
                error_msg = discovery.error or f"Cannot connect to Ollama at {self._base_url}"
                raise RuntimeError(error_msg)

            self._available_models = discovery.models

            # Auto-select model if not specified
            if not self._model:
                if self._available_models:
                    self._model = self._available_models[0]
                    print(f"Auto-selected Ollama model: {self._model}")
                else:
                    raise RuntimeError(
                        "No models found in Ollama. Pull a model first with:\n"
                        "  ollama pull llama3.2\n"
                        "Or visit https://ollama.com/library to browse available models."
                    )
            else:
                # Verify model exists
                model_found = any(
                    self._model == m or self._model == m.split(":")[0]
                    for m in self._available_models
                )
                if not model_found:
                    available_str = ", ".join(self._available_models[:5])
                    if len(self._available_models) > 5:
                        available_str += f", ... ({len(self._available_models) - 5} more)"
                    if self._available_models:
                        print(
                            f"Warning: Model '{self._model}' not found locally. "
                            f"Available models: {available_str}. "
                            f"Ollama will try to pull it automatically."
                        )
                    else:
                        print(
                            f"Warning: Model '{self._model}' specified but no models found. "
                            f"Ollama will try to pull it automatically."
                        )

            self._ready = True
        except ImportError:
            raise ImportError(
                "httpx package not installed. Install it with:\n"
                "  pip install httpx\n"
                "Or: uv add httpx"
            )

    async def cleanup(self) -> None:
        if self._client:
            await self._client.aclose()
        self._ready = False

    def is_ready(self) -> bool:
        return self._ready and self._client is not None

    async def list_models(self) -> list[str]:
        """List available Ollama models."""
        if self._available_models:
            return self._available_models
        discovery = await discover_ollama(self._base_url)
        return discovery.models

    async def complete(
        self,
        prompt: str,
        *,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
    ) -> str:
        if not self._client:
            raise RuntimeError("Ollama client not initialized")

        payload = {
            "model": self._model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": temperature},
        }
        if max_tokens:
            payload["options"]["num_predict"] = max_tokens

        response = await self._client.post("/api/generate", json=payload)
        response.raise_for_status()
        return response.json().get("response", "")

    async def chat(
        self,
        messages: list[dict],
        *,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        think: bool = True,
    ) -> dict:
        """
        Chat completion using Ollama's chat API.

        Args:
            messages: List of message dicts with 'role' and 'content'
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            think: Enable thinking mode for reasoning models (default: True)

        Returns:
            Dict with 'content' and optionally 'thinking' keys
        """
        if not self._client:
            raise RuntimeError("Ollama client not initialized")

        payload = {
            "model": self._model,
            "messages": messages,
            "stream": False,
            "options": {"temperature": temperature},
        }
        if max_tokens:
            payload["options"]["num_predict"] = max_tokens

        # Only include think parameter if enabled
        # Some models don't support thinking and will error if it's sent
        if think:
            payload["think"] = True

        response = await self._client.post("/api/chat", json=payload)

        # If request fails and thinking was enabled, retry without it
        # Some models don't support thinking mode and return 400
        if not response.is_success and think:
            payload.pop("think", None)
            response = await self._client.post("/api/chat", json=payload)

        response.raise_for_status()

        data = response.json()
        message = data.get("message", {})

        return {
            "content": message.get("content", ""),
            "thinking": message.get("thinking"),  # None if not a thinking model
        }


class LocalLLMService(LLMService):
    """Local LLM service using llama-cpp-python."""

    service_type: Literal["local", "ollama", "openai", "mock"] = "local"

    def __init__(
        self,
        model_path: Optional[str] = None,
        n_ctx: Optional[int] = None,
        n_gpu_layers: int = -1,
    ):
        settings = get_settings()
        self._model_path = model_path or settings.tether_model_path
        self._n_ctx = n_ctx or settings.tether_context_length
        self._n_gpu_layers = n_gpu_layers
        self._llm = None
        self._ready = False

    @property
    def model_name(self) -> str:
        if self._model_path:
            return self._model_path.split("/")[-1]
        return "unknown"

    async def initialize(self) -> None:
        if not self._model_path:
            raise RuntimeError(
                "No model path specified. Set TETHER_MODEL_PATH environment variable "
                "to the path of your GGUF model file.\n"
                "Example: TETHER_MODEL_PATH=./models/llama-3.2-1b.gguf"
            )

        # Check if file exists
        if not os.path.isfile(self._model_path):
            raise RuntimeError(
                f"Model file not found: {self._model_path}\n"
                "Make sure the path is correct and the file exists.\n"
                "You can download GGUF models from: https://huggingface.co/models?library=gguf"
            )

        # Check file extension
        if not self._model_path.lower().endswith(".gguf"):
            print(
                f"Warning: Model file '{self._model_path}' doesn't have .gguf extension. "
                "Make sure this is a valid GGUF model file."
            )

        try:
            from llama_cpp import Llama

            print(f"Loading model: {self._model_path}...")
            loop = asyncio.get_event_loop()
            await loop.run_in_executor(None, self._load_model)
            print(f"Model loaded successfully: {self.model_name}")
        except ImportError:
            raise ImportError(
                "llama-cpp-python package not installed. Install it with:\n"
                "  pip install llama-cpp-python\n"
                "For GPU support, see: https://github.com/abetlen/llama-cpp-python#installation"
            )

    def _load_model(self) -> None:
        from llama_cpp import Llama

        try:
            self._llm = Llama(
                model_path=self._model_path,
                n_ctx=self._n_ctx,
                n_gpu_layers=self._n_gpu_layers,
                verbose=False,
            )
            self._ready = True
        except Exception as e:
            error_str = str(e).lower()
            if "gguf" in error_str or "magic" in error_str:
                raise RuntimeError(
                    f"Invalid model file format: {self._model_path}\n"
                    "This doesn't appear to be a valid GGUF file. "
                    "Make sure you have a properly formatted GGUF model."
                ) from e
            elif "memory" in error_str or "alloc" in error_str:
                raise RuntimeError(
                    f"Not enough memory to load model: {self._model_path}\n"
                    "Try a smaller/quantized model or close other applications."
                ) from e
            else:
                raise RuntimeError(
                    f"Failed to load model: {self._model_path}\n"
                    f"Error: {str(e)}"
                ) from e

    async def cleanup(self) -> None:
        self._llm = None
        self._ready = False

    def is_ready(self) -> bool:
        return self._ready and self._llm is not None

    async def complete(
        self,
        prompt: str,
        *,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
    ) -> str:
        if not self._llm:
            raise RuntimeError("Model not loaded")

        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(
            None,
            lambda: self._llm(
                prompt,
                max_tokens=max_tokens or 512,
                temperature=temperature,
                echo=False,
            ),
        )

        return result["choices"][0]["text"]


def get_llm_service() -> LLMService:
    """Get the appropriate LLM service based on configuration."""
    settings = get_settings()
    backend = settings.tether_llm_backend

    if backend == "openai":
        return OpenAIService()
    elif backend == "ollama":
        return OllamaService()
    elif backend == "local":
        return LocalLLMService()
    else:
        return MockLLMService()
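For orientation, here is a minimal sketch (an editor's example, not part of the package) of how this service layer can be exercised, assuming the module is importable as app.services.llm per the template layout. It uses the mock backend, so no Ollama server or OpenAI key is needed; TETHER_LLM_BACKEND maps onto the tether_llm_backend field through pydantic-settings' case-insensitive environment lookup, and it has to be set before the lru_cache'd get_settings() runs for the first time.

import asyncio
import os

# Must be set before the first (cached) get_settings() call.
os.environ["TETHER_LLM_BACKEND"] = "mock"

from app.services.llm import discover_ollama, get_llm_service


async def main() -> None:
    service = get_llm_service()  # MockLLMService for the mock backend
    await service.initialize()
    try:
        print(await service.complete("Hello, Tether!", temperature=0.2))
    finally:
        await service.cleanup()

    # Discovery is safe to call even when Ollama is not running: it returns
    # an OllamaDiscoveryResult carrying an error string instead of raising.
    result = await discover_ollama()
    print(result.available, result.models, result.error)


asyncio.run(main())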
package/template/backend/pyproject.toml.template
@@ -0,0 +1,34 @@
[project]
name = "{{PROJECT_NAME}}-backend"
version = "0.1.0"
description = "Python backend for {{PROJECT_NAME}}"
requires-python = ">=3.11"
dependencies = [
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.30.0",
    "pydantic>=2.0.0",
    "pydantic-settings>=2.0.0",
    "httpx>=0.27.0",
    "openai>=1.0.0",
    "llama-cpp-python>=0.2.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "httpx>=0.27.0",
]
build = [
    "pyinstaller>=6.0.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["app"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
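One template detail worth noting: pyinstaller sits behind the optional build extra rather than in the base dependencies, so the packaging script in the next file presumably expects an install along the lines of pip install -e ".[build]" (or uv sync --extra build) to have been run first, while day-to-day development only needs the base set plus, optionally, the dev extra.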
package/template/backend/scripts/build.py
@@ -0,0 +1,112 @@
"""
PyInstaller build script for creating standalone Python backend binary.
"""

import os
import platform
import subprocess
import sys
from pathlib import Path


def get_binary_name() -> str:
    """Get the binary name for the current platform."""
    system = platform.system().lower()
    if system == "windows":
        return "api.exe"
    return "api"


def get_target_triple() -> str:
    """Get the target triple for the current platform."""
    system = platform.system().lower()
    machine = platform.machine().lower()

    if system == "darwin":
        if machine == "arm64":
            return "aarch64-apple-darwin"
        return "x86_64-apple-darwin"
    elif system == "windows":
        return "x86_64-pc-windows-msvc"
    elif system == "linux":
        return "x86_64-unknown-linux-gnu"
    else:
        raise RuntimeError(f"Unsupported platform: {system}")


def build() -> None:
    """Build the Python backend using PyInstaller."""
    # Get paths
    backend_dir = Path(__file__).parent.parent
    src_tauri = backend_dir.parent / "src-tauri"
    target_triple = get_target_triple()
    binary_name = get_binary_name()

    # Create output directory
    output_dir = src_tauri / "binaries"
    output_dir.mkdir(parents=True, exist_ok=True)

    # PyInstaller command
    cmd = [
        sys.executable,
        "-m",
        "PyInstaller",
        "--onefile",
        "--name",
        "api",
        "--distpath",
        str(output_dir),
        "--workpath",
        str(backend_dir / "build"),
        "--specpath",
        str(backend_dir),
        "--clean",
    ]

    # Add llama-cpp-python collection if the package is installed
    # This ensures the native llama.cpp library is bundled
    try:
        import llama_cpp
        cmd.extend(["--collect-all", "llama_cpp"])
        print("Including llama-cpp-python native libraries...")
    except ImportError:
        pass  # llama-cpp-python not installed, skip

    cmd.append(str(backend_dir / "app" / "main.py"))

    print(f"Building Python backend for {target_triple}...")
    print(f"Command: {' '.join(cmd)}")

    # Run PyInstaller
    result = subprocess.run(cmd, cwd=backend_dir)

    if result.returncode != 0:
        print("PyInstaller build failed!")
        sys.exit(1)

    # Rename binary with target triple (required by Tauri)
    src_binary = output_dir / binary_name
    dst_binary = output_dir / f"api-{target_triple}{'.exe' if platform.system() == 'Windows' else ''}"

    if src_binary.exists():
        src_binary.rename(dst_binary)
        print(f"Binary created: {dst_binary}")
    else:
        print(f"Error: Binary not found at {src_binary}")
        sys.exit(1)

    # Clean up
    spec_file = backend_dir / "api.spec"
    if spec_file.exists():
        spec_file.unlink()

    build_dir = backend_dir / "build"
    if build_dir.exists():
        import shutil
        shutil.rmtree(build_dir)

    print("Build complete!")


if __name__ == "__main__":
    build()
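The rename at the end of build() matters because Tauri's sidecar (externalBin) mechanism looks up bundled binaries by name suffixed with the platform target triple. A small editor's sketch (not part of the package), assuming it is run from the backend directory so that the script above is importable by path, prints the exact filename the build is expected to produce:

import platform
import sys
from pathlib import Path

# Hypothetical location: assumes this snippet lives in the backend directory,
# next to scripts/, so scripts/build.py (shown above) can be imported.
sys.path.insert(0, str(Path(__file__).resolve().parent / "scripts"))

from build import get_target_triple

suffix = ".exe" if platform.system() == "Windows" else ""
print(f"src-tauri/binaries/api-{get_target_triple()}{suffix}")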