create-tether-app 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +33 -2
- package/package.json +1 -1
- package/template/.env.example +5 -1
- package/template/backend/app/main.py +2 -1
- package/template/backend/app/routes/apikey.py +57 -0
- package/template/backend/app/routes/chat.py +24 -3
- package/template/backend/app/routes/models.py +39 -1
- package/template/backend/app/services/llm.py +301 -6
- package/template/backend/app/services/pricing.py +37 -0
- package/template/backend/pyproject.toml.template +1 -0
- package/template/frontend/App.tsx +6 -1
- package/template/frontend/components/ApiKeyForm.css +64 -0
- package/template/frontend/components/ApiKeyForm.tsx +94 -0
- package/template/frontend/components/ChatMessage.css +12 -0
- package/template/frontend/components/ChatMessage.tsx +18 -0
- package/template/frontend/components/ModelStatus.tsx +4 -0
- package/template/frontend/hooks/useApi.ts +43 -4
package/dist/index.js
CHANGED
@@ -297,6 +297,16 @@ async function scaffoldProject(options) {
         )
       );
       console.log();
+    } else if (options.template === "gemini") {
+      console.log(
+        chalk.dim(
+          " Note: Set your GEMINI_API_KEY in .env to use the Gemini API."
+        )
+      );
+      console.log(
+        chalk.dim(" Get an API key at: https://aistudio.google.com/apikey")
+      );
+      console.log();
     }
     if (options.useTailwind) {
       console.log(
@@ -363,11 +373,17 @@ async function customizeForTemplate(targetDir, options) {
   if (options.template === "ollama") {
     content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
     content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
+    content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
   } else if (options.template === "openai") {
     content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
+    content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
+  } else if (options.template === "gemini") {
+    content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
+    content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
   } else if (options.template === "custom") {
     content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
     content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
+    content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
   }
   await fs2.writeFile(pyprojectPath, content);
 }
@@ -380,6 +396,8 @@ async function customizeForTemplate(targetDir, options) {
     content = content.replace(backendRegex, '$1"ollama"');
   } else if (options.template === "openai") {
     content = content.replace(backendRegex, '$1"openai"');
+  } else if (options.template === "gemini") {
+    content = content.replace(backendRegex, '$1"gemini"');
   } else if (options.template === "custom") {
     content = content.replace(backendRegex, '$1"mock"');
   }
@@ -412,7 +430,10 @@ async function removeExampleComponents(targetDir) {
   if (await fs2.pathExists(appPath)) {
     let content = await fs2.readFile(appPath, "utf-8");
     content = content.replace(/import.*Chat.*from.*\n?/g, "");
-    content = content.replace(
+    content = content.replace(
+      /\s*\{status === "connected" && <Chat\s*\/>\}/g,
+      ""
+    );
     await fs2.writeFile(appPath, content);
   }
 }
@@ -456,6 +477,10 @@ async function promptForOptions(options) {
       name: "OpenAI API - Use GPT models via API",
       value: "openai"
     },
+    {
+      name: "Google Gemini API - Use Gemini models via API",
+      value: "gemini"
+    },
     {
       name: "Custom - Bare FastAPI setup",
       value: "custom"
@@ -503,6 +528,11 @@ var LLM_TEMPLATES = [
     description: "Use OpenAI API (requires API key)",
     details: "Uses GPT models via the OpenAI API. Requires OPENAI_API_KEY env var."
   },
+  {
+    name: "gemini",
+    description: "Use Google Gemini API (requires API key)",
+    details: "Uses Gemini models via the Google AI API. Requires GEMINI_API_KEY env var."
+  },
   {
     name: "custom",
     description: "Bare FastAPI setup, no LLM integration",
@@ -513,7 +543,7 @@ function createCli() {
   const program = new Command();
   program.name("create-tether-app").description("Create a new Tether AI/ML desktop application").version(getPackageVersion()).argument("[project-name]", "Name of the project to create").option(
     "--llm <provider>",
-    "LLM backend: ollama (default), local-llm, openai, custom"
+    "LLM backend: ollama (default), local-llm, openai, gemini, custom"
   ).option("-t, --template <template>", "Alias for --llm").option("-y, --yes", "Skip prompts and use defaults (ollama, with example)").option("--skip-prompts", "Alias for --yes").option("--skip-install", "Skip dependency installation").option("--use-npm", "Use npm instead of pnpm").option("--use-yarn", "Use yarn instead of pnpm").option("--dry-run", "Show what would be created without making changes").option("--no-example", "Skip example chat component").option("--tailwind", "Include Tailwind CSS setup").option("--no-tailwind", "Skip Tailwind CSS setup").option("-v, --verbose", "Show detailed output").option("--list-templates", "List available LLM templates").option("--check", "Check if all required dependencies are installed").addHelpText(
     "after",
     `
@@ -543,6 +573,7 @@ LLM Backends:
   ollama      Run models locally via Ollama (recommended)
   local-llm   Embed models directly with llama-cpp-python
   openai      Use OpenAI API (requires API key)
+  gemini      Use Google Gemini API (requires API key)
   custom      Bare FastAPI setup, no LLM integration
 `
 ).action(async (projectName, options) => {
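For reference: given the updated "--llm" option above, a Gemini-backed project can presumably be scaffolded with something like "npx create-tether-app my-app --llm gemini"; the exact invocation is inferred from the Commander setup shown in this diff rather than from the package's documentation.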
package/package.json
CHANGED
package/template/.env.example
CHANGED
@@ -2,7 +2,7 @@
 TETHER_HOST=127.0.0.1
 TETHER_PORT=8000
 
-# LLM Backend: local, openai, or mock
+# LLM Backend: local, ollama, openai, gemini, or mock
 TETHER_LLM_BACKEND=local
 
 # For local LLM (llama-cpp-python)
@@ -13,6 +13,10 @@ TETHER_CONTEXT_LENGTH=4096
 OPENAI_API_KEY=sk-your-api-key
 TETHER_OPENAI_MODEL=gpt-4o-mini
 
+# For Google Gemini API
+GEMINI_API_KEY=your-gemini-api-key
+TETHER_GEMINI_MODEL=gemini-2.0-flash
+
 # Model parameters
 TETHER_DEFAULT_TEMPERATURE=0.7
 TETHER_DEFAULT_MAX_TOKENS=1024
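The new Gemini variables are read by the pydantic-settings class changed in app/services/llm.py further down. A minimal sketch of how they surface at runtime, assuming the placeholder values above and that the template's backend package is importable (both assumptions, not guaranteed by this diff):

    import os
    from app.services.llm import get_settings

    # Placeholder values from .env.example; a real key is needed for actual calls.
    os.environ["TETHER_LLM_BACKEND"] = "gemini"
    os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

    settings = get_settings()  # cached via lru_cache, so set the env vars first
    print(settings.tether_llm_backend)   # "gemini"
    print(settings.tether_gemini_model)  # "gemini-2.0-flash" by default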
package/template/backend/app/main.py
CHANGED
@@ -8,7 +8,7 @@ from typing import AsyncIterator
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
-from app.routes import health, chat, models
+from app.routes import health, chat, models, apikey
 from app.services.llm import get_llm_service
 
 
@@ -47,6 +47,7 @@ def create_app() -> FastAPI:
     app.include_router(health.router)
     app.include_router(chat.router)
     app.include_router(models.router)
+    app.include_router(apikey.router)
 
     return app
 
package/template/backend/app/routes/apikey.py
ADDED
@@ -0,0 +1,57 @@
+"""
+API key submission endpoint.
+"""
+
+from fastapi import APIRouter, HTTPException, Request
+from pydantic import BaseModel
+
+
+class SetApiKeyRequest(BaseModel):
+    api_key: str
+
+
+class SetApiKeyResponse(BaseModel):
+    success: bool
+    message: str
+
+
+router = APIRouter()
+
+
+@router.post("/api-key", response_model=SetApiKeyResponse)
+async def set_api_key(request: Request, body: SetApiKeyRequest) -> SetApiKeyResponse:
+    """
+    Set the API key for the current LLM service at runtime.
+
+    Only supported for backends that require an API key (OpenAI, Gemini).
+    The key is stored in memory only and not persisted to disk.
+    """
+    llm_service = getattr(request.app.state, "llm_service", None)
+
+    if not llm_service:
+        raise HTTPException(status_code=503, detail="No LLM service configured")
+
+    if not hasattr(llm_service, "set_api_key"):
+        raise HTTPException(
+            status_code=400,
+            detail=f"Backend '{llm_service.service_type}' does not support runtime API key configuration",
+        )
+
+    try:
+        await llm_service.set_api_key(body.api_key)
+    except Exception as e:
+        return SetApiKeyResponse(
+            success=False,
+            message=f"Failed to initialize with provided key: {str(e)}",
+        )
+
+    if llm_service.is_ready():
+        return SetApiKeyResponse(
+            success=True,
+            message="API key accepted. Service is ready.",
+        )
+
+    return SetApiKeyResponse(
+        success=False,
+        message="API key set but service failed to become ready.",
+    )
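For reference, the new endpoint can be exercised with a plain HTTP client. A minimal sketch, assuming the backend is running on the template's default 127.0.0.1:8000 and that httpx is installed (neither is guaranteed by this diff), with a placeholder key:

    import httpx

    resp = httpx.post(
        "http://127.0.0.1:8000/api-key",
        json={"api_key": "your-gemini-api-key"},  # placeholder, not a real key
        timeout=30.0,
    )
    data = resp.json()
    print(data["success"], data["message"])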
package/template/backend/app/routes/chat.py
CHANGED
@@ -8,6 +8,8 @@ from typing import Literal, Optional
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field
 
+from app.services.pricing import estimate_cost
+
 
 class ChatMessage(BaseModel):
     role: Literal["user", "assistant", "system"]
@@ -43,8 +45,14 @@ class ChatResponse(BaseModel):
     thinking: Optional[str] = Field(
         default=None, description="Model's reasoning/thinking content (for thinking models)"
     )
-
-        default=None, description="
+    input_tokens: Optional[int] = Field(
+        default=None, description="Input tokens used"
+    )
+    output_tokens: Optional[int] = Field(
+        default=None, description="Output tokens generated"
+    )
+    cost: Optional[float] = Field(
+        default=None, description="Estimated cost in USD"
     )
     model: Optional[str] = Field(default=None, description="Model used")
     finish_reason: Optional[Literal["stop", "length", "error"]] = Field(
@@ -112,6 +120,9 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
     use_thinking = False if has_images else (body.think if body.think is not None else True)
 
     # Use chat API if available (supports thinking models), fallback to complete
+    input_tokens = None
+    output_tokens = None
+
     if hasattr(llm_service, "chat"):
         result = await llm_service.chat(
             messages,
@@ -119,10 +130,12 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
             max_tokens=body.max_tokens,
             think=use_thinking,
         )
-        # chat() returns dict with 'content'
+        # chat() returns dict with 'content', 'thinking', and token counts
         if isinstance(result, dict):
             response = result.get("content", "")
             thinking = result.get("thinking")
+            input_tokens = result.get("input_tokens")
+            output_tokens = result.get("output_tokens")
         else:
             # Fallback if chat returns string
             response, thinking = parse_thinking_content(result)
@@ -141,9 +154,17 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
         )
         response, thinking = parse_thinking_content(raw_response)
 
+    # Estimate cost if token counts are available
+    cost = None
+    if input_tokens is not None and output_tokens is not None:
+        cost = estimate_cost(llm_service.model_name, input_tokens, output_tokens)
+
     return ChatResponse(
         response=response,
         thinking=thinking,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cost=cost,
        model=llm_service.model_name,
         finish_reason="stop",
     )
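The flow added to this route: the service's chat() now returns token counts next to the content, and the route feeds them into estimate_cost() before building the ChatResponse. A minimal sketch of that bookkeeping outside the route, where the result dict and the model name are made-up stand-ins rather than template code:

    from app.services.pricing import estimate_cost

    # Stand-in for what a service's chat() returns after this change.
    result = {"content": "Hi!", "thinking": None, "input_tokens": 120, "output_tokens": 45}

    input_tokens = result.get("input_tokens")
    output_tokens = result.get("output_tokens")
    cost = None
    if input_tokens is not None and output_tokens is not None:
        # (120 * 0.15 + 45 * 0.60) / 1_000_000 = 0.000045 USD for gpt-4o-mini
        cost = estimate_cost("gpt-4o-mini", input_tokens, output_tokens)
    print(input_tokens, output_tokens, cost)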
package/template/backend/app/routes/models.py
CHANGED
@@ -5,7 +5,7 @@ Model discovery and switching endpoints.
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field
 
-from app.services.llm import discover_ollama, get_ollama_base_url
+from app.services.llm import discover_gemini_models, discover_ollama, get_ollama_base_url
 
 
 class ModelsResponse(BaseModel):
@@ -14,6 +14,7 @@ class ModelsResponse(BaseModel):
     models: list[str]
     backend: str
     error: str | None = None
+    needs_api_key: bool = False
 
 
 class SwitchModelRequest(BaseModel):
@@ -62,12 +63,31 @@ async def list_models(request: Request) -> ModelsResponse:
             error=discovery.error,
         )
 
+    # For Gemini, use discovered models
+    if backend == "gemini":
+        models = getattr(llm_service, "_available_models", [])
+        if not models and llm_service.is_ready():
+            # Re-discover if models list is empty but service is ready
+            client = getattr(llm_service, "_client", None)
+            if client:
+                discovery = await discover_gemini_models(client)
+                if discovery.available:
+                    models = discovery.models
+        return ModelsResponse(
+            available=llm_service.is_ready() or llm_service.needs_api_key,
+            current_model=llm_service.model_name if llm_service.is_ready() else None,
+            models=models,
+            backend=backend,
+            needs_api_key=llm_service.needs_api_key,
+        )
+
     # For other backends, return the configured model
     return ModelsResponse(
         available=llm_service.is_ready(),
         current_model=llm_service.model_name,
         models=[llm_service.model_name] if llm_service.is_ready() else [],
         backend=backend,
+        needs_api_key=llm_service.needs_api_key,
     )
 
 
@@ -119,6 +139,24 @@ async def switch_model(request: Request, body: SwitchModelRequest) -> SwitchMode
             message=f"Switched from {previous_model} to {body.model}",
         )
 
+    # For Gemini, switch to the requested model
+    if backend == "gemini":
+        available = getattr(llm_service, "_available_models", [])
+        if body.model not in available:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Model '{body.model}' not found. Available: {', '.join(available)}",
+            )
+
+        llm_service._model = body.model
+
+        return SwitchModelResponse(
+            success=True,
+            previous_model=previous_model,
+            current_model=body.model,
+            message=f"Switched from {previous_model} to {body.model}",
+        )
+
     # Other backends don't support runtime switching
     raise HTTPException(
         status_code=400,
package/template/backend/app/services/llm.py
CHANGED
@@ -41,7 +41,7 @@ class LLMSettings(BaseSettings):
         extra="ignore",
     )
 
-    tether_llm_backend: Literal["local", "ollama", "openai", "mock"] = "ollama"
+    tether_llm_backend: Literal["local", "ollama", "openai", "gemini", "mock"] = "ollama"
     tether_model_path: Optional[str] = None
     openai_api_key: Optional[str] = None
     tether_openai_model: str = "gpt-4o-mini"
@@ -49,6 +49,9 @@ class LLMSettings(BaseSettings):
     # Ollama settings - model can be empty to auto-select
     tether_ollama_model: Optional[str] = None
     tether_ollama_base_url: Optional[str] = None  # Uses OLLAMA_HOST or default
+    # Gemini settings
+    gemini_api_key: Optional[str] = None
+    tether_gemini_model: str = "gemini-2.0-flash"
 
 
 @lru_cache
@@ -59,9 +62,14 @@ def get_settings() -> LLMSettings:
 class LLMService(ABC):
     """Abstract base class for LLM services."""
 
-    service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
+    service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "mock"
     model_name: str = "unknown"
 
+    @property
+    def needs_api_key(self) -> bool:
+        """Whether the service is waiting for an API key."""
+        return False
+
     @abstractmethod
     async def initialize(self) -> None:
         """Initialize the service."""
@@ -92,7 +100,7 @@ class LLMService(ABC):
 class MockLLMService(LLMService):
     """Mock LLM service for testing."""
 
-    service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
+    service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "mock"
     model_name = "mock"
 
     def __init__(self):
@@ -120,7 +128,7 @@ class MockLLMService(LLMService):
 class OpenAIService(LLMService):
     """OpenAI API service."""
 
-    service_type: Literal["local", "ollama", "openai", "mock"] = "openai"
+    service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "openai"
 
     def __init__(
         self,
@@ -132,12 +140,21 @@ class OpenAIService(LLMService):
         self._model = model or settings.tether_openai_model
         self._client = None
         self._ready = False
+        self._needs_key = False
 
     @property
     def model_name(self) -> str:
         return self._model
 
+    @property
+    def needs_api_key(self) -> bool:
+        return self._needs_key
+
     async def initialize(self) -> None:
+        if not self._api_key:
+            self._needs_key = True
+            return
+        self._needs_key = False
         try:
             from openai import AsyncOpenAI
 
@@ -146,6 +163,11 @@ class OpenAIService(LLMService):
         except ImportError:
             raise ImportError("openai package not installed")
 
+    async def set_api_key(self, api_key: str) -> None:
+        """Set the API key at runtime and reinitialize."""
+        self._api_key = api_key
+        await self.initialize()
+
     async def cleanup(self) -> None:
         if self._client:
             await self._client.close()
@@ -173,6 +195,275 @@ class OpenAIService(LLMService):
 
         return response.choices[0].message.content or ""
 
+    async def chat(
+        self,
+        messages: list[dict],
+        *,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        think: bool = True,
+    ) -> dict:
+        """
+        Chat completion using the OpenAI API.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            think: Unused (kept for interface consistency)
+
+        Returns:
+            Dict with 'content', 'thinking', 'input_tokens', 'output_tokens'
+        """
+        if not self._client:
+            raise RuntimeError("OpenAI client not initialized")
+
+        response = await self._client.chat.completions.create(
+            model=self._model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+        )
+
+        usage = response.usage
+        return {
+            "content": response.choices[0].message.content or "",
+            "thinking": None,
+            "input_tokens": usage.prompt_tokens if usage else None,
+            "output_tokens": usage.completion_tokens if usage else None,
+        }
+
+
+class GeminiService(LLMService):
+    """Google Gemini API service."""
+
+    service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "gemini"
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+    ):
+        settings = get_settings()
+        self._api_key = api_key or settings.gemini_api_key
+        self._model = model or settings.tether_gemini_model
+        self._client = None
+        self._ready = False
+        self._needs_key = False
+        self._available_models: list[str] = []
+
+    @property
+    def model_name(self) -> str:
+        return self._model
+
+    @property
+    def needs_api_key(self) -> bool:
+        return self._needs_key
+
+    @property
+    def available_models(self) -> list[str]:
+        """List of available models (populated after initialize)."""
+        return self._available_models
+
+    async def initialize(self) -> None:
+        if not self._api_key:
+            self._needs_key = True
+            return
+        self._needs_key = False
+        try:
+            from google import genai
+
+            self._client = genai.Client(api_key=self._api_key)
+
+            # Discover available models
+            discovery = await discover_gemini_models(self._client)
+            if discovery.available:
+                self._available_models = discovery.models
+            else:
+                print(f"Warning: Could not discover Gemini models: {discovery.error}")
+                # Fall back to just the configured model
+                self._available_models = [self._model]
+
+            # Verify configured model is available
+            if self._available_models and self._model not in self._available_models:
+                available_str = ", ".join(self._available_models[:5])
+                if len(self._available_models) > 5:
+                    available_str += f", ... ({len(self._available_models) - 5} more)"
+                print(
+                    f"Warning: Model '{self._model}' not found in available models. "
+                    f"Available: {available_str}. "
+                    f"It may still work if you have access."
+                )
+
+            self._ready = True
+        except ImportError:
+            raise ImportError(
+                "google-genai package not installed. Install it with:\n"
+                "  pip install google-genai\n"
+                "Or: uv add google-genai"
+            )
+
+    async def set_api_key(self, api_key: str) -> None:
+        """Set the API key at runtime and reinitialize."""
+        self._api_key = api_key
+        await self.initialize()
+
+    async def cleanup(self) -> None:
+        self._client = None
+        self._ready = False
+
+    def is_ready(self) -> bool:
+        return self._ready and self._client is not None
+
+    async def complete(
+        self,
+        prompt: str,
+        *,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+    ) -> str:
+        if not self._client:
+            raise RuntimeError("Gemini client not initialized")
+
+        from google.genai import types
+
+        config = types.GenerateContentConfig(
+            temperature=temperature,
+            max_output_tokens=max_tokens,
+        )
+
+        response = await self._client.aio.models.generate_content(
+            model=self._model,
+            contents=prompt,
+            config=config,
+        )
+
+        return response.text or ""
+
+    async def chat(
+        self,
+        messages: list[dict],
+        *,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        think: bool = True,
+    ) -> dict:
+        """
+        Chat completion using the Gemini API.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'
+            temperature: Sampling temperature
+            max_tokens: Maximum tokens to generate
+            think: Enable thinking for supported models (default: True)
+
+        Returns:
+            Dict with 'content' and optionally 'thinking' keys
+        """
+        if not self._client:
+            raise RuntimeError("Gemini client not initialized")
+
+        from google.genai import types
+
+        # Extract system instruction from messages
+        system_instruction = None
+        chat_messages = []
+        for msg in messages:
+            if msg["role"] == "system":
+                system_instruction = msg["content"]
+            else:
+                chat_messages.append(msg)
+
+        # Build history (all messages except the last one)
+        history = []
+        for msg in chat_messages[:-1]:
+            role = "model" if msg["role"] == "assistant" else msg["role"]
+            history.append(
+                types.Content(
+                    role=role,
+                    parts=[types.Part.from_text(text=msg["content"])],
+                )
+            )
+
+        # Build config
+        config = types.GenerateContentConfig(
+            temperature=temperature,
+            max_output_tokens=max_tokens,
+            system_instruction=system_instruction,
+        )
+
+        # Enable thinking for 2.5 models
+        is_thinking_model = "2.5" in self._model
+        if think and is_thinking_model:
+            config.thinking_config = types.ThinkingConfig(
+                thinking_budget=8192,
+            )
+
+        # Create chat and send current message
+        chat_session = self._client.aio.chats.create(
+            model=self._model,
+            history=history,
+            config=config,
+        )
+
+        current_message = chat_messages[-1]["content"] if chat_messages else ""
+        response = await chat_session.send_message(current_message)
+
+        # Parse response parts for thinking content
+        thinking_text = None
+        content_text = ""
+
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if hasattr(part, "thought") and part.thought:
+                    thinking_text = (thinking_text or "") + (part.text or "")
+                else:
+                    content_text += part.text or ""
+        else:
+            content_text = response.text or ""
+
+        usage = response.usage_metadata
+        return {
+            "content": content_text,
+            "thinking": thinking_text,
+            "input_tokens": usage.prompt_token_count if usage else None,
+            "output_tokens": usage.candidates_token_count if usage else None,
+        }
+
+
+@dataclass
+class GeminiDiscoveryResult:
+    """Result of Gemini model discovery."""
+
+    available: bool
+    models: list[str]
+    error: Optional[str] = None
+
+
+async def discover_gemini_models(client) -> GeminiDiscoveryResult:
+    """Discover available Gemini models from the API."""
+    try:
+        loop = asyncio.get_event_loop()
+        response = await loop.run_in_executor(None, client.models.list)
+        models = []
+        for model in response:
+            # Only include models that support generateContent
+            actions = getattr(model, "supported_actions", None)
+            if actions and "generateContent" in actions:
+                name = model.name or ""
+                # Strip "models/" prefix
+                short_name = name.removeprefix("models/")
+                if short_name:
+                    models.append(short_name)
+        models.sort()
+        return GeminiDiscoveryResult(available=True, models=models)
+    except Exception as e:
+        return GeminiDiscoveryResult(
+            available=False,
+            models=[],
+            error=f"Failed to list Gemini models: {str(e)}",
+        )
+
 
 async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResult:
     """Discover Ollama instance and available models."""
@@ -212,7 +503,7 @@ async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResu
 class OllamaService(LLMService):
     """Ollama LLM service."""
 
-    service_type: Literal["local", "ollama", "openai", "mock"] = "ollama"
+    service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "ollama"
 
     def __init__(
         self,
@@ -386,13 +677,15 @@ class OllamaService(LLMService):
         return {
             "content": message.get("content", ""),
             "thinking": message.get("thinking"),  # None if not a thinking model
+            "input_tokens": data.get("prompt_eval_count"),
+            "output_tokens": data.get("eval_count"),
         }
 
 
 class LocalLLMService(LLMService):
     """Local LLM service using llama-cpp-python."""
 
-    service_type: Literal["local", "ollama", "openai", "mock"] = "local"
+    service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "local"
 
     def __init__(
         self,
@@ -518,6 +811,8 @@ def get_llm_service() -> LLMService:
 
     if backend == "openai":
         return OpenAIService()
+    elif backend == "gemini":
+        return GeminiService()
     elif backend == "ollama":
         return OllamaService()
     elif backend == "local":
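Taken together, the new GeminiService can also be driven outside the FastAPI app. A minimal sketch, assuming google-genai is installed and that a real key is supplied at some point (the key and model below are placeholders, not template values):

    import asyncio

    from app.services.llm import GeminiService


    async def main() -> None:
        service = GeminiService(model="gemini-2.0-flash")
        await service.initialize()
        if service.needs_api_key:
            # No GEMINI_API_KEY configured: supply one at runtime instead.
            await service.set_api_key("your-gemini-api-key")  # placeholder
        result = await service.chat(
            [{"role": "user", "content": "Say hello in one sentence."}],
            temperature=0.7,
            max_tokens=128,
        )
        print(result["content"], result["input_tokens"], result["output_tokens"])


    asyncio.run(main())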
package/template/backend/app/services/pricing.py
ADDED
@@ -0,0 +1,37 @@
+"""Model pricing data (USD per million tokens)."""
+
+# (input_cost_per_m, output_cost_per_m)
+MODEL_PRICING: dict[str, tuple[float, float]] = {
+    # OpenAI
+    "gpt-4o": (2.50, 10.00),
+    "gpt-4o-mini": (0.15, 0.60),
+    "gpt-4.1": (2.00, 8.00),
+    "gpt-4.1-mini": (0.40, 1.60),
+    "gpt-4.1-nano": (0.10, 0.40),
+    "o3-mini": (1.10, 4.40),
+    # Gemini
+    "gemini-2.0-flash": (0.10, 0.40),
+    "gemini-2.5-flash": (0.15, 0.60),
+    "gemini-2.5-pro": (1.25, 10.00),
+    "gemini-2.0-flash-lite": (0.075, 0.30),
+    # Ollama / Local — free
+}
+
+
+def estimate_cost(
+    model: str,
+    input_tokens: int,
+    output_tokens: int,
+) -> float | None:
+    """Estimate cost in USD. Returns None if model not in pricing table."""
+    # Try exact match first, then prefix match
+    pricing = MODEL_PRICING.get(model)
+    if not pricing:
+        for key, val in MODEL_PRICING.items():
+            if model.startswith(key):
+                pricing = val
+                break
+    if not pricing:
+        return None
+    input_cost, output_cost = pricing
+    return (input_tokens * input_cost + output_tokens * output_cost) / 1_000_000
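A worked example of how estimate_cost resolves prices: exact keys win, otherwise the first key the model name starts with is used, and unknown models (e.g. local/Ollama) return None. The preview-style model name below is only there to illustrate the prefix match; it is not something shipped in the template:

    from app.services.pricing import estimate_cost

    # Exact match: (1_000 * 0.10 + 500 * 0.40) / 1_000_000 = 0.0003 USD
    print(estimate_cost("gemini-2.0-flash", 1_000, 500))

    # Prefix match: "gemini-2.5-flash-preview" starts with "gemini-2.5-flash",
    # so that entry is reused: (1_000 * 0.15 + 500 * 0.60) / 1_000_000 = 0.00045 USD
    print(estimate_cost("gemini-2.5-flash-preview", 1_000, 500))

    # Not in the table: local models have no price, so no cost is reported.
    print(estimate_cost("llama3.2", 1_000, 500))  # None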
package/template/frontend/App.tsx
CHANGED
@@ -1,10 +1,11 @@
 import { useBackendStatus } from "./hooks/useApi";
 import { Chat } from "./components/Chat";
 import { ModelStatus } from "./components/ModelStatus";
+import { ApiKeyForm } from "./components/ApiKeyForm";
 import "./App.css";
 
 function App() {
-  const { status, health, modelInfo, error, retry, changeModel } =
+  const { status, health, modelInfo, error, retry, changeModel, submitApiKey } =
     useBackendStatus();
 
   return (
@@ -45,6 +46,10 @@ function App() {
         </div>
       )}
 
+      {status === "needs-api-key" && (
+        <ApiKeyForm modelInfo={modelInfo} onSubmit={submitApiKey} />
+      )}
+
       {status === "error" && (
         <div className="error">
           <p>Failed to connect</p>
package/template/frontend/components/ApiKeyForm.css
ADDED
@@ -0,0 +1,64 @@
+.api-key-form-container {
+  flex: 1;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  padding: 2rem;
+}
+
+.api-key-form {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 0.75rem;
+  max-width: 400px;
+  width: 100%;
+  padding: 2rem;
+  background-color: var(--color-surface);
+  border: 1px solid var(--color-border);
+  border-radius: var(--radius);
+}
+
+.api-key-form h2 {
+  font-size: 1.25rem;
+  font-weight: 600;
+  margin-bottom: 0.25rem;
+}
+
+.api-key-description {
+  color: var(--color-text-muted);
+  font-size: 0.875rem;
+  text-align: center;
+}
+
+.api-key-help-link {
+  color: var(--color-primary);
+  font-size: 0.875rem;
+  text-decoration: none;
+}
+
+.api-key-help-link:hover {
+  text-decoration: underline;
+}
+
+.api-key-input {
+  width: 100%;
+  margin-top: 0.25rem;
+}
+
+.api-key-error {
+  color: var(--color-error);
+  font-size: 0.8125rem;
+  text-align: center;
+}
+
+.api-key-submit {
+  width: 100%;
+  margin-top: 0.25rem;
+}
+
+.api-key-hint {
+  color: var(--color-text-muted);
+  font-size: 0.75rem;
+  opacity: 0.7;
+}
package/template/frontend/components/ApiKeyForm.tsx
ADDED
@@ -0,0 +1,94 @@
+import { useState } from "react";
+import type { ModelsResponse } from "../hooks/useApi";
+import "./ApiKeyForm.css";
+
+interface BackendInfo {
+  name: string;
+  placeholder: string;
+  helpUrl: string;
+  helpText: string;
+}
+
+const BACKEND_INFO: Record<string, BackendInfo> = {
+  openai: {
+    name: "OpenAI",
+    placeholder: "sk-...",
+    helpUrl: "https://platform.openai.com/api-keys",
+    helpText: "Get an API key",
+  },
+  gemini: {
+    name: "Google Gemini",
+    placeholder: "AI...",
+    helpUrl: "https://aistudio.google.com/apikey",
+    helpText: "Get an API key",
+  },
+};
+
+interface ApiKeyFormProps {
+  modelInfo: ModelsResponse | null;
+  onSubmit: (apiKey: string) => Promise<void>;
+}
+
+export function ApiKeyForm({ modelInfo, onSubmit }: ApiKeyFormProps) {
+  const [apiKey, setApiKey] = useState("");
+  const [isSubmitting, setIsSubmitting] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  const backend = modelInfo?.backend || "openai";
+  const info = BACKEND_INFO[backend] || BACKEND_INFO.openai;
+
+  const handleSubmit = async (e: React.FormEvent) => {
+    e.preventDefault();
+    if (!apiKey.trim() || isSubmitting) return;
+
+    setIsSubmitting(true);
+    setError(null);
+
+    try {
+      await onSubmit(apiKey.trim());
+    } catch (err) {
+      setError(err instanceof Error ? err.message : "Failed to set API key");
+    } finally {
+      setIsSubmitting(false);
+    }
+  };
+
+  return (
+    <div className="api-key-form-container">
+      <form className="api-key-form" onSubmit={handleSubmit}>
+        <h2>API Key Required</h2>
+        <p className="api-key-description">
+          Enter your {info.name} API key to get started.
+        </p>
+        <a
+          className="api-key-help-link"
+          href={info.helpUrl}
+          target="_blank"
+          rel="noopener noreferrer"
+        >
+          {info.helpText}
+        </a>
+        <input
+          type="password"
+          className="api-key-input"
+          value={apiKey}
+          onChange={(e) => setApiKey(e.target.value)}
+          placeholder={info.placeholder}
+          autoFocus
+          disabled={isSubmitting}
+        />
+        {error && <p className="api-key-error">{error}</p>}
+        <button
+          type="submit"
+          className="api-key-submit"
+          disabled={!apiKey.trim() || isSubmitting}
+        >
+          {isSubmitting ? "Connecting..." : "Connect"}
+        </button>
+        <p className="api-key-hint">
+          Stored in memory only — not saved to disk.
+        </p>
+      </form>
+    </div>
+  );
+}
package/template/frontend/components/ChatMessage.css
CHANGED
@@ -185,6 +185,18 @@
   margin-bottom: 0;
 }
 
+/* Token usage footer */
+.message-usage {
+  display: flex;
+  gap: 0.75rem;
+  margin-top: 0.5rem;
+  padding-top: 0.375rem;
+  border-top: 1px solid var(--color-border);
+  font-size: 0.6875rem;
+  color: var(--color-text-muted);
+  opacity: 0.7;
+}
+
 /* Message images */
 .message-images {
   display: flex;
package/template/frontend/components/ChatMessage.tsx
CHANGED
@@ -57,6 +57,24 @@ export function ChatMessage({ message }: ChatMessageProps) {
       <div className="message-content">
         {isUser ? message.content : <Markdown>{message.content}</Markdown>}
       </div>
+      {!isUser && (message.input_tokens || message.output_tokens) && (
+        <div className="message-usage">
+          {message.input_tokens != null && (
+            <span>{message.input_tokens} in</span>
+          )}
+          {message.output_tokens != null && (
+            <span>{message.output_tokens} out</span>
+          )}
+          {message.cost != null && (
+            <span>
+              $
+              {message.cost < 0.01
+                ? message.cost.toFixed(4)
+                : message.cost.toFixed(2)}
+            </span>
+          )}
+        </div>
+      )}
     </div>
   );
 }
package/template/frontend/components/ModelStatus.tsx
CHANGED
@@ -28,6 +28,8 @@ export function ModelStatus({
       return "var(--color-success)";
     case "connecting":
       return "var(--color-warning)";
+    case "needs-api-key":
+      return "var(--color-warning)";
     case "error":
     case "disconnected":
       return "var(--color-error)";
@@ -41,6 +43,8 @@ export function ModelStatus({
       return health?.model_loaded ? "Ready" : "Connected";
     case "connecting":
       return "Connecting...";
+    case "needs-api-key":
+      return "API Key Required";
    case "error":
       return "Error";
     case "disconnected":
package/template/frontend/hooks/useApi.ts
CHANGED
@@ -7,6 +7,9 @@ export interface ChatMessage {
   content: string;
   images?: string[];
   thinking?: string;
+  input_tokens?: number;
+  output_tokens?: number;
+  cost?: number;
   timestamp?: number;
 }
 
@@ -23,7 +26,9 @@ export interface ChatRequest {
 export interface ChatResponse {
   response: string;
   thinking?: string;
-
+  input_tokens?: number;
+  output_tokens?: number;
+  cost?: number;
   model?: string;
   finish_reason?: "stop" | "length" | "error";
 }
@@ -40,6 +45,12 @@ export interface ModelsResponse {
   models: string[];
   backend: string;
   error: string | null;
+  needs_api_key?: boolean;
+}
+
+export interface SetApiKeyResponse {
+  success: boolean;
+  message: string;
 }
 
 export interface SwitchModelResponse {
@@ -54,6 +65,7 @@ export type ConnectionStatus =
   | "loading-model"
   | "connected"
   | "disconnected"
+  | "needs-api-key"
   | "error";
 
 // Configuration
@@ -121,6 +133,13 @@ async function switchModel(model: string): Promise<SwitchModelResponse> {
   });
 }
 
+async function sendApiKey(apiKey: string): Promise<SetApiKeyResponse> {
+  return apiFetch<SetApiKeyResponse>("/api-key", {
+    method: "POST",
+    body: JSON.stringify({ api_key: apiKey }),
+  });
+}
+
 async function waitForBackend(): Promise<boolean> {
   // First, get the correct API URL from Tauri
   await getApiUrl();
@@ -177,8 +196,10 @@ export function useBackendStatus() {
       setHealth(healthData);
       setModelInfo(modelsData);
 
-      // Check if
-      if (
+      // Check if API key is needed
+      if (modelsData?.needs_api_key) {
+        setStatus("needs-api-key");
+      } else if (healthData.model_loaded) {
         setStatus("connected");
       } else if (modelsData?.error) {
         setStatus("error");
@@ -266,7 +287,22 @@ export function useBackendStatus() {
     }
   }, []);
 
-
+  const submitApiKey = useCallback(async (apiKey: string) => {
+    const result = await sendApiKey(apiKey);
+    if (!result.success) {
+      throw new Error(result.message);
+    }
+    // Refresh health and models after successful key submission
+    const [healthData, modelsData] = await Promise.all([
+      checkHealth(),
+      fetchModels().catch(() => null),
+    ]);
+    setHealth(healthData);
+    setModelInfo(modelsData);
+    setStatus("connected");
+  }, []);
+
+  return { status, health, modelInfo, error, retry, changeModel, submitApiKey };
 }
 
 export function useChat() {
@@ -302,6 +338,9 @@ export function useChat() {
         role: "assistant",
         content: response.response,
         thinking: response.thinking,
+        input_tokens: response.input_tokens,
+        output_tokens: response.output_tokens,
+        cost: response.cost,
         timestamp: Date.now(),
       };
 