create-tether-app 0.1.2 → 0.1.4

This diff shows the changes between the 0.1.2 and 0.1.4 releases of create-tether-app as published to the public registry, and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -297,6 +297,16 @@ async function scaffoldProject(options) {
  )
  );
  console.log();
+ } else if (options.template === "gemini") {
+ console.log(
+ chalk.dim(
+ " Note: Set your GEMINI_API_KEY in .env to use the Gemini API."
+ )
+ );
+ console.log(
+ chalk.dim(" Get an API key at: https://aistudio.google.com/apikey")
+ );
+ console.log();
  }
  if (options.useTailwind) {
  console.log(
@@ -363,11 +373,17 @@ async function customizeForTemplate(targetDir, options) {
  if (options.template === "ollama") {
  content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
  content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
  } else if (options.template === "openai") {
  content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
+ } else if (options.template === "gemini") {
+ content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
  } else if (options.template === "custom") {
  content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
  content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
  }
  await fs2.writeFile(pyprojectPath, content);
  }
@@ -380,6 +396,8 @@ async function customizeForTemplate(targetDir, options) {
  content = content.replace(backendRegex, '$1"ollama"');
  } else if (options.template === "openai") {
  content = content.replace(backendRegex, '$1"openai"');
+ } else if (options.template === "gemini") {
+ content = content.replace(backendRegex, '$1"gemini"');
  } else if (options.template === "custom") {
  content = content.replace(backendRegex, '$1"mock"');
  }
@@ -412,7 +430,10 @@ async function removeExampleComponents(targetDir) {
  if (await fs2.pathExists(appPath)) {
  let content = await fs2.readFile(appPath, "utf-8");
  content = content.replace(/import.*Chat.*from.*\n?/g, "");
- content = content.replace(/<Chat\s*\/>/g, "");
+ content = content.replace(
+ /\s*\{status === "connected" && <Chat\s*\/>\}/g,
+ ""
+ );
  await fs2.writeFile(appPath, content);
  }
  }
@@ -456,6 +477,10 @@ async function promptForOptions(options) {
  name: "OpenAI API - Use GPT models via API",
  value: "openai"
  },
+ {
+ name: "Google Gemini API - Use Gemini models via API",
+ value: "gemini"
+ },
  {
  name: "Custom - Bare FastAPI setup",
  value: "custom"
@@ -503,6 +528,11 @@ var LLM_TEMPLATES = [
  description: "Use OpenAI API (requires API key)",
  details: "Uses GPT models via the OpenAI API. Requires OPENAI_API_KEY env var."
  },
+ {
+ name: "gemini",
+ description: "Use Google Gemini API (requires API key)",
+ details: "Uses Gemini models via the Google AI API. Requires GEMINI_API_KEY env var."
+ },
  {
  name: "custom",
  description: "Bare FastAPI setup, no LLM integration",
@@ -513,7 +543,7 @@ function createCli() {
  const program = new Command();
  program.name("create-tether-app").description("Create a new Tether AI/ML desktop application").version(getPackageVersion()).argument("[project-name]", "Name of the project to create").option(
  "--llm <provider>",
- "LLM backend: ollama (default), local-llm, openai, custom"
+ "LLM backend: ollama (default), local-llm, openai, gemini, custom"
  ).option("-t, --template <template>", "Alias for --llm").option("-y, --yes", "Skip prompts and use defaults (ollama, with example)").option("--skip-prompts", "Alias for --yes").option("--skip-install", "Skip dependency installation").option("--use-npm", "Use npm instead of pnpm").option("--use-yarn", "Use yarn instead of pnpm").option("--dry-run", "Show what would be created without making changes").option("--no-example", "Skip example chat component").option("--tailwind", "Include Tailwind CSS setup").option("--no-tailwind", "Skip Tailwind CSS setup").option("-v, --verbose", "Show detailed output").option("--list-templates", "List available LLM templates").option("--check", "Check if all required dependencies are installed").addHelpText(
  "after",
  `
@@ -543,6 +573,7 @@ LLM Backends:
  ollama Run models locally via Ollama (recommended)
  local-llm Embed models directly with llama-cpp-python
  openai Use OpenAI API (requires API key)
+ gemini Use Google Gemini API (requires API key)
  custom Bare FastAPI setup, no LLM integration
  `
  ).action(async (projectName, options) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "create-tether-app",
- "version": "0.1.2",
+ "version": "0.1.4",
  "description": "CLI tool for scaffolding Tether AI/ML desktop applications",
  "type": "module",
  "bin": {
@@ -2,7 +2,7 @@
  TETHER_HOST=127.0.0.1
  TETHER_PORT=8000

- # LLM Backend: local, openai, or mock
+ # LLM Backend: local, ollama, openai, gemini, or mock
  TETHER_LLM_BACKEND=local

  # For local LLM (llama-cpp-python)
@@ -13,6 +13,10 @@ TETHER_CONTEXT_LENGTH=4096
  OPENAI_API_KEY=sk-your-api-key
  TETHER_OPENAI_MODEL=gpt-4o-mini

+ # For Google Gemini API
+ GEMINI_API_KEY=your-gemini-api-key
+ TETHER_GEMINI_MODEL=gemini-2.0-flash
+
  # Model parameters
  TETHER_DEFAULT_TEMPERATURE=0.7
  TETHER_DEFAULT_MAX_TOKENS=1024
@@ -8,7 +8,7 @@ from typing import AsyncIterator
  from fastapi import FastAPI
  from fastapi.middleware.cors import CORSMiddleware

- from app.routes import health, chat, models
+ from app.routes import health, chat, models, apikey
  from app.services.llm import get_llm_service


@@ -47,6 +47,7 @@ def create_app() -> FastAPI:
  app.include_router(health.router)
  app.include_router(chat.router)
  app.include_router(models.router)
+ app.include_router(apikey.router)

  return app

@@ -0,0 +1,57 @@
+ """
+ API key submission endpoint.
+ """
+
+ from fastapi import APIRouter, HTTPException, Request
+ from pydantic import BaseModel
+
+
+ class SetApiKeyRequest(BaseModel):
+ api_key: str
+
+
+ class SetApiKeyResponse(BaseModel):
+ success: bool
+ message: str
+
+
+ router = APIRouter()
+
+
+ @router.post("/api-key", response_model=SetApiKeyResponse)
+ async def set_api_key(request: Request, body: SetApiKeyRequest) -> SetApiKeyResponse:
+ """
+ Set the API key for the current LLM service at runtime.
+
+ Only supported for backends that require an API key (OpenAI, Gemini).
+ The key is stored in memory only and not persisted to disk.
+ """
+ llm_service = getattr(request.app.state, "llm_service", None)
+
+ if not llm_service:
+ raise HTTPException(status_code=503, detail="No LLM service configured")
+
+ if not hasattr(llm_service, "set_api_key"):
+ raise HTTPException(
+ status_code=400,
+ detail=f"Backend '{llm_service.service_type}' does not support runtime API key configuration",
+ )
+
+ try:
+ await llm_service.set_api_key(body.api_key)
+ except Exception as e:
+ return SetApiKeyResponse(
+ success=False,
+ message=f"Failed to initialize with provided key: {str(e)}",
+ )
+
+ if llm_service.is_ready():
+ return SetApiKeyResponse(
+ success=True,
+ message="API key accepted. Service is ready.",
+ )
+
+ return SetApiKeyResponse(
+ success=False,
+ message="API key set but service failed to become ready.",
+ )
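For reference, the new endpoint is easy to exercise with a plain HTTP client. A minimal sketch, assuming the scaffolded backend is running on the template's default host/port (127.0.0.1:8000, per the .env above) and using httpx, which is already a template dependency:

import asyncio
import httpx

async def main() -> None:
    async with httpx.AsyncClient(base_url="http://127.0.0.1:8000") as client:
        # The key is held in memory by the running service only (see the docstring above).
        resp = await client.post("/api-key", json={"api_key": "your-gemini-api-key"})
        data = resp.json()
        print(data["success"], data["message"])
        # On a valid key: True "API key accepted. Service is ready."

asyncio.run(main())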
@@ -8,6 +8,8 @@ from typing import Literal, Optional
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel, Field

+ from app.services.pricing import estimate_cost
+

  class ChatMessage(BaseModel):
  role: Literal["user", "assistant", "system"]
@@ -43,8 +45,14 @@ class ChatResponse(BaseModel):
  thinking: Optional[str] = Field(
  default=None, description="Model's reasoning/thinking content (for thinking models)"
  )
- tokens_used: Optional[int] = Field(
- default=None, description="Number of tokens used"
+ input_tokens: Optional[int] = Field(
+ default=None, description="Input tokens used"
+ )
+ output_tokens: Optional[int] = Field(
+ default=None, description="Output tokens generated"
+ )
+ cost: Optional[float] = Field(
+ default=None, description="Estimated cost in USD"
  )
  model: Optional[str] = Field(default=None, description="Model used")
  finish_reason: Optional[Literal["stop", "length", "error"]] = Field(
@@ -112,6 +120,9 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
  use_thinking = False if has_images else (body.think if body.think is not None else True)

  # Use chat API if available (supports thinking models), fallback to complete
+ input_tokens = None
+ output_tokens = None
+
  if hasattr(llm_service, "chat"):
  result = await llm_service.chat(
  messages,
@@ -119,10 +130,12 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
  max_tokens=body.max_tokens,
  think=use_thinking,
  )
- # chat() returns dict with 'content' and 'thinking'
+ # chat() returns dict with 'content', 'thinking', and token counts
  if isinstance(result, dict):
  response = result.get("content", "")
  thinking = result.get("thinking")
+ input_tokens = result.get("input_tokens")
+ output_tokens = result.get("output_tokens")
  else:
  # Fallback if chat returns string
  response, thinking = parse_thinking_content(result)
@@ -141,9 +154,17 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
  )
  response, thinking = parse_thinking_content(raw_response)

+ # Estimate cost if token counts are available
+ cost = None
+ if input_tokens is not None and output_tokens is not None:
+ cost = estimate_cost(llm_service.model_name, input_tokens, output_tokens)
+
  return ChatResponse(
  response=response,
  thinking=thinking,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ cost=cost,
  model=llm_service.model_name,
  finish_reason="stop",
  )
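With these fields in place, a successful /chat response carries per-message usage. An illustrative payload (token counts are hypothetical; the cost follows the gemini-2.0-flash rates from the pricing table introduced below):

{
    "response": "Hello!",
    "thinking": None,
    "input_tokens": 1200,
    "output_tokens": 350,
    # (1200 * 0.10 + 350 * 0.40) / 1_000_000 = 0.00026 USD
    "cost": 0.00026,
    "model": "gemini-2.0-flash",
    "finish_reason": "stop",
}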
@@ -5,7 +5,7 @@ Model discovery and switching endpoints.
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel, Field

- from app.services.llm import discover_ollama, get_ollama_base_url
+ from app.services.llm import discover_gemini_models, discover_ollama, get_ollama_base_url


  class ModelsResponse(BaseModel):
@@ -14,6 +14,7 @@ class ModelsResponse(BaseModel):
  models: list[str]
  backend: str
  error: str | None = None
+ needs_api_key: bool = False


  class SwitchModelRequest(BaseModel):
@@ -62,12 +63,31 @@ async def list_models(request: Request) -> ModelsResponse:
  error=discovery.error,
  )

+ # For Gemini, use discovered models
+ if backend == "gemini":
+ models = getattr(llm_service, "_available_models", [])
+ if not models and llm_service.is_ready():
+ # Re-discover if models list is empty but service is ready
+ client = getattr(llm_service, "_client", None)
+ if client:
+ discovery = await discover_gemini_models(client)
+ if discovery.available:
+ models = discovery.models
+ return ModelsResponse(
+ available=llm_service.is_ready() or llm_service.needs_api_key,
+ current_model=llm_service.model_name if llm_service.is_ready() else None,
+ models=models,
+ backend=backend,
+ needs_api_key=llm_service.needs_api_key,
+ )
+
  # For other backends, return the configured model
  return ModelsResponse(
  available=llm_service.is_ready(),
  current_model=llm_service.model_name,
  models=[llm_service.model_name] if llm_service.is_ready() else [],
  backend=backend,
+ needs_api_key=llm_service.needs_api_key,
  )


@@ -119,6 +139,24 @@ async def switch_model(request: Request, body: SwitchModelRequest) -> SwitchModelResponse:
  message=f"Switched from {previous_model} to {body.model}",
  )

+ # For Gemini, switch to the requested model
+ if backend == "gemini":
+ available = getattr(llm_service, "_available_models", [])
+ if body.model not in available:
+ raise HTTPException(
+ status_code=404,
+ detail=f"Model '{body.model}' not found. Available: {', '.join(available)}",
+ )
+
+ llm_service._model = body.model
+
+ return SwitchModelResponse(
+ success=True,
+ previous_model=previous_model,
+ current_model=body.model,
+ message=f"Switched from {previous_model} to {body.model}",
+ )
+
  # Other backends don't support runtime switching
  raise HTTPException(
  status_code=400,
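The effect of needs_api_key on GET /models is worth spelling out. A representative (hypothetical) payload for a Gemini backend before any key has been submitted; available stays true so the frontend can show the key form instead of an error:

{
    "available": True,        # ready OR waiting for a key
    "current_model": None,    # no model until the service is ready
    "models": [],
    "backend": "gemini",
    "error": None,
    "needs_api_key": True,
}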
@@ -41,7 +41,7 @@ class LLMSettings(BaseSettings):
  extra="ignore",
  )

- tether_llm_backend: Literal["local", "ollama", "openai", "mock"] = "ollama"
+ tether_llm_backend: Literal["local", "ollama", "openai", "gemini", "mock"] = "ollama"
  tether_model_path: Optional[str] = None
  openai_api_key: Optional[str] = None
  tether_openai_model: str = "gpt-4o-mini"
@@ -49,6 +49,9 @@ class LLMSettings(BaseSettings):
  # Ollama settings - model can be empty to auto-select
  tether_ollama_model: Optional[str] = None
  tether_ollama_base_url: Optional[str] = None # Uses OLLAMA_HOST or default
+ # Gemini settings
+ gemini_api_key: Optional[str] = None
+ tether_gemini_model: str = "gemini-2.0-flash"


  @lru_cache
@@ -59,9 +62,14 @@ def get_settings() -> LLMSettings:
  class LLMService(ABC):
  """Abstract base class for LLM services."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "mock"
  model_name: str = "unknown"

+ @property
+ def needs_api_key(self) -> bool:
+ """Whether the service is waiting for an API key."""
+ return False
+
  @abstractmethod
  async def initialize(self) -> None:
  """Initialize the service."""
@@ -92,7 +100,7 @@
  class MockLLMService(LLMService):
  """Mock LLM service for testing."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "mock"
  model_name = "mock"

  def __init__(self):
@@ -120,7 +128,7 @@
  class OpenAIService(LLMService):
  """OpenAI API service."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "openai"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "openai"

  def __init__(
  self,
@@ -132,12 +140,21 @@ class OpenAIService(LLMService):
  self._model = model or settings.tether_openai_model
  self._client = None
  self._ready = False
+ self._needs_key = False

  @property
  def model_name(self) -> str:
  return self._model

+ @property
+ def needs_api_key(self) -> bool:
+ return self._needs_key
+
  async def initialize(self) -> None:
+ if not self._api_key:
+ self._needs_key = True
+ return
+ self._needs_key = False
  try:
  from openai import AsyncOpenAI

@@ -146,6 +163,11 @@ class OpenAIService(LLMService):
  except ImportError:
  raise ImportError("openai package not installed")

+ async def set_api_key(self, api_key: str) -> None:
+ """Set the API key at runtime and reinitialize."""
+ self._api_key = api_key
+ await self.initialize()
+
  async def cleanup(self) -> None:
  if self._client:
  await self._client.close()
@@ -173,6 +195,275 @@ class OpenAIService(LLMService):

  return response.choices[0].message.content or ""

+ async def chat(
+ self,
+ messages: list[dict],
+ *,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+ think: bool = True,
+ ) -> dict:
+ """
+ Chat completion using the OpenAI API.
+
+ Args:
+ messages: List of message dicts with 'role' and 'content'
+ temperature: Sampling temperature
+ max_tokens: Maximum tokens to generate
+ think: Unused (kept for interface consistency)
+
+ Returns:
+ Dict with 'content', 'thinking', 'input_tokens', 'output_tokens'
+ """
+ if not self._client:
+ raise RuntimeError("OpenAI client not initialized")
+
+ response = await self._client.chat.completions.create(
+ model=self._model,
+ messages=messages,
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ usage = response.usage
+ return {
+ "content": response.choices[0].message.content or "",
+ "thinking": None,
+ "input_tokens": usage.prompt_tokens if usage else None,
+ "output_tokens": usage.completion_tokens if usage else None,
+ }
+
+
+ class GeminiService(LLMService):
+ """Google Gemini API service."""
+
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "gemini"
+
+ def __init__(
+ self,
+ api_key: Optional[str] = None,
+ model: Optional[str] = None,
+ ):
+ settings = get_settings()
+ self._api_key = api_key or settings.gemini_api_key
+ self._model = model or settings.tether_gemini_model
+ self._client = None
+ self._ready = False
+ self._needs_key = False
+ self._available_models: list[str] = []
+
+ @property
+ def model_name(self) -> str:
+ return self._model
+
+ @property
+ def needs_api_key(self) -> bool:
+ return self._needs_key
+
+ @property
+ def available_models(self) -> list[str]:
+ """List of available models (populated after initialize)."""
+ return self._available_models
+
+ async def initialize(self) -> None:
+ if not self._api_key:
+ self._needs_key = True
+ return
+ self._needs_key = False
+ try:
+ from google import genai
+
+ self._client = genai.Client(api_key=self._api_key)
+
+ # Discover available models
+ discovery = await discover_gemini_models(self._client)
+ if discovery.available:
+ self._available_models = discovery.models
+ else:
+ print(f"Warning: Could not discover Gemini models: {discovery.error}")
+ # Fall back to just the configured model
+ self._available_models = [self._model]
+
+ # Verify configured model is available
+ if self._available_models and self._model not in self._available_models:
+ available_str = ", ".join(self._available_models[:5])
+ if len(self._available_models) > 5:
+ available_str += f", ... ({len(self._available_models) - 5} more)"
+ print(
+ f"Warning: Model '{self._model}' not found in available models. "
+ f"Available: {available_str}. "
+ f"It may still work if you have access."
+ )
+
+ self._ready = True
+ except ImportError:
+ raise ImportError(
+ "google-genai package not installed. Install it with:\n"
+ " pip install google-genai\n"
+ "Or: uv add google-genai"
+ )
+
+ async def set_api_key(self, api_key: str) -> None:
+ """Set the API key at runtime and reinitialize."""
+ self._api_key = api_key
+ await self.initialize()
+
+ async def cleanup(self) -> None:
+ self._client = None
+ self._ready = False
+
+ def is_ready(self) -> bool:
+ return self._ready and self._client is not None
+
+ async def complete(
+ self,
+ prompt: str,
+ *,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+ ) -> str:
+ if not self._client:
+ raise RuntimeError("Gemini client not initialized")
+
+ from google.genai import types
+
+ config = types.GenerateContentConfig(
+ temperature=temperature,
+ max_output_tokens=max_tokens,
+ )
+
+ response = await self._client.aio.models.generate_content(
+ model=self._model,
+ contents=prompt,
+ config=config,
+ )
+
+ return response.text or ""
+
+ async def chat(
+ self,
+ messages: list[dict],
+ *,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+ think: bool = True,
+ ) -> dict:
+ """
+ Chat completion using the Gemini API.
+
+ Args:
+ messages: List of message dicts with 'role' and 'content'
+ temperature: Sampling temperature
+ max_tokens: Maximum tokens to generate
+ think: Enable thinking for supported models (default: True)
+
+ Returns:
+ Dict with 'content' and optionally 'thinking' keys
+ """
+ if not self._client:
+ raise RuntimeError("Gemini client not initialized")
+
+ from google.genai import types
+
+ # Extract system instruction from messages
+ system_instruction = None
+ chat_messages = []
+ for msg in messages:
+ if msg["role"] == "system":
+ system_instruction = msg["content"]
+ else:
+ chat_messages.append(msg)
+
+ # Build history (all messages except the last one)
+ history = []
+ for msg in chat_messages[:-1]:
+ role = "model" if msg["role"] == "assistant" else msg["role"]
+ history.append(
+ types.Content(
+ role=role,
+ parts=[types.Part.from_text(text=msg["content"])],
+ )
+ )
+
+ # Build config
+ config = types.GenerateContentConfig(
+ temperature=temperature,
+ max_output_tokens=max_tokens,
+ system_instruction=system_instruction,
+ )
+
+ # Enable thinking for 2.5 models
+ is_thinking_model = "2.5" in self._model
+ if think and is_thinking_model:
+ config.thinking_config = types.ThinkingConfig(
+ thinking_budget=8192,
+ )
+
+ # Create chat and send current message
+ chat_session = self._client.aio.chats.create(
+ model=self._model,
+ history=history,
+ config=config,
+ )
+
+ current_message = chat_messages[-1]["content"] if chat_messages else ""
+ response = await chat_session.send_message(current_message)
+
+ # Parse response parts for thinking content
+ thinking_text = None
+ content_text = ""
+
+ if response.candidates and response.candidates[0].content:
+ for part in response.candidates[0].content.parts:
+ if hasattr(part, "thought") and part.thought:
+ thinking_text = (thinking_text or "") + (part.text or "")
+ else:
+ content_text += part.text or ""
+ else:
+ content_text = response.text or ""
+
+ usage = response.usage_metadata
+ return {
+ "content": content_text,
+ "thinking": thinking_text,
+ "input_tokens": usage.prompt_token_count if usage else None,
+ "output_tokens": usage.candidates_token_count if usage else None,
+ }
+
+
+ @dataclass
+ class GeminiDiscoveryResult:
+ """Result of Gemini model discovery."""
+
+ available: bool
+ models: list[str]
+ error: Optional[str] = None
+
+
+ async def discover_gemini_models(client) -> GeminiDiscoveryResult:
+ """Discover available Gemini models from the API."""
+ try:
+ loop = asyncio.get_event_loop()
+ response = await loop.run_in_executor(None, client.models.list)
+ models = []
+ for model in response:
+ # Only include models that support generateContent
+ actions = getattr(model, "supported_actions", None)
+ if actions and "generateContent" in actions:
+ name = model.name or ""
+ # Strip "models/" prefix
+ short_name = name.removeprefix("models/")
+ if short_name:
+ models.append(short_name)
+ models.sort()
+ return GeminiDiscoveryResult(available=True, models=models)
+ except Exception as e:
+ return GeminiDiscoveryResult(
+ available=False,
+ models=[],
+ error=f"Failed to list Gemini models: {str(e)}",
+ )
+

  async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResult:
  """Discover Ollama instance and available models."""
@@ -212,7 +503,7 @@ async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResult:
  class OllamaService(LLMService):
  """Ollama LLM service."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "ollama"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "ollama"

  def __init__(
  self,
@@ -386,13 +677,15 @@ class OllamaService(LLMService):
  return {
  "content": message.get("content", ""),
  "thinking": message.get("thinking"), # None if not a thinking model
+ "input_tokens": data.get("prompt_eval_count"),
+ "output_tokens": data.get("eval_count"),
  }


  class LocalLLMService(LLMService):
  """Local LLM service using llama-cpp-python."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "local"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "local"

  def __init__(
  self,
@@ -518,6 +811,8 @@ def get_llm_service() -> LLMService:

  if backend == "openai":
  return OpenAIService()
+ elif backend == "gemini":
+ return GeminiService()
  elif backend == "ollama":
  return OllamaService()
  elif backend == "local":
@@ -0,0 +1,37 @@
+ """Model pricing data (USD per million tokens)."""
+
+ # (input_cost_per_m, output_cost_per_m)
+ MODEL_PRICING: dict[str, tuple[float, float]] = {
+ # OpenAI
+ "gpt-4o": (2.50, 10.00),
+ "gpt-4o-mini": (0.15, 0.60),
+ "gpt-4.1": (2.00, 8.00),
+ "gpt-4.1-mini": (0.40, 1.60),
+ "gpt-4.1-nano": (0.10, 0.40),
+ "o3-mini": (1.10, 4.40),
+ # Gemini
+ "gemini-2.0-flash": (0.10, 0.40),
+ "gemini-2.5-flash": (0.15, 0.60),
+ "gemini-2.5-pro": (1.25, 10.00),
+ "gemini-2.0-flash-lite": (0.075, 0.30),
+ # Ollama / Local — free
+ }
+
+
+ def estimate_cost(
+ model: str,
+ input_tokens: int,
+ output_tokens: int,
+ ) -> float | None:
+ """Estimate cost in USD. Returns None if model not in pricing table."""
+ # Try exact match first, then prefix match
+ pricing = MODEL_PRICING.get(model)
+ if not pricing:
+ for key, val in MODEL_PRICING.items():
+ if model.startswith(key):
+ pricing = val
+ break
+ if not pricing:
+ return None
+ input_cost, output_cost = pricing
+ return (input_tokens * input_cost + output_tokens * output_cost) / 1_000_000
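A few worked calls against the table above, including the prefix-match fallback (the dated Gemini model id is hypothetical):

estimate_cost("gpt-4o-mini", 1_000, 500)
# exact match: (1_000 * 0.15 + 500 * 0.60) / 1_000_000 = 0.00045

estimate_cost("gemini-2.5-pro-preview-0506", 1_000, 500)
# prefix match on "gemini-2.5-pro": (1_000 * 1.25 + 500 * 10.00) / 1_000_000 = 0.00625

estimate_cost("llama3.2", 1_000, 500)
# returns None: local models are not priced, so the UI shows no cost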
@@ -10,6 +10,7 @@ dependencies = [
  "pydantic-settings>=2.0.0",
  "httpx>=0.27.0",
  "openai>=1.0.0",
+ "google-genai>=1.0.0",
  "llama-cpp-python>=0.2.0",
  ]

@@ -1,10 +1,11 @@
  import { useBackendStatus } from "./hooks/useApi";
  import { Chat } from "./components/Chat";
  import { ModelStatus } from "./components/ModelStatus";
+ import { ApiKeyForm } from "./components/ApiKeyForm";
  import "./App.css";

  function App() {
- const { status, health, modelInfo, error, retry, changeModel } =
+ const { status, health, modelInfo, error, retry, changeModel, submitApiKey } =
  useBackendStatus();

  return (
@@ -45,6 +46,10 @@ function App() {
  </div>
  )}

+ {status === "needs-api-key" && (
+ <ApiKeyForm modelInfo={modelInfo} onSubmit={submitApiKey} />
+ )}
+
  {status === "error" && (
  <div className="error">
  <p>Failed to connect</p>
@@ -0,0 +1,64 @@
+ .api-key-form-container {
+ flex: 1;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ padding: 2rem;
+ }
+
+ .api-key-form {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ gap: 0.75rem;
+ max-width: 400px;
+ width: 100%;
+ padding: 2rem;
+ background-color: var(--color-surface);
+ border: 1px solid var(--color-border);
+ border-radius: var(--radius);
+ }
+
+ .api-key-form h2 {
+ font-size: 1.25rem;
+ font-weight: 600;
+ margin-bottom: 0.25rem;
+ }
+
+ .api-key-description {
+ color: var(--color-text-muted);
+ font-size: 0.875rem;
+ text-align: center;
+ }
+
+ .api-key-help-link {
+ color: var(--color-primary);
+ font-size: 0.875rem;
+ text-decoration: none;
+ }
+
+ .api-key-help-link:hover {
+ text-decoration: underline;
+ }
+
+ .api-key-input {
+ width: 100%;
+ margin-top: 0.25rem;
+ }
+
+ .api-key-error {
+ color: var(--color-error);
+ font-size: 0.8125rem;
+ text-align: center;
+ }
+
+ .api-key-submit {
+ width: 100%;
+ margin-top: 0.25rem;
+ }
+
+ .api-key-hint {
+ color: var(--color-text-muted);
+ font-size: 0.75rem;
+ opacity: 0.7;
+ }
@@ -0,0 +1,94 @@
+ import { useState } from "react";
+ import type { ModelsResponse } from "../hooks/useApi";
+ import "./ApiKeyForm.css";
+
+ interface BackendInfo {
+ name: string;
+ placeholder: string;
+ helpUrl: string;
+ helpText: string;
+ }
+
+ const BACKEND_INFO: Record<string, BackendInfo> = {
+ openai: {
+ name: "OpenAI",
+ placeholder: "sk-...",
+ helpUrl: "https://platform.openai.com/api-keys",
+ helpText: "Get an API key",
+ },
+ gemini: {
+ name: "Google Gemini",
+ placeholder: "AI...",
+ helpUrl: "https://aistudio.google.com/apikey",
+ helpText: "Get an API key",
+ },
+ };
+
+ interface ApiKeyFormProps {
+ modelInfo: ModelsResponse | null;
+ onSubmit: (apiKey: string) => Promise<void>;
+ }
+
+ export function ApiKeyForm({ modelInfo, onSubmit }: ApiKeyFormProps) {
+ const [apiKey, setApiKey] = useState("");
+ const [isSubmitting, setIsSubmitting] = useState(false);
+ const [error, setError] = useState<string | null>(null);
+
+ const backend = modelInfo?.backend || "openai";
+ const info = BACKEND_INFO[backend] || BACKEND_INFO.openai;
+
+ const handleSubmit = async (e: React.FormEvent) => {
+ e.preventDefault();
+ if (!apiKey.trim() || isSubmitting) return;
+
+ setIsSubmitting(true);
+ setError(null);
+
+ try {
+ await onSubmit(apiKey.trim());
+ } catch (err) {
+ setError(err instanceof Error ? err.message : "Failed to set API key");
+ } finally {
+ setIsSubmitting(false);
+ }
+ };
+
+ return (
+ <div className="api-key-form-container">
+ <form className="api-key-form" onSubmit={handleSubmit}>
+ <h2>API Key Required</h2>
+ <p className="api-key-description">
+ Enter your {info.name} API key to get started.
+ </p>
+ <a
+ className="api-key-help-link"
+ href={info.helpUrl}
+ target="_blank"
+ rel="noopener noreferrer"
+ >
+ {info.helpText}
+ </a>
+ <input
+ type="password"
+ className="api-key-input"
+ value={apiKey}
+ onChange={(e) => setApiKey(e.target.value)}
+ placeholder={info.placeholder}
+ autoFocus
+ disabled={isSubmitting}
+ />
+ {error && <p className="api-key-error">{error}</p>}
+ <button
+ type="submit"
+ className="api-key-submit"
+ disabled={!apiKey.trim() || isSubmitting}
+ >
+ {isSubmitting ? "Connecting..." : "Connect"}
+ </button>
+ <p className="api-key-hint">
+ Stored in memory only — not saved to disk.
+ </p>
+ </form>
+ </div>
+ );
+ }
@@ -185,6 +185,18 @@
  margin-bottom: 0;
  }

+ /* Token usage footer */
+ .message-usage {
+ display: flex;
+ gap: 0.75rem;
+ margin-top: 0.5rem;
+ padding-top: 0.375rem;
+ border-top: 1px solid var(--color-border);
+ font-size: 0.6875rem;
+ color: var(--color-text-muted);
+ opacity: 0.7;
+ }
+
  /* Message images */
  .message-images {
  display: flex;
@@ -57,6 +57,24 @@ export function ChatMessage({ message }: ChatMessageProps) {
  <div className="message-content">
  {isUser ? message.content : <Markdown>{message.content}</Markdown>}
  </div>
+ {!isUser && (message.input_tokens || message.output_tokens) && (
+ <div className="message-usage">
+ {message.input_tokens != null && (
+ <span>{message.input_tokens} in</span>
+ )}
+ {message.output_tokens != null && (
+ <span>{message.output_tokens} out</span>
+ )}
+ {message.cost != null && (
+ <span>
+ $
+ {message.cost < 0.01
+ ? message.cost.toFixed(4)
+ : message.cost.toFixed(2)}
+ </span>
+ )}
+ </div>
+ )}
  </div>
  );
  }
@@ -28,6 +28,8 @@ export function ModelStatus({
  return "var(--color-success)";
  case "connecting":
  return "var(--color-warning)";
+ case "needs-api-key":
+ return "var(--color-warning)";
  case "error":
  case "disconnected":
  return "var(--color-error)";
@@ -41,6 +43,8 @@ export function ModelStatus({
  return health?.model_loaded ? "Ready" : "Connected";
  case "connecting":
  return "Connecting...";
+ case "needs-api-key":
+ return "API Key Required";
  case "error":
  return "Error";
  case "disconnected":
@@ -7,6 +7,9 @@ export interface ChatMessage {
  content: string;
  images?: string[];
  thinking?: string;
+ input_tokens?: number;
+ output_tokens?: number;
+ cost?: number;
  timestamp?: number;
  }

@@ -23,7 +26,9 @@ export interface ChatRequest {
  export interface ChatResponse {
  response: string;
  thinking?: string;
- tokens_used?: number;
+ input_tokens?: number;
+ output_tokens?: number;
+ cost?: number;
  model?: string;
  finish_reason?: "stop" | "length" | "error";
  }
@@ -40,6 +45,12 @@ export interface ModelsResponse {
  models: string[];
  backend: string;
  error: string | null;
+ needs_api_key?: boolean;
+ }
+
+ export interface SetApiKeyResponse {
+ success: boolean;
+ message: string;
  }

  export interface SwitchModelResponse {
@@ -54,6 +65,7 @@ export type ConnectionStatus =
  | "loading-model"
  | "connected"
  | "disconnected"
+ | "needs-api-key"
  | "error";

  // Configuration
@@ -121,6 +133,13 @@ async function switchModel(model: string): Promise<SwitchModelResponse> {
  });
  }

+ async function sendApiKey(apiKey: string): Promise<SetApiKeyResponse> {
+ return apiFetch<SetApiKeyResponse>("/api-key", {
+ method: "POST",
+ body: JSON.stringify({ api_key: apiKey }),
+ });
+ }
+
  async function waitForBackend(): Promise<boolean> {
  // First, get the correct API URL from Tauri
  await getApiUrl();
@@ -177,8 +196,10 @@ export function useBackendStatus() {
  setHealth(healthData);
  setModelInfo(modelsData);

- // Check if model is loaded
- if (healthData.model_loaded) {
+ // Check if API key is needed
+ if (modelsData?.needs_api_key) {
+ setStatus("needs-api-key");
+ } else if (healthData.model_loaded) {
  setStatus("connected");
  } else if (modelsData?.error) {
  setStatus("error");
@@ -266,7 +287,22 @@ export function useBackendStatus() {
  }
  }, []);

- return { status, health, modelInfo, error, retry, changeModel };
+ const submitApiKey = useCallback(async (apiKey: string) => {
+ const result = await sendApiKey(apiKey);
+ if (!result.success) {
+ throw new Error(result.message);
+ }
+ // Refresh health and models after successful key submission
+ const [healthData, modelsData] = await Promise.all([
+ checkHealth(),
+ fetchModels().catch(() => null),
+ ]);
+ setHealth(healthData);
+ setModelInfo(modelsData);
+ setStatus("connected");
+ }, []);
+
+ return { status, health, modelInfo, error, retry, changeModel, submitApiKey };
  }

  export function useChat() {
@@ -302,6 +338,9 @@ export function useChat() {
  role: "assistant",
  content: response.response,
  thinking: response.thinking,
+ input_tokens: response.input_tokens,
+ output_tokens: response.output_tokens,
+ cost: response.cost,
  timestamp: Date.now(),
  };