create-tether-app 0.1.2 → 0.1.4

This diff shows the changes between the 0.1.2 and 0.1.4 releases of create-tether-app as published to the public registry, and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -297,6 +297,16 @@ async function scaffoldProject(options) {
  )
  );
  console.log();
+ } else if (options.template === "gemini") {
+ console.log(
+ chalk.dim(
+ " Note: Set your GEMINI_API_KEY in .env to use the Gemini API."
+ )
+ );
+ console.log(
+ chalk.dim(" Get an API key at: https://aistudio.google.com/apikey")
+ );
+ console.log();
  }
  if (options.useTailwind) {
  console.log(
@@ -363,11 +373,17 @@ async function customizeForTemplate(targetDir, options) {
  if (options.template === "ollama") {
  content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
  content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
  } else if (options.template === "openai") {
  content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
+ } else if (options.template === "gemini") {
+ content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
  } else if (options.template === "custom") {
  content = content.replace(/^\s*"llama-cpp-python[^"]*",?\n/gm, "");
  content = content.replace(/^\s*"openai[^"]*",?\n/gm, "");
+ content = content.replace(/^\s*"google-genai[^"]*",?\n/gm, "");
  }
  await fs2.writeFile(pyprojectPath, content);
  }
@@ -380,6 +396,8 @@ async function customizeForTemplate(targetDir, options) {
  content = content.replace(backendRegex, '$1"ollama"');
  } else if (options.template === "openai") {
  content = content.replace(backendRegex, '$1"openai"');
+ } else if (options.template === "gemini") {
+ content = content.replace(backendRegex, '$1"gemini"');
  } else if (options.template === "custom") {
  content = content.replace(backendRegex, '$1"mock"');
  }
@@ -412,7 +430,10 @@ async function removeExampleComponents(targetDir) {
  if (await fs2.pathExists(appPath)) {
  let content = await fs2.readFile(appPath, "utf-8");
  content = content.replace(/import.*Chat.*from.*\n?/g, "");
- content = content.replace(/<Chat\s*\/>/g, "");
+ content = content.replace(
+ /\s*\{status === "connected" && <Chat\s*\/>\}/g,
+ ""
+ );
  await fs2.writeFile(appPath, content);
  }
  }
@@ -456,6 +477,10 @@ async function promptForOptions(options) {
  name: "OpenAI API - Use GPT models via API",
  value: "openai"
  },
+ {
+ name: "Google Gemini API - Use Gemini models via API",
+ value: "gemini"
+ },
  {
  name: "Custom - Bare FastAPI setup",
  value: "custom"
@@ -503,6 +528,11 @@ var LLM_TEMPLATES = [
  description: "Use OpenAI API (requires API key)",
  details: "Uses GPT models via the OpenAI API. Requires OPENAI_API_KEY env var."
  },
+ {
+ name: "gemini",
+ description: "Use Google Gemini API (requires API key)",
+ details: "Uses Gemini models via the Google AI API. Requires GEMINI_API_KEY env var."
+ },
  {
  name: "custom",
  description: "Bare FastAPI setup, no LLM integration",
@@ -513,7 +543,7 @@ function createCli() {
  const program = new Command();
  program.name("create-tether-app").description("Create a new Tether AI/ML desktop application").version(getPackageVersion()).argument("[project-name]", "Name of the project to create").option(
  "--llm <provider>",
- "LLM backend: ollama (default), local-llm, openai, custom"
+ "LLM backend: ollama (default), local-llm, openai, gemini, custom"
  ).option("-t, --template <template>", "Alias for --llm").option("-y, --yes", "Skip prompts and use defaults (ollama, with example)").option("--skip-prompts", "Alias for --yes").option("--skip-install", "Skip dependency installation").option("--use-npm", "Use npm instead of pnpm").option("--use-yarn", "Use yarn instead of pnpm").option("--dry-run", "Show what would be created without making changes").option("--no-example", "Skip example chat component").option("--tailwind", "Include Tailwind CSS setup").option("--no-tailwind", "Skip Tailwind CSS setup").option("-v, --verbose", "Show detailed output").option("--list-templates", "List available LLM templates").option("--check", "Check if all required dependencies are installed").addHelpText(
  "after",
  `
@@ -543,6 +573,7 @@ LLM Backends:
  ollama Run models locally via Ollama (recommended)
  local-llm Embed models directly with llama-cpp-python
  openai Use OpenAI API (requires API key)
+ gemini Use Google Gemini API (requires API key)
  custom Bare FastAPI setup, no LLM integration
  `
  ).action(async (projectName, options) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "create-tether-app",
- "version": "0.1.2",
+ "version": "0.1.4",
  "description": "CLI tool for scaffolding Tether AI/ML desktop applications",
  "type": "module",
  "bin": {
@@ -2,7 +2,7 @@
  TETHER_HOST=127.0.0.1
  TETHER_PORT=8000

- # LLM Backend: local, openai, or mock
+ # LLM Backend: local, ollama, openai, gemini, or mock
  TETHER_LLM_BACKEND=local

  # For local LLM (llama-cpp-python)
@@ -13,6 +13,10 @@ TETHER_CONTEXT_LENGTH=4096
  OPENAI_API_KEY=sk-your-api-key
  TETHER_OPENAI_MODEL=gpt-4o-mini

+ # For Google Gemini API
+ GEMINI_API_KEY=your-gemini-api-key
+ TETHER_GEMINI_MODEL=gemini-2.0-flash
+
  # Model parameters
  TETHER_DEFAULT_TEMPERATURE=0.7
  TETHER_DEFAULT_MAX_TOKENS=1024
@@ -8,7 +8,7 @@ from typing import AsyncIterator
  from fastapi import FastAPI
  from fastapi.middleware.cors import CORSMiddleware

- from app.routes import health, chat, models
+ from app.routes import health, chat, models, apikey
  from app.services.llm import get_llm_service


@@ -47,6 +47,7 @@ def create_app() -> FastAPI:
  app.include_router(health.router)
  app.include_router(chat.router)
  app.include_router(models.router)
+ app.include_router(apikey.router)

  return app

@@ -0,0 +1,57 @@
+ """
+ API key submission endpoint.
+ """
+
+ from fastapi import APIRouter, HTTPException, Request
+ from pydantic import BaseModel
+
+
+ class SetApiKeyRequest(BaseModel):
+ api_key: str
+
+
+ class SetApiKeyResponse(BaseModel):
+ success: bool
+ message: str
+
+
+ router = APIRouter()
+
+
+ @router.post("/api-key", response_model=SetApiKeyResponse)
+ async def set_api_key(request: Request, body: SetApiKeyRequest) -> SetApiKeyResponse:
+ """
+ Set the API key for the current LLM service at runtime.
+
+ Only supported for backends that require an API key (OpenAI, Gemini).
+ The key is stored in memory only and not persisted to disk.
+ """
+ llm_service = getattr(request.app.state, "llm_service", None)
+
+ if not llm_service:
+ raise HTTPException(status_code=503, detail="No LLM service configured")
+
+ if not hasattr(llm_service, "set_api_key"):
+ raise HTTPException(
+ status_code=400,
+ detail=f"Backend '{llm_service.service_type}' does not support runtime API key configuration",
+ )
+
+ try:
+ await llm_service.set_api_key(body.api_key)
+ except Exception as e:
+ return SetApiKeyResponse(
+ success=False,
+ message=f"Failed to initialize with provided key: {str(e)}",
+ )
+
+ if llm_service.is_ready():
+ return SetApiKeyResponse(
+ success=True,
+ message="API key accepted. Service is ready.",
+ )
+
+ return SetApiKeyResponse(
+ success=False,
+ message="API key set but service failed to become ready.",
+ )
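For reference, the new endpoint is easy to exercise with a plain HTTP client. A minimal sketch, assuming the scaffolded backend is running on the template's default host/port (127.0.0.1:8000, per the .env above) and using httpx, which is already a template dependency:

import asyncio
import httpx

async def main() -> None:
    async with httpx.AsyncClient(base_url="http://127.0.0.1:8000") as client:
        # The key is held in memory by the running service only (see the docstring above).
        resp = await client.post("/api-key", json={"api_key": "your-gemini-api-key"})
        data = resp.json()
        print(data["success"], data["message"])
        # On a valid key: True "API key accepted. Service is ready."

asyncio.run(main())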
@@ -8,6 +8,8 @@ from typing import Literal, Optional
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel, Field

+ from app.services.pricing import estimate_cost
+

  class ChatMessage(BaseModel):
  role: Literal["user", "assistant", "system"]
@@ -43,8 +45,14 @@ class ChatResponse(BaseModel):
  thinking: Optional[str] = Field(
  default=None, description="Model's reasoning/thinking content (for thinking models)"
  )
- tokens_used: Optional[int] = Field(
- default=None, description="Number of tokens used"
+ input_tokens: Optional[int] = Field(
+ default=None, description="Input tokens used"
+ )
+ output_tokens: Optional[int] = Field(
+ default=None, description="Output tokens generated"
+ )
+ cost: Optional[float] = Field(
+ default=None, description="Estimated cost in USD"
  )
  model: Optional[str] = Field(default=None, description="Model used")
  finish_reason: Optional[Literal["stop", "length", "error"]] = Field(
@@ -112,6 +120,9 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
  use_thinking = False if has_images else (body.think if body.think is not None else True)

  # Use chat API if available (supports thinking models), fallback to complete
+ input_tokens = None
+ output_tokens = None
+
  if hasattr(llm_service, "chat"):
  result = await llm_service.chat(
  messages,
@@ -119,10 +130,12 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
  max_tokens=body.max_tokens,
  think=use_thinking,
  )
- # chat() returns dict with 'content' and 'thinking'
+ # chat() returns dict with 'content', 'thinking', and token counts
  if isinstance(result, dict):
  response = result.get("content", "")
  thinking = result.get("thinking")
+ input_tokens = result.get("input_tokens")
+ output_tokens = result.get("output_tokens")
  else:
  # Fallback if chat returns string
  response, thinking = parse_thinking_content(result)
@@ -141,9 +154,17 @@ async def chat(request: Request, body: ChatRequest) -> ChatResponse:
  )
  response, thinking = parse_thinking_content(raw_response)

+ # Estimate cost if token counts are available
+ cost = None
+ if input_tokens is not None and output_tokens is not None:
+ cost = estimate_cost(llm_service.model_name, input_tokens, output_tokens)
+
  return ChatResponse(
  response=response,
  thinking=thinking,
+ input_tokens=input_tokens,
+ output_tokens=output_tokens,
+ cost=cost,
  model=llm_service.model_name,
  finish_reason="stop",
  )
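With these fields in place, a successful /chat response carries per-message usage. An illustrative payload (token counts are hypothetical; the cost follows the gemini-2.0-flash rates from the pricing table introduced below):

{
    "response": "Hello!",
    "thinking": None,
    "input_tokens": 1200,
    "output_tokens": 350,
    # (1200 * 0.10 + 350 * 0.40) / 1_000_000 = 0.00026 USD
    "cost": 0.00026,
    "model": "gemini-2.0-flash",
    "finish_reason": "stop",
}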
@@ -5,7 +5,7 @@ Model discovery and switching endpoints.
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel, Field

- from app.services.llm import discover_ollama, get_ollama_base_url
+ from app.services.llm import discover_gemini_models, discover_ollama, get_ollama_base_url


  class ModelsResponse(BaseModel):
@@ -14,6 +14,7 @@ class ModelsResponse(BaseModel):
  models: list[str]
  backend: str
  error: str | None = None
+ needs_api_key: bool = False


  class SwitchModelRequest(BaseModel):
@@ -62,12 +63,31 @@ async def list_models(request: Request) -> ModelsResponse:
  error=discovery.error,
  )

+ # For Gemini, use discovered models
+ if backend == "gemini":
+ models = getattr(llm_service, "_available_models", [])
+ if not models and llm_service.is_ready():
+ # Re-discover if models list is empty but service is ready
+ client = getattr(llm_service, "_client", None)
+ if client:
+ discovery = await discover_gemini_models(client)
+ if discovery.available:
+ models = discovery.models
+ return ModelsResponse(
+ available=llm_service.is_ready() or llm_service.needs_api_key,
+ current_model=llm_service.model_name if llm_service.is_ready() else None,
+ models=models,
+ backend=backend,
+ needs_api_key=llm_service.needs_api_key,
+ )
+
  # For other backends, return the configured model
  return ModelsResponse(
  available=llm_service.is_ready(),
  current_model=llm_service.model_name,
  models=[llm_service.model_name] if llm_service.is_ready() else [],
  backend=backend,
+ needs_api_key=llm_service.needs_api_key,
  )


@@ -119,6 +139,24 @@ async def switch_model(request: Request, body: SwitchModelRequest) -> SwitchModelResponse:
  message=f"Switched from {previous_model} to {body.model}",
  )

+ # For Gemini, switch to the requested model
+ if backend == "gemini":
+ available = getattr(llm_service, "_available_models", [])
+ if body.model not in available:
+ raise HTTPException(
+ status_code=404,
+ detail=f"Model '{body.model}' not found. Available: {', '.join(available)}",
+ )
+
+ llm_service._model = body.model
+
+ return SwitchModelResponse(
+ success=True,
+ previous_model=previous_model,
+ current_model=body.model,
+ message=f"Switched from {previous_model} to {body.model}",
+ )
+
  # Other backends don't support runtime switching
  raise HTTPException(
  status_code=400,
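The effect of needs_api_key on GET /models is worth spelling out. A representative (hypothetical) payload for a Gemini backend before any key has been submitted; available stays true so the frontend can show the key form instead of an error:

{
    "available": True,        # ready OR waiting for a key
    "current_model": None,    # no model until the service is ready
    "models": [],
    "backend": "gemini",
    "error": None,
    "needs_api_key": True,
}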
@@ -41,7 +41,7 @@ class LLMSettings(BaseSettings):
  extra="ignore",
  )

- tether_llm_backend: Literal["local", "ollama", "openai", "mock"] = "ollama"
+ tether_llm_backend: Literal["local", "ollama", "openai", "gemini", "mock"] = "ollama"
  tether_model_path: Optional[str] = None
  openai_api_key: Optional[str] = None
  tether_openai_model: str = "gpt-4o-mini"
@@ -49,6 +49,9 @@ class LLMSettings(BaseSettings):
  # Ollama settings - model can be empty to auto-select
  tether_ollama_model: Optional[str] = None
  tether_ollama_base_url: Optional[str] = None # Uses OLLAMA_HOST or default
+ # Gemini settings
+ gemini_api_key: Optional[str] = None
+ tether_gemini_model: str = "gemini-2.0-flash"


  @lru_cache
@@ -59,9 +62,14 @@ def get_settings() -> LLMSettings:
  class LLMService(ABC):
  """Abstract base class for LLM services."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "mock"
  model_name: str = "unknown"

+ @property
+ def needs_api_key(self) -> bool:
+ """Whether the service is waiting for an API key."""
+ return False
+
  @abstractmethod
  async def initialize(self) -> None:
  """Initialize the service."""
@@ -92,7 +100,7 @@
  class MockLLMService(LLMService):
  """Mock LLM service for testing."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "mock"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "mock"
  model_name = "mock"

  def __init__(self):
@@ -120,7 +128,7 @@
  class OpenAIService(LLMService):
  """OpenAI API service."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "openai"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "openai"

  def __init__(
  self,
@@ -132,12 +140,21 @@ class OpenAIService(LLMService):
  self._model = model or settings.tether_openai_model
  self._client = None
  self._ready = False
+ self._needs_key = False

  @property
  def model_name(self) -> str:
  return self._model

+ @property
+ def needs_api_key(self) -> bool:
+ return self._needs_key
+
  async def initialize(self) -> None:
+ if not self._api_key:
+ self._needs_key = True
+ return
+ self._needs_key = False
  try:
  from openai import AsyncOpenAI

@@ -146,6 +163,11 @@ class OpenAIService(LLMService):
  except ImportError:
  raise ImportError("openai package not installed")

+ async def set_api_key(self, api_key: str) -> None:
+ """Set the API key at runtime and reinitialize."""
+ self._api_key = api_key
+ await self.initialize()
+
  async def cleanup(self) -> None:
  if self._client:
  await self._client.close()
@@ -173,6 +195,275 @@ class OpenAIService(LLMService):

  return response.choices[0].message.content or ""

+ async def chat(
+ self,
+ messages: list[dict],
+ *,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+ think: bool = True,
+ ) -> dict:
+ """
+ Chat completion using the OpenAI API.
+
+ Args:
+ messages: List of message dicts with 'role' and 'content'
+ temperature: Sampling temperature
+ max_tokens: Maximum tokens to generate
+ think: Unused (kept for interface consistency)
+
+ Returns:
+ Dict with 'content', 'thinking', 'input_tokens', 'output_tokens'
+ """
+ if not self._client:
+ raise RuntimeError("OpenAI client not initialized")
+
+ response = await self._client.chat.completions.create(
+ model=self._model,
+ messages=messages,
+ temperature=temperature,
+ max_tokens=max_tokens,
+ )
+
+ usage = response.usage
+ return {
+ "content": response.choices[0].message.content or "",
+ "thinking": None,
+ "input_tokens": usage.prompt_tokens if usage else None,
+ "output_tokens": usage.completion_tokens if usage else None,
+ }
+
+
+ class GeminiService(LLMService):
+ """Google Gemini API service."""
+
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "gemini"
+
+ def __init__(
+ self,
+ api_key: Optional[str] = None,
+ model: Optional[str] = None,
+ ):
+ settings = get_settings()
+ self._api_key = api_key or settings.gemini_api_key
+ self._model = model or settings.tether_gemini_model
+ self._client = None
+ self._ready = False
+ self._needs_key = False
+ self._available_models: list[str] = []
+
+ @property
+ def model_name(self) -> str:
+ return self._model
+
+ @property
+ def needs_api_key(self) -> bool:
+ return self._needs_key
+
+ @property
+ def available_models(self) -> list[str]:
+ """List of available models (populated after initialize)."""
+ return self._available_models
+
+ async def initialize(self) -> None:
+ if not self._api_key:
+ self._needs_key = True
+ return
+ self._needs_key = False
+ try:
+ from google import genai
+
+ self._client = genai.Client(api_key=self._api_key)
+
+ # Discover available models
+ discovery = await discover_gemini_models(self._client)
+ if discovery.available:
+ self._available_models = discovery.models
+ else:
+ print(f"Warning: Could not discover Gemini models: {discovery.error}")
+ # Fall back to just the configured model
+ self._available_models = [self._model]
+
+ # Verify configured model is available
+ if self._available_models and self._model not in self._available_models:
+ available_str = ", ".join(self._available_models[:5])
+ if len(self._available_models) > 5:
+ available_str += f", ... ({len(self._available_models) - 5} more)"
+ print(
+ f"Warning: Model '{self._model}' not found in available models. "
+ f"Available: {available_str}. "
+ f"It may still work if you have access."
+ )
+
+ self._ready = True
+ except ImportError:
+ raise ImportError(
+ "google-genai package not installed. Install it with:\n"
+ " pip install google-genai\n"
+ "Or: uv add google-genai"
+ )
+
+ async def set_api_key(self, api_key: str) -> None:
+ """Set the API key at runtime and reinitialize."""
+ self._api_key = api_key
+ await self.initialize()
+
+ async def cleanup(self) -> None:
+ self._client = None
+ self._ready = False
+
+ def is_ready(self) -> bool:
+ return self._ready and self._client is not None
+
+ async def complete(
+ self,
+ prompt: str,
+ *,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+ ) -> str:
+ if not self._client:
+ raise RuntimeError("Gemini client not initialized")
+
+ from google.genai import types
+
+ config = types.GenerateContentConfig(
+ temperature=temperature,
+ max_output_tokens=max_tokens,
+ )
+
+ response = await self._client.aio.models.generate_content(
+ model=self._model,
+ contents=prompt,
+ config=config,
+ )
+
+ return response.text or ""
+
+ async def chat(
+ self,
+ messages: list[dict],
+ *,
+ temperature: float = 0.7,
+ max_tokens: Optional[int] = None,
+ think: bool = True,
+ ) -> dict:
+ """
+ Chat completion using the Gemini API.
+
+ Args:
+ messages: List of message dicts with 'role' and 'content'
+ temperature: Sampling temperature
+ max_tokens: Maximum tokens to generate
+ think: Enable thinking for supported models (default: True)
+
+ Returns:
+ Dict with 'content' and optionally 'thinking' keys
+ """
+ if not self._client:
+ raise RuntimeError("Gemini client not initialized")
+
+ from google.genai import types
+
+ # Extract system instruction from messages
+ system_instruction = None
+ chat_messages = []
+ for msg in messages:
+ if msg["role"] == "system":
+ system_instruction = msg["content"]
+ else:
+ chat_messages.append(msg)
+
+ # Build history (all messages except the last one)
+ history = []
+ for msg in chat_messages[:-1]:
+ role = "model" if msg["role"] == "assistant" else msg["role"]
+ history.append(
+ types.Content(
+ role=role,
+ parts=[types.Part.from_text(text=msg["content"])],
+ )
+ )
+
+ # Build config
+ config = types.GenerateContentConfig(
+ temperature=temperature,
+ max_output_tokens=max_tokens,
+ system_instruction=system_instruction,
+ )
+
+ # Enable thinking for 2.5 models
+ is_thinking_model = "2.5" in self._model
+ if think and is_thinking_model:
+ config.thinking_config = types.ThinkingConfig(
+ thinking_budget=8192,
+ )
+
+ # Create chat and send current message
+ chat_session = self._client.aio.chats.create(
+ model=self._model,
+ history=history,
+ config=config,
+ )
+
+ current_message = chat_messages[-1]["content"] if chat_messages else ""
+ response = await chat_session.send_message(current_message)
+
+ # Parse response parts for thinking content
+ thinking_text = None
+ content_text = ""
+
+ if response.candidates and response.candidates[0].content:
+ for part in response.candidates[0].content.parts:
+ if hasattr(part, "thought") and part.thought:
+ thinking_text = (thinking_text or "") + (part.text or "")
+ else:
+ content_text += part.text or ""
+ else:
+ content_text = response.text or ""
+
+ usage = response.usage_metadata
+ return {
+ "content": content_text,
+ "thinking": thinking_text,
+ "input_tokens": usage.prompt_token_count if usage else None,
+ "output_tokens": usage.candidates_token_count if usage else None,
+ }
+
+
+ @dataclass
+ class GeminiDiscoveryResult:
+ """Result of Gemini model discovery."""
+
+ available: bool
+ models: list[str]
+ error: Optional[str] = None
+
+
+ async def discover_gemini_models(client) -> GeminiDiscoveryResult:
+ """Discover available Gemini models from the API."""
+ try:
+ loop = asyncio.get_event_loop()
+ response = await loop.run_in_executor(None, client.models.list)
+ models = []
+ for model in response:
+ # Only include models that support generateContent
+ actions = getattr(model, "supported_actions", None)
+ if actions and "generateContent" in actions:
+ name = model.name or ""
+ # Strip "models/" prefix
+ short_name = name.removeprefix("models/")
+ if short_name:
+ models.append(short_name)
+ models.sort()
+ return GeminiDiscoveryResult(available=True, models=models)
+ except Exception as e:
+ return GeminiDiscoveryResult(
+ available=False,
+ models=[],
+ error=f"Failed to list Gemini models: {str(e)}",
+ )
+

  async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResult:
  """Discover Ollama instance and available models."""
@@ -212,7 +503,7 @@ async def discover_ollama(base_url: Optional[str] = None) -> OllamaDiscoveryResult:
  class OllamaService(LLMService):
  """Ollama LLM service."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "ollama"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "ollama"

  def __init__(
  self,
@@ -386,13 +677,15 @@ class OllamaService(LLMService):
  return {
  "content": message.get("content", ""),
  "thinking": message.get("thinking"), # None if not a thinking model
+ "input_tokens": data.get("prompt_eval_count"),
+ "output_tokens": data.get("eval_count"),
  }


  class LocalLLMService(LLMService):
  """Local LLM service using llama-cpp-python."""

- service_type: Literal["local", "ollama", "openai", "mock"] = "local"
+ service_type: Literal["local", "ollama", "openai", "gemini", "mock"] = "local"

  def __init__(
  self,
@@ -518,6 +811,8 @@ def get_llm_service() -> LLMService:

  if backend == "openai":
  return OpenAIService()
+ elif backend == "gemini":
+ return GeminiService()
  elif backend == "ollama":
  return OllamaService()
  elif backend == "local":
@@ -0,0 +1,37 @@
+ """Model pricing data (USD per million tokens)."""
+
+ # (input_cost_per_m, output_cost_per_m)
+ MODEL_PRICING: dict[str, tuple[float, float]] = {
+ # OpenAI
+ "gpt-4o": (2.50, 10.00),
+ "gpt-4o-mini": (0.15, 0.60),
+ "gpt-4.1": (2.00, 8.00),
+ "gpt-4.1-mini": (0.40, 1.60),
+ "gpt-4.1-nano": (0.10, 0.40),
+ "o3-mini": (1.10, 4.40),
+ # Gemini
+ "gemini-2.0-flash": (0.10, 0.40),
+ "gemini-2.5-flash": (0.15, 0.60),
+ "gemini-2.5-pro": (1.25, 10.00),
+ "gemini-2.0-flash-lite": (0.075, 0.30),
+ # Ollama / Local — free
+ }
+
+
+ def estimate_cost(
+ model: str,
+ input_tokens: int,
+ output_tokens: int,
+ ) -> float | None:
+ """Estimate cost in USD. Returns None if model not in pricing table."""
+ # Try exact match first, then prefix match
+ pricing = MODEL_PRICING.get(model)
+ if not pricing:
+ for key, val in MODEL_PRICING.items():
+ if model.startswith(key):
+ pricing = val
+ break
+ if not pricing:
+ return None
+ input_cost, output_cost = pricing
+ return (input_tokens * input_cost + output_tokens * output_cost) / 1_000_000
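A few worked calls against the table above, including the prefix-match fallback (the dated Gemini model id is hypothetical):

estimate_cost("gpt-4o-mini", 1_000, 500)
# exact match: (1_000 * 0.15 + 500 * 0.60) / 1_000_000 = 0.00045

estimate_cost("gemini-2.5-pro-preview-0506", 1_000, 500)
# prefix match on "gemini-2.5-pro": (1_000 * 1.25 + 500 * 10.00) / 1_000_000 = 0.00625

estimate_cost("llama3.2", 1_000, 500)
# returns None: local models are not priced, so the UI shows no cost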
@@ -10,6 +10,7 @@ dependencies = [
  "pydantic-settings>=2.0.0",
  "httpx>=0.27.0",
  "openai>=1.0.0",
+ "google-genai>=1.0.0",
  "llama-cpp-python>=0.2.0",
  ]

@@ -1,10 +1,11 @@
  import { useBackendStatus } from "./hooks/useApi";
  import { Chat } from "./components/Chat";
  import { ModelStatus } from "./components/ModelStatus";
+ import { ApiKeyForm } from "./components/ApiKeyForm";
  import "./App.css";

  function App() {
- const { status, health, modelInfo, error, retry, changeModel } =
+ const { status, health, modelInfo, error, retry, changeModel, submitApiKey } =
  useBackendStatus();

  return (
@@ -45,6 +46,10 @@ function App() {
  </div>
  )}

+ {status === "needs-api-key" && (
+ <ApiKeyForm modelInfo={modelInfo} onSubmit={submitApiKey} />
+ )}
+
  {status === "error" && (
  <div className="error">
  <p>Failed to connect</p>
@@ -0,0 +1,64 @@
+ .api-key-form-container {
+ flex: 1;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ padding: 2rem;
+ }
+
+ .api-key-form {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ gap: 0.75rem;
+ max-width: 400px;
+ width: 100%;
+ padding: 2rem;
+ background-color: var(--color-surface);
+ border: 1px solid var(--color-border);
+ border-radius: var(--radius);
+ }
+
+ .api-key-form h2 {
+ font-size: 1.25rem;
+ font-weight: 600;
+ margin-bottom: 0.25rem;
+ }
+
+ .api-key-description {
+ color: var(--color-text-muted);
+ font-size: 0.875rem;
+ text-align: center;
+ }
+
+ .api-key-help-link {
+ color: var(--color-primary);
+ font-size: 0.875rem;
+ text-decoration: none;
+ }
+
+ .api-key-help-link:hover {
+ text-decoration: underline;
+ }
+
+ .api-key-input {
+ width: 100%;
+ margin-top: 0.25rem;
+ }
+
+ .api-key-error {
+ color: var(--color-error);
+ font-size: 0.8125rem;
+ text-align: center;
+ }
+
+ .api-key-submit {
+ width: 100%;
+ margin-top: 0.25rem;
+ }
+
+ .api-key-hint {
+ color: var(--color-text-muted);
+ font-size: 0.75rem;
+ opacity: 0.7;
+ }
@@ -0,0 +1,94 @@
+ import { useState } from "react";
+ import type { ModelsResponse } from "../hooks/useApi";
+ import "./ApiKeyForm.css";
+
+ interface BackendInfo {
+ name: string;
+ placeholder: string;
+ helpUrl: string;
+ helpText: string;
+ }
+
+ const BACKEND_INFO: Record<string, BackendInfo> = {
+ openai: {
+ name: "OpenAI",
+ placeholder: "sk-...",
+ helpUrl: "https://platform.openai.com/api-keys",
+ helpText: "Get an API key",
+ },
+ gemini: {
+ name: "Google Gemini",
+ placeholder: "AI...",
+ helpUrl: "https://aistudio.google.com/apikey",
+ helpText: "Get an API key",
+ },
+ };
+
+ interface ApiKeyFormProps {
+ modelInfo: ModelsResponse | null;
+ onSubmit: (apiKey: string) => Promise<void>;
+ }
+
+ export function ApiKeyForm({ modelInfo, onSubmit }: ApiKeyFormProps) {
+ const [apiKey, setApiKey] = useState("");
+ const [isSubmitting, setIsSubmitting] = useState(false);
+ const [error, setError] = useState<string | null>(null);
+
+ const backend = modelInfo?.backend || "openai";
+ const info = BACKEND_INFO[backend] || BACKEND_INFO.openai;
+
+ const handleSubmit = async (e: React.FormEvent) => {
+ e.preventDefault();
+ if (!apiKey.trim() || isSubmitting) return;
+
+ setIsSubmitting(true);
+ setError(null);
+
+ try {
+ await onSubmit(apiKey.trim());
+ } catch (err) {
+ setError(err instanceof Error ? err.message : "Failed to set API key");
+ } finally {
+ setIsSubmitting(false);
+ }
+ };
+
+ return (
+ <div className="api-key-form-container">
+ <form className="api-key-form" onSubmit={handleSubmit}>
+ <h2>API Key Required</h2>
+ <p className="api-key-description">
+ Enter your {info.name} API key to get started.
+ </p>
+ <a
+ className="api-key-help-link"
+ href={info.helpUrl}
+ target="_blank"
+ rel="noopener noreferrer"
+ >
+ {info.helpText}
+ </a>
+ <input
+ type="password"
+ className="api-key-input"
+ value={apiKey}
+ onChange={(e) => setApiKey(e.target.value)}
+ placeholder={info.placeholder}
+ autoFocus
+ disabled={isSubmitting}
+ />
+ {error && <p className="api-key-error">{error}</p>}
+ <button
+ type="submit"
+ className="api-key-submit"
+ disabled={!apiKey.trim() || isSubmitting}
+ >
+ {isSubmitting ? "Connecting..." : "Connect"}
+ </button>
+ <p className="api-key-hint">
+ Stored in memory only — not saved to disk.
+ </p>
+ </form>
+ </div>
+ );
+ }
@@ -185,6 +185,18 @@
  margin-bottom: 0;
  }

+ /* Token usage footer */
+ .message-usage {
+ display: flex;
+ gap: 0.75rem;
+ margin-top: 0.5rem;
+ padding-top: 0.375rem;
+ border-top: 1px solid var(--color-border);
+ font-size: 0.6875rem;
+ color: var(--color-text-muted);
+ opacity: 0.7;
+ }
+
  /* Message images */
  .message-images {
  display: flex;
@@ -57,6 +57,24 @@ export function ChatMessage({ message }: ChatMessageProps) {
  <div className="message-content">
  {isUser ? message.content : <Markdown>{message.content}</Markdown>}
  </div>
+ {!isUser && (message.input_tokens || message.output_tokens) && (
+ <div className="message-usage">
+ {message.input_tokens != null && (
+ <span>{message.input_tokens} in</span>
+ )}
+ {message.output_tokens != null && (
+ <span>{message.output_tokens} out</span>
+ )}
+ {message.cost != null && (
+ <span>
+ $
+ {message.cost < 0.01
+ ? message.cost.toFixed(4)
+ : message.cost.toFixed(2)}
+ </span>
+ )}
+ </div>
+ )}
  </div>
  );
  }
@@ -28,6 +28,8 @@ export function ModelStatus({
  return "var(--color-success)";
  case "connecting":
  return "var(--color-warning)";
+ case "needs-api-key":
+ return "var(--color-warning)";
  case "error":
  case "disconnected":
  return "var(--color-error)";
@@ -41,6 +43,8 @@ export function ModelStatus({
  return health?.model_loaded ? "Ready" : "Connected";
  case "connecting":
  return "Connecting...";
+ case "needs-api-key":
+ return "API Key Required";
  case "error":
  return "Error";
  case "disconnected":
@@ -7,6 +7,9 @@ export interface ChatMessage {
  content: string;
  images?: string[];
  thinking?: string;
+ input_tokens?: number;
+ output_tokens?: number;
+ cost?: number;
  timestamp?: number;
  }

@@ -23,7 +26,9 @@ export interface ChatRequest {
  export interface ChatResponse {
  response: string;
  thinking?: string;
- tokens_used?: number;
+ input_tokens?: number;
+ output_tokens?: number;
+ cost?: number;
  model?: string;
  finish_reason?: "stop" | "length" | "error";
  }
@@ -40,6 +45,12 @@ export interface ModelsResponse {
  models: string[];
  backend: string;
  error: string | null;
+ needs_api_key?: boolean;
+ }
+
+ export interface SetApiKeyResponse {
+ success: boolean;
+ message: string;
  }

  export interface SwitchModelResponse {
@@ -54,6 +65,7 @@ export type ConnectionStatus =
  | "loading-model"
  | "connected"
  | "disconnected"
+ | "needs-api-key"
  | "error";

  // Configuration
@@ -121,6 +133,13 @@ async function switchModel(model: string): Promise<SwitchModelResponse> {
  });
  }

+ async function sendApiKey(apiKey: string): Promise<SetApiKeyResponse> {
+ return apiFetch<SetApiKeyResponse>("/api-key", {
+ method: "POST",
+ body: JSON.stringify({ api_key: apiKey }),
+ });
+ }
+
  async function waitForBackend(): Promise<boolean> {
  // First, get the correct API URL from Tauri
  await getApiUrl();
@@ -177,8 +196,10 @@ export function useBackendStatus() {
  setHealth(healthData);
  setModelInfo(modelsData);

- // Check if model is loaded
- if (healthData.model_loaded) {
+ // Check if API key is needed
+ if (modelsData?.needs_api_key) {
+ setStatus("needs-api-key");
+ } else if (healthData.model_loaded) {
  setStatus("connected");
  } else if (modelsData?.error) {
  setStatus("error");
@@ -266,7 +287,22 @@ export function useBackendStatus() {
  }
  }, []);

- return { status, health, modelInfo, error, retry, changeModel };
+ const submitApiKey = useCallback(async (apiKey: string) => {
+ const result = await sendApiKey(apiKey);
+ if (!result.success) {
+ throw new Error(result.message);
+ }
+ // Refresh health and models after successful key submission
+ const [healthData, modelsData] = await Promise.all([
+ checkHealth(),
+ fetchModels().catch(() => null),
+ ]);
+ setHealth(healthData);
+ setModelInfo(modelsData);
+ setStatus("connected");
+ }, []);
+
+ return { status, health, modelInfo, error, retry, changeModel, submitApiKey };
  }

  export function useChat() {
@@ -302,6 +338,9 @@ export function useChat() {
  role: "assistant",
  content: response.response,
  thinking: response.thinking,
+ input_tokens: response.input_tokens,
+ output_tokens: response.output_tokens,
+ cost: response.cost,
  timestamp: Date.now(),
  };