npm - openvoiceui - Versions diffs - 1.0.0 - Mend

openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (185) hide show

package/.env.example +104 -0
package/Dockerfile +30 -0
package/LICENSE +21 -0
package/README.md +638 -0
package/SETUP.md +360 -0
package/app.py +232 -0
package/auto-approve-devices.js +111 -0
package/cli/index.js +372 -0
package/config/__init__.py +4 -0
package/config/default.yaml +43 -0
package/config/flags.yaml +67 -0
package/config/loader.py +203 -0
package/config/providers.yaml +71 -0
package/config/speech_normalization.yaml +182 -0
package/config/theme.json +4 -0
package/data/greetings.json +25 -0
package/default-pages/ai-image-creator.html +915 -0
package/default-pages/bulk-image-uploader.html +492 -0
package/default-pages/desktop.html +2865 -0
package/default-pages/file-explorer.html +854 -0
package/default-pages/interactive-map.html +655 -0
package/default-pages/style-guide.html +1005 -0
package/default-pages/website-setup.html +1623 -0
package/deploy/openclaw/Dockerfile +46 -0
package/deploy/openvoiceui.service +30 -0
package/deploy/setup-nginx.sh +50 -0
package/deploy/setup-sudo.sh +306 -0
package/deploy/skill-runner/Dockerfile +19 -0
package/deploy/skill-runner/requirements.txt +14 -0
package/deploy/skill-runner/server.py +269 -0
package/deploy/supertonic/Dockerfile +22 -0
package/deploy/supertonic/server.py +79 -0
package/docker-compose.pinokio.yml +11 -0
package/docker-compose.yml +59 -0
package/greetings.json +25 -0
package/index.html +65 -0
package/inject-device-identity.js +142 -0
package/package.json +82 -0
package/profiles/default.json +114 -0
package/profiles/manager.py +354 -0
package/profiles/schema.json +337 -0
package/prompts/voice-system-prompt.md +149 -0
package/providers/__init__.py +39 -0
package/providers/base.py +63 -0
package/providers/llm/__init__.py +12 -0
package/providers/llm/base.py +71 -0
package/providers/llm/clawdbot_provider.py +112 -0
package/providers/llm/zai_provider.py +115 -0
package/providers/registry.py +320 -0
package/providers/stt/__init__.py +12 -0
package/providers/stt/base.py +58 -0
package/providers/stt/webspeech_provider.py +49 -0
package/providers/stt/whisper_provider.py +100 -0
package/providers/tts/__init__.py +20 -0
package/providers/tts/base.py +91 -0
package/providers/tts/groq_provider.py +74 -0
package/providers/tts/supertonic_provider.py +72 -0
package/requirements.txt +38 -0
package/routes/__init__.py +10 -0
package/routes/admin.py +515 -0
package/routes/canvas.py +1315 -0
package/routes/chat.py +51 -0
package/routes/conversation.py +2158 -0
package/routes/elevenlabs_hybrid.py +306 -0
package/routes/greetings.py +98 -0
package/routes/icons.py +279 -0
package/routes/image_gen.py +364 -0
package/routes/instructions.py +190 -0
package/routes/music.py +838 -0
package/routes/onboarding.py +43 -0
package/routes/pi.py +62 -0
package/routes/profiles.py +215 -0
package/routes/report_issue.py +68 -0
package/routes/static_files.py +533 -0
package/routes/suno.py +664 -0
package/routes/theme.py +81 -0
package/routes/transcripts.py +199 -0
package/routes/vision.py +348 -0
package/routes/workspace.py +288 -0
package/server.py +1510 -0
package/services/__init__.py +1 -0
package/services/auth.py +143 -0
package/services/canvas_versioning.py +239 -0
package/services/db_pool.py +107 -0
package/services/gateway.py +16 -0
package/services/gateway_manager.py +333 -0
package/services/gateways/__init__.py +12 -0
package/services/gateways/base.py +110 -0
package/services/gateways/compat.py +264 -0
package/services/gateways/openclaw.py +1134 -0
package/services/health.py +100 -0
package/services/memory_client.py +455 -0
package/services/paths.py +26 -0
package/services/speech_normalizer.py +285 -0
package/services/tts.py +270 -0
package/setup-config.js +262 -0
package/sounds/air_horn.mp3 +0 -0
package/sounds/bruh.mp3 +0 -0
package/sounds/crowd_cheer.mp3 +0 -0
package/sounds/gunshot.mp3 +0 -0
package/sounds/impact.mp3 +0 -0
package/sounds/lets_go.mp3 +0 -0
package/sounds/record_stop.mp3 +0 -0
package/sounds/rewind.mp3 +0 -0
package/sounds/sad_trombone.mp3 +0 -0
package/sounds/scratch_long.mp3 +0 -0
package/sounds/yeah.mp3 +0 -0
package/src/adapters/ClawdBotAdapter.js +264 -0
package/src/adapters/_template.js +133 -0
package/src/adapters/elevenlabs-classic.js +841 -0
package/src/adapters/elevenlabs-hybrid.js +812 -0
package/src/adapters/hume-evi.js +676 -0
package/src/admin.html +1339 -0
package/src/app.js +8802 -0
package/src/core/Config.js +173 -0
package/src/core/EmotionEngine.js +307 -0
package/src/core/EventBridge.js +180 -0
package/src/core/EventBus.js +117 -0
package/src/core/VoiceSession.js +607 -0
package/src/face/BaseFace.js +259 -0
package/src/face/EyeFace.js +208 -0
package/src/face/HaloSmokeFace.js +509 -0
package/src/face/manifest.json +27 -0
package/src/face/previews/eyes.svg +16 -0
package/src/face/previews/orb.svg +29 -0
package/src/features/MusicPlayer.js +620 -0
package/src/features/Soundboard.js +128 -0
package/src/providers/DeepgramSTT.js +472 -0
package/src/providers/DeepgramStreamingSTT.js +766 -0
package/src/providers/GroqSTT.js +559 -0
package/src/providers/TTSPlayer.js +323 -0
package/src/providers/WebSpeechSTT.js +479 -0
package/src/providers/tts/BaseTTSProvider.js +81 -0
package/src/providers/tts/HumeProvider.js +77 -0
package/src/providers/tts/SupertonicProvider.js +174 -0
package/src/providers/tts/index.js +140 -0
package/src/shell/adapter-registry.js +154 -0
package/src/shell/caller-bridge.js +35 -0
package/src/shell/camera-bridge.js +28 -0
package/src/shell/canvas-bridge.js +32 -0
package/src/shell/commercial-bridge.js +44 -0
package/src/shell/face-bridge.js +44 -0
package/src/shell/music-bridge.js +60 -0
package/src/shell/orchestrator.js +233 -0
package/src/shell/profile-discovery.js +303 -0
package/src/shell/sounds-bridge.js +28 -0
package/src/shell/transcript-bridge.js +61 -0
package/src/shell/waveform-bridge.js +33 -0
package/src/styles/base.css +2862 -0
package/src/styles/face.css +417 -0
package/src/styles/pi-overrides.css +89 -0
package/src/styles/theme-dark.css +67 -0
package/src/test-tts.html +175 -0
package/src/ui/AppShell.js +544 -0
package/src/ui/ProfileSwitcher.js +228 -0
package/src/ui/SessionControl.js +240 -0
package/src/ui/face/FacePicker.js +195 -0
package/src/ui/face/FaceRenderer.js +309 -0
package/src/ui/settings/PlaylistEditor.js +366 -0
package/src/ui/settings/SettingsPanel.css +684 -0
package/src/ui/settings/SettingsPanel.js +419 -0
package/src/ui/settings/TTSVoicePreview.js +210 -0
package/src/ui/themes/ThemeManager.js +213 -0
package/src/ui/visualizers/BaseVisualizer.js +29 -0
package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
package/static/emulators/jsdos/js-dos.css +1 -0
package/static/emulators/jsdos/js-dos.js +22 -0
package/static/favicon.svg +55 -0
package/static/icons/apple-touch-icon.png +0 -0
package/static/icons/favicon-32.png +0 -0
package/static/icons/icon-192.png +0 -0
package/static/icons/icon-512.png +0 -0
package/static/install.html +449 -0
package/static/manifest.json +26 -0
package/static/sw.js +21 -0
package/tts_providers/__init__.py +136 -0
package/tts_providers/base_provider.py +319 -0
package/tts_providers/groq_provider.py +155 -0
package/tts_providers/hume_provider.py +226 -0
package/tts_providers/providers_config.json +119 -0
package/tts_providers/qwen3_provider.py +371 -0
package/tts_providers/resemble_provider.py +315 -0
package/tts_providers/supertonic_provider.py +557 -0
package/tts_providers/supertonic_tts.py +399 -0

package/profiles/schema.json ADDED Viewed

@@ -0,0 +1,337 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "openvoiceui/profile-schema",
+  "title": "Agent Profile",
+  "description": "Schema for agent personality and configuration profiles. ADR-002. v3.2 — full capability model: gateway queue, STT/TTS advanced, conversation flow, modes, session strategy, auth.",
+  "type": "object",
+  "required": ["id", "name", "system_prompt"],
+  "additionalProperties": true,
+  "properties": {
+    "id": {
+      "type": "string",
+      "pattern": "^[a-z0-9-]+$",
+      "description": "Unique identifier (lowercase letters, digits, and hyphens only)"
+    },
+    "name":        { "type": "string", "maxLength": 50 },
+    "description": { "type": "string", "maxLength": 200 },
+    "version":     { "type": "string", "pattern": "^\\d+\\.\\d+$", "default": "1.0" },
+    "author":      { "type": "string" },
+    "created":     { "type": "string", "format": "date" },
+    "icon":        { "type": "string", "description": "Emoji icon for UI display" },
+    "mode":        { "type": "string", "description": "Legacy special mode flag. Use adapter instead." },
+    "adapter": {
+      "type": "string",
+      "pattern": "^[a-z0-9-]+$",
+      "description": "Transport adapter: clawdbot | hume-evi | elevenlabs-classic. Defaults to clawdbot."
+    },
+    "adapter_config": {
+      "type": "object",
+      "additionalProperties": true,
+      "description": "Passed verbatim to adapter.init(). sessionKey + agentId are the clawdbot-specific keys."
+    },
+    "system_prompt": {
+      "type": "string",
+      "minLength": 10,
+      "description": "Agent personality / instructions."
+    },
+    "llm": {
+      "type": "object",
+      "required": ["provider"],
+      "additionalProperties": false,
+      "properties": {
+        "provider": {
+          "type": "string",
+          "enum": ["zai", "clawdbot", "gateway", "openai", "ollama", "anthropic", "hume", "elevenlabs"]
+        },
+        "model":     { "type": "string" },
+        "config_id": { "type": "string" },
+        "parameters": {
+          "type": "object",
+          "additionalProperties": false,
+          "properties": {
+            "max_tokens":  { "type": "integer", "minimum": 1 },
+            "temperature": { "type": "number",  "minimum": 0, "maximum": 2 }
+          }
+        },
+        "queue": {
+          "type": "object",
+          "additionalProperties": false,
+          "description": "OpenClaw Gateway queue + streaming behaviour. null = use openclaw.json global default.",
+          "properties": {
+            "mode": {
+              "type": ["string", "null"],
+              "enum": ["collect", "steer", "steer-backlog", null],
+              "default": null,
+              "description": "collect = full response then TTS (safe, default). steer = user can interrupt mid-tool-chain, gateway pivots immediately. steer-backlog = steer but original message preserved."
+            },
+            "block_streaming_chunk": {
+              "type": ["integer", "null"],
+              "minimum": 10,
+              "maximum": 2000,
+              "default": null,
+              "description": "Min chars in streaming delta before firing a TTS sentence. Maps to _extract_sentence(min_len) in conversation.py. null = platform default (40)."
+            },
+            "block_streaming_coalesce": {
+              "type": ["boolean", "null"],
+              "default": null,
+              "description": "Coalesce streaming tokens into larger chunks before TTS. null = platform default."
+            }
+          }
+        }
+      }
+    },
+    "voice": {
+      "type": "object",
+      "required": ["tts_provider", "voice_id"],
+      "additionalProperties": false,
+      "properties": {
+        "tts_provider": {
+          "type": "string",
+          "enum": ["supertonic", "groq", "elevenlabs", "hume", "qwen3"]
+        },
+        "voice_id":  { "type": "string" },
+        "speed":     { "type": "number", "minimum": 0.5, "maximum": 2.0, "default": 1.0 },
+        "parameters":{ "type": "object" },
+        "notes":     { "type": "string" },
+        "parallel_sentences": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Fire TTS for all post-LLM sentences simultaneously. false = sequential. null = platform default (true). Set false if provider rate-limits parallel requests."
+        },
+        "min_sentence_chars": {
+          "type": ["integer", "null"],
+          "minimum": 5,
+          "maximum": 500,
+          "default": null,
+          "description": "Min chars before dispatching a chunk to TTS. Prevents TTS on single words. null = platform default (40)."
+        },
+        "inter_sentence_gap_ms": {
+          "type": ["integer", "null"],
+          "minimum": 0,
+          "maximum": 2000,
+          "default": null,
+          "description": "Silence between TTS audio chunks in playback. null = no gap. Future hook — not yet wired in frontend."
+        }
+      }
+    },
+    "stt": {
+      "type": "object",
+      "required": ["provider"],
+      "additionalProperties": false,
+      "properties": {
+        "provider": {
+          "type": "string",
+          "enum": ["deepgram", "deepgram-streaming", "deepgram-batch", "groq", "webspeech", "whisper", "hume", "elevenlabs"]
+        },
+        "language": { "type": "string", "default": "en-US" },
+        "notes":    { "type": "string" },
+        "silence_timeout_ms": {
+          "type": ["integer", "null"],
+          "minimum": 500,
+          "maximum": 10000,
+          "default": null,
+          "description": "Ms of silence after final result before dispatching to AI. Maps to WebSpeechSTT.silenceDelayMs. null = platform default (3000ms)."
+        },
+        "vad_threshold": {
+          "type": ["integer", "null"],
+          "minimum": 10,
+          "maximum": 80,
+          "default": null,
+          "description": "FFT average amplitude threshold for voice detection. Lower = more sensitive. null = platform default (35). Range 10-80."
+        },
+        "max_recording_s": {
+          "type": ["integer", "null"],
+          "minimum": 10,
+          "maximum": 120,
+          "default": null,
+          "description": "Max recording duration in seconds before auto-chunking. Prevents FFmpeg timeout on long speech. null = platform default (45)."
+        },
+        "continuous": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Continuous listening vs one-shot per utterance. null = provider default. false = PTT-only agents."
+        },
+        "wake_words": {
+          "type": ["array", "null"],
+          "items": { "type": "string" },
+          "default": null,
+          "description": "Override WakeWordDetector phrase list. null = platform defaults. [] = disable wake-word."
+        },
+        "wake_word_required": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Start session in passive wake-word mode. null = not required (active from start)."
+        },
+        "ptt_default": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Default UI to PTT mode on session start. null = continuous listening (current default)."
+        },
+        "identify_on_wake": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Run face identification when wake word fires. Result is injected as context so agent can greet by name. null/true = enabled when camera is on."
+        },
+        "require_camera_auth": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Block wake word activation unless a registered face is recognized (min 50% confidence). Requires camera to be on. null/false = disabled."
+        }
+      }
+    },
+    "context": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "enable_fts":           { "type": "boolean", "default": true },
+        "enable_briefing":      { "type": "boolean", "default": true },
+        "enable_history":       { "type": "boolean", "default": true },
+        "max_history_messages": { "type": "integer", "minimum": 1, "maximum": 100, "default": 12 },
+        "notes":                { "type": "string" }
+      }
+    },
+    "vision": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Vision / camera AI model configuration.",
+      "properties": {
+        "model":    { "type": "string", "default": "glm-4.6v",
+                      "description": "Vision model ID for camera analysis and face recognition." },
+        "provider": { "type": "string", "default": "zai",
+                      "description": "Vision provider (zai = ZhipuAI/GLM)." }
+      }
+    },
+    "features": {
+      "type": "object",
+      "additionalProperties": true,
+      "description": "Feature toggles. Unknown features are allowed (forward-compat). Absent = off.",
+      "properties": {
+        "canvas":           { "type": "boolean", "default": true },
+        "vision":           { "type": "boolean", "default": true },
+        "music":            { "type": "boolean", "default": false },
+        "tools":            { "type": "boolean", "default": false },
+        "emotion_detection":{ "type": "boolean", "default": false },
+        "dj_soundboard":    { "type": "boolean", "default": false }
+      }
+    },
+    "ui": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "theme":            { "type": "string", "enum": ["dark", "light"], "default": "dark" },
+        "theme_preset":     { "type": "string", "enum": ["blue", "purple", "green", "red", "orange"] },
+        "face_enabled":     { "type": "boolean", "default": true },
+        "face_mood":        { "type": "string", "default": "neutral" },
+        "transcript_panel": { "type": "boolean", "default": true },
+        "thought_bubbles":  { "type": "boolean", "default": true },
+        "show_mode_badge":  { "type": "boolean", "default": false },
+        "mode_badge_text":  { "type": "string" }
+      }
+    },
+    "speech_normalization": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "strip_markdown": { "type": "boolean", "default": true },
+        "strip_urls":     { "type": "boolean", "default": true },
+        "strip_emoji":    { "type": "boolean", "default": true },
+        "max_length":     { "type": "integer", "minimum": 50, "maximum": 5000, "default": 800 },
+        "abbreviations":  { "type": "object", "additionalProperties": { "type": "string" } }
+      }
+    },
+    "conversation": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Conversation flow controls.",
+      "properties": {
+        "greeting": {
+          "type": ["string", "null"],
+          "maxLength": 500,
+          "default": null,
+          "description": "Text spoken on session start. null = random pool. '' = silent connect."
+        },
+        "auto_hangup_silence_ms": {
+          "type": ["integer", "null"],
+          "minimum": 5000,
+          "maximum": 600000,
+          "default": null,
+          "description": "Auto-hangup after N ms total silence. null = never."
+        },
+        "interruption_enabled": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "STT active during TTS playback — user can barge in. Requires llm.queue.mode=steer on gateway. null = platform default (false, STT blocked during TTS)."
+        },
+        "max_response_chars": {
+          "type": ["integer", "null"],
+          "minimum": 50,
+          "maximum": 16000,
+          "default": null,
+          "description": "Hard cap on AI response before TTS. Truncates at sentence boundary. null = no cap."
+        }
+      }
+    },
+    "modes": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Which UI mode buttons are available for this agent.",
+      "properties": {
+        "normal": { "type": "boolean", "default": true,  "description": "Standard continuous voice." },
+        "listen": { "type": "boolean", "default": false, "description": "Transcribe-only, no AI sends." },
+        "ptt":    { "type": "boolean", "default": true,  "description": "Push-to-talk UI option." },
+        "a2a":    { "type": "boolean", "default": false, "description": "Agent-to-Agent programmatic input mode." }
+      }
+    },
+    "session": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Gateway session key strategy.",
+      "properties": {
+        "key_strategy": {
+          "type": ["string", "null"],
+          "enum": ["persistent", "per-call", "per-message", null],
+          "default": null,
+          "description": "persistent = warm shared key (best for voice). per-call = new key per session.start(). per-message = fresh key per message (stateless). null = defer to adapter_config.sessionKey or platform default."
+        },
+        "key_prefix": {
+          "type": ["string", "null"],
+          "pattern": "^[a-z0-9-]+$",
+          "default": null,
+          "description": "Prefix for auto-generated keys. 'voice-dj' → 'voice-dj-1'. null = use env var or 'voice-main'."
+        }
+      }
+    },
+    "auth": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Per-profile auth override. Supplements global Clerk auth gate.",
+      "properties": {
+        "required": {
+          "type": ["boolean", "null"],
+          "default": null,
+          "description": "Override global auth. true = always require Clerk. false = public even when global on. null = inherit global."
+        },
+        "allowed_roles": {
+          "type": ["array", "null"],
+          "items": { "type": "string" },
+          "default": null,
+          "description": "Clerk roles allowed to activate this profile. null = any auth user. [] = disabled. ['admin'] = admin only."
+        }
+      }
+    }
+  }
+}

package/prompts/voice-system-prompt.md ADDED Viewed

@@ -0,0 +1,149 @@
+# OpenVoiceUI — Voice System Prompt
+# ============================================================
+# This file is injected before every user message sent to the LLM gateway.
+# It is the SINGLE SOURCE OF TRUTH for all interface capabilities.
+# Lines starting with # are comments — stripped before sending. NOT seen by the LLM.
+#
+# HOT-RELOAD: Changes here take effect on the next conversation request. No restart needed.
+# EDIT VIA ADMIN API: PUT /api/instructions/voice-system-prompt
+#
+# SCOPE: Everything in this file is OpenVoiceUI-native — independent of OpenClaw
+# workspace files, agent personality, or any external configuration.
+# An agent with completely empty workspace files can operate the full interface
+# using ONLY what is documented here.
+#
+# WORKSPACE FILES (AGENTS.md, SOUL.md, TOOLS.md, etc.) are for PERSONALIZATION ONLY:
+#   - Custom behavior on specific canvas pages ("speak like a bartender on the tavern page")
+#   - Auto-actions on specific user patterns ("when I open playlist, start playing")
+#   - Business context, user preferences, agent identity
+# ============================================================
+[OPENVOICEUI SYSTEM INSTRUCTIONS:
+VOICE AND TONE:
+You are a voice AI assistant embedded in OpenVoiceUI. Always respond in English.
+Respond in natural, conversational tone — NO markdown (no #, -, *, bullet lists, or tables).
+Be brief and direct. Use paragraphs, not lists. Never sound like a call center agent.
+BANNED OPENERS — never start a response with: "Hey there", "Great question", "Absolutely", "Of course", "Certainly", "Sure thing", "I hear you", "I understand you saying", "That's a great", or any variation. Just answer directly.
+Do NOT repeat or paraphrase what the user just said. Do NOT end every reply with a question.
+IDENTITY:
+Do NOT address anyone by name unless a [FACE RECOGNITION] tag appears in this exact message confirming their identity. Different people use this interface. Never use names from memory or prior sessions without face recognition confirmation in the current message.
+When a [FACE RECOGNITION] tag IS present, greet the person naturally by name and speak to them personally for the rest of the session.
+CRITICAL RULE — WORDS WITH EVERY TAG:
+Every response MUST contain spoken words alongside any action tags. NEVER output a bare tag alone — the user hears silence and sees nothing.
+BAD: [CANVAS:page-id]  GOOD: Here is your dashboard. [CANVAS:page-id]
+BAD: [MUSIC_PLAY]  GOOD: Playing something for you now. [MUSIC_PLAY]
+Tags are invisible to the user — they only hear your words and see your words.
+---
+CANVAS — OPEN EXISTING PAGE:
+[CANVAS:page-id] opens a canvas page in the UI overlay. Use the exact page-id from the [Canvas pages:] list provided above in this message. When opening, briefly say what the page shows (1-2 sentences).
+[CANVAS_MENU] opens the page picker so the user can browse all available pages.
+[CANVAS_URL:https://example.com] loads an external URL inside the canvas iframe (only works on sites that allow iframe embedding).
+CRITICAL: NEVER use the openclaw "canvas" tool with action:"present" — it fails with "node required". ONLY the [CANVAS:page-id] tag works to open pages.
+Repeating [CANVAS:same-page] on an already-open page forces a refresh — use this after updating a page.
+---
+CANVAS — CREATE A NEW PAGE:
+Step 1 — Write the HTML file using your write tool: path is workspace/canvas/pagename.html
+Step 2 — Open it in your response: say something like "Here it is. [CANVAS:pagename]"
+Step 3 — Verify it opened: exec("curl -s http://openvoiceui:5001/api/canvas/context") returns {"current_page": "pagename.html", "current_title": "..."}
+If current_page matches what you opened — confirm to user: "You should be seeing [page name] now."
+If current_page is still the old page — say so and resend [CANVAS:pagename].
+If current_page is null or empty — say "Opening the canvas now." and resend [CANVAS:pagename].
+---
+CANVAS — HTML RULES (mandatory for every page you create):
+NO external CDN scripts — Tailwind CDN, Bootstrap CDN, any <script src="https://..."> are BANNED. They silently break inside sandboxed iframes.
+All CSS and JS must be inline — inside <style> and <script> tags only.
+Google Fonts @import url(...) inside a <style> tag is OK (graceful fallback if it fails).
+Dark theme: background #0d1117 or #13141a, text #e2e8f0, accent blue #3b82f6 or amber #f59e0b.
+Body CSS must include: padding: 20px; color: #e2e8f0; background: #0a0a0a;
+Make pages visual — use cards, grids, tables, icons, real data from the conversation. No blank pages.
+---
+CANVAS — INTERACTIVE BUTTONS:
+Use postMessage for buttons that trigger AI actions — NEVER use href="#" (does nothing in iframe).
+Send text to AI: onclick="window.parent.postMessage({type:'canvas-action', action:'speak', text:'your message here'}, '*')"
+Open another page: onclick="window.parent.postMessage({type:'canvas-action', action:'navigate', page:'page-id'}, '*')"
+Open page picker menu: onclick="window.parent.postMessage({type:'canvas-action', action:'menu'}, '*')"
+Close canvas: onclick="window.parent.postMessage({type:'canvas-action', action:'close'}, '*')"
+External links that open new tab: <a href="https://example.com" target="_blank">Link text</a>
+---
+CANVAS — MAKE A PAGE PUBLIC (shareable without login):
+exec("curl -s -X PATCH http://openvoiceui:5001/api/canvas/manifest/page/PAGE_ID -H 'Content-Type: application/json' -d '{\"is_public\": true}'")
+Replace PAGE_ID with the page filename without .html extension.
+To make private again: use {"is_public": false}
+Shareable URL format: https://DOMAIN/pages/pagename.html
+---
+MUSIC CONTROL:
+[MUSIC_PLAY] plays a random track from the library.
+[MUSIC_PLAY:track name] plays a specific track — use the exact title from the [Available tracks:] list provided above in this message.
+[MUSIC_STOP] stops music playback.
+[MUSIC_NEXT] skips to the next track.
+Only use music tags when the user explicitly asks — EXCEPT when opening a music-related canvas page (music-list, playlist, library, etc.), also send [MUSIC_PLAY] in the same response so music starts automatically alongside the page.
+---
+SONG GENERATION (AI Music via Suno):
+[SUNO_GENERATE:description of the song] generates a new AI song. Takes approximately 45 seconds.
+Always tell the user what to expect: say something like "I will get that cooking now — should be ready in about 45 seconds!"
+The frontend handles the Suno API and shows a notification when done. Do NOT call any Suno APIs yourself.
+After generation, the new song appears in the [Available tracks:] list by its title. Use [MUSIC_PLAY:song title] to play it — do NOT use exec or shell commands to search for the file. The music system matches by title automatically.
+---
+SPOTIFY:
+[SPOTIFY:song name] or [SPOTIFY:song name|artist name] switches the player to Spotify and plays that track.
+Example: [SPOTIFY:Bohemian Rhapsody|Queen]
+Only use when the user specifically asks for a Spotify track.
+---
+SLEEP — GOODBYE AND DEACTIVATE:
+[SLEEP] puts the interface into passive wake-word listening mode.
+Use when the user says goodbye, goodnight, stop listening, go to sleep, I am out, peace, later, or any farewell phrase.
+Always give a brief farewell (1-2 sentences) BEFORE the [SLEEP] tag.
+Examples: "Later! Catch you next time. [SLEEP]" or "Goodnight! Sweet dreams. [SLEEP]"
+NEVER say you "should" go to sleep without including the [SLEEP] tag — the tag IS the action. Saying it without the tag does nothing.
+---
+SESSION RESET:
+[SESSION_RESET] clears the conversation history and starts fresh.
+Use sparingly — only when the context is clearly broken or the user explicitly asks to start over.
+---
+DJ SOUNDBOARD:
+[SOUND:name] plays a sound effect.
+ONLY use in DJ mode — triggered when the user explicitly says "be a DJ", "DJ mode", or "put on a set".
+NEVER use sound tags in normal conversation.
+Available sounds: air_horn, scratch_long, rewind, record_stop, crowd_cheer, crowd_hype, yeah, lets_go, gunshot, bruh, sad_trombone
+---
+FACE REGISTRATION:
+[REGISTER_FACE:Name] captures and saves the person's face from the camera.
+Only use when someone explicitly asks or introduces themselves — never register without consent.
+If the camera is off, tell the user they need to turn it on first.
+Example: "Nice to meet you! I will remember your face. [REGISTER_FACE:Sarah]"
+---
+CAMERA VISION:
+When a [CAMERA VISION: ...] tag appears in the context above this message, it contains a description of what the camera currently sees, analyzed by a vision model.
+Use this to answer the user's question naturally in your own words — do not repeat the raw description verbatim.
+If the vision tag says the camera is off or unavailable, tell the user they need to turn on the camera first.
+]

package/providers/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""
+Provider package — ADR-003: Abstract base class + registry pattern.
+Sub-packages:
+  providers.llm      — LLMProvider base class + concrete providers
+  providers.tts      — TTSProvider base class + concrete providers
+  providers.stt      — STTProvider base class + concrete providers
+  providers.registry — ProviderRegistry singleton (P5-T2)
+"""
+from providers.base import (
+    BaseProvider,
+    ProviderError,
+    ProviderUnavailableError,
+    ProviderGenerationError,
+)
+from providers.registry import (
+    ProviderRegistry,
+    ProviderType,
+    registry,
+    get_llm_provider,
+    get_tts_provider,
+    get_stt_provider,
+)
+__all__ = [
+    # Base classes
+    "BaseProvider",
+    "ProviderError",
+    "ProviderUnavailableError",
+    "ProviderGenerationError",
+    # Registry
+    "ProviderRegistry",
+    "ProviderType",
+    "registry",
+    "get_llm_provider",
+    "get_tts_provider",
+    "get_stt_provider",
+]

package/providers/base.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""
+Provider abstract base classes — ADR-003: Abstract base class + registry pattern.
+All provider types (LLM, TTS, STT) share this common interface contract.
+"""
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+class BaseProvider(ABC):
+    """Common base for all provider types."""
+    def __init__(self, config: Dict[str, Any] = None):
+        self._config = config or {}
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Return True if the provider can handle requests right now."""
+        pass
+    @abstractmethod
+    def get_info(self) -> Dict[str, Any]:
+        """Return metadata dict with at minimum 'name' and 'status' keys."""
+        pass
+    def get_config(self, key: str, default: Any = None) -> Any:
+        """Safely read a value from the provider config dict."""
+        return self._config.get(key, default)
+    def __repr__(self) -> str:
+        info = self.get_info()
+        return (
+            f"{self.__class__.__name__}("
+            f"name='{info.get('name', 'unknown')}', "
+            f"available={self.is_available()})"
+        )
+class ProviderError(Exception):
+    """Base exception for all provider errors."""
+    def __init__(self, provider_name: str, message: str):
+        self.provider_name = provider_name
+        super().__init__(f"[{provider_name}] {message}")
+class ProviderUnavailableError(ProviderError):
+    """Raised when a provider is not available or not configured."""
+    pass
+class ProviderGenerationError(ProviderError):
+    """Raised when a provider fails during generation/inference."""
+    pass
+__all__ = [
+    "BaseProvider",
+    "ProviderError",
+    "ProviderUnavailableError",
+    "ProviderGenerationError",
+]

package/providers/llm/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""LLM provider package.
+Importing this package registers all LLM providers with the registry.
+"""
+from providers.llm.base import LLMProvider, LLMResponse, LLMError
+# Import concrete providers so their registry.register() calls fire
+from providers.llm import zai_provider  # noqa: F401
+from providers.llm import clawdbot_provider  # noqa: F401
+__all__ = ["LLMProvider", "LLMResponse", "LLMError"]

package/providers/llm/base.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""
+LLM provider abstract base class.
+Based on: future-dev-plans/02-PROVIDER-SYSTEMS.md (llm_providers/base.py section)
+ADR-003: Abstract base class + registry pattern.
+"""
+from abc import abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Dict, Iterator, List, Optional
+from providers.base import BaseProvider, ProviderError
+@dataclass
+class LLMResponse:
+    """Result record returned by ``LLMProvider.generate``.
+    Only ``content``, ``model`` and ``provider`` are required; the remaining
+    fields have defaults.
+    """
+    # Required fields — presumably the generated text, the model identifier
+    # and the provider name (inferred from the field names; confirm in callers).
+    content: str
+    model: str
+    provider: str
+    # Defaults to a fresh empty dict per instance (default_factory), so
+    # instances never share one mutable mapping. Presumably token counts.
+    usage: Dict[str, int] = field(default_factory=dict)
+    # Defaults to 0.0; the name implies milliseconds.
+    latency_ms: float = 0.0
+    # Defaults to "stop".
+    finish_reason: str = "stop"
+    # Defaults to None; presumably the provider's unprocessed response object.
+    raw_response: Any = None
+class LLMProvider(BaseProvider):
+    """Abstract base class for LLM providers (ZAI, Clawdbot, OpenAI, etc.)."""
+    @abstractmethod
+    def generate(
+        self,
+        messages: List[Dict[str, str]],
+        system_prompt: Optional[str] = None,
+        model: Optional[str] = None,
+        **kwargs,
+    ) -> LLMResponse:
+        """Generate a complete (non-streaming) response."""
+        pass
+    @abstractmethod
+    def generate_stream(
+        self,
+        messages: List[Dict[str, str]],
+        system_prompt: Optional[str] = None,
+        model: Optional[str] = None,
+        **kwargs,
+    ) -> Iterator[str]:
+        """Generate a streaming response, yielding text chunks."""
+        pass
+    def list_models(self) -> List[Dict[str, Any]]:
+        return self._config.get("models", [])
+    def get_default_model(self) -> str:
+        return self._config.get("default_model", "default")
+    def get_info(self) -> Dict[str, Any]:
+        return {
+            "name": self._config.get("name", self.__class__.__name__),
+            "models": self.list_models(),
+            "available": self.is_available(),
+            "status": "active" if self.is_available() else "inactive",
+        }
+class LLMError(ProviderError):
+    """LLM-specific provider error."""
+    pass
+__all__ = ["LLMProvider", "LLMResponse", "LLMError"]