openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
package/SETUP.md ADDED
@@ -0,0 +1,360 @@
1
+ # OpenVoiceUI Setup Guide
2
+
3
+ This guide covers three install paths:
4
+ - [Docker](#docker-quick-start) — easiest, works on any OS
5
+ - [VPS / Linux server](#vps-setup) — for production self-hosting
6
+ - [Local development](#local-development) — for contributors
7
+
8
+ ---
9
+
10
+ ## Prerequisites
11
+
12
+ | Requirement | Notes |
13
+ |---|---|
14
+ | **OpenClaw `2026.3.13`** | The AI gateway that powers conversations. Required. [Download here](https://openclaw.ai) |
15
+ | **Groq API key** | For Orpheus TTS (fast, high quality). Free tier available. [Get key](https://console.groq.com) |
16
+ | Python 3.10+ | For local / VPS installs |
17
+ | Docker + Compose | For Docker install only |
18
+
19
+ > **OpenClaw is the most important dependency.** Without it the server starts but cannot respond to any voice input. OpenVoiceUI is tested with **openclaw@2026.3.13** — other versions may have breaking changes. See [OpenClaw Requirements](docs/openclaw-requirements.md) for full compatibility details.
20
+
21
+ ---
22
+
23
+ ## Upgrading from Pre-2.0
24
+
25
+ If you have an existing installation, runtime data directories have moved under `runtime/`:
26
+
27
+ | Old Location | New Location |
28
+ |---|---|
29
+ | `uploads/` | `runtime/uploads/` |
30
+ | `canvas-pages/` | `runtime/canvas-pages/` |
31
+ | `known_faces/` | `runtime/known_faces/` |
32
+ | `music/` | `runtime/music/` |
33
+ | `generated_music/` | `runtime/generated_music/` |
34
+ | `faces/` | `runtime/faces/` |
35
+ | `transcripts/` | `runtime/transcripts/` |
36
+ | `usage.db` | `runtime/usage.db` |
37
+
38
+ To migrate, move your existing data into the new paths:
39
+ ```bash
40
+ mkdir -p runtime
41
+ for dir in uploads canvas-pages known_faces music generated_music faces transcripts; do
42
+ [ -d "$dir" ] && mv "$dir" "runtime/$dir"
43
+ done
44
+ [ -f usage.db ] && mv usage.db runtime/usage.db
45
+ ```
46
+
47
+ Docker users: `docker compose down`, pull the latest code, then `docker compose up --build`. Volume mounts in `docker-compose.yml` already point to `runtime/`.
48
+
49
+ ---
50
+
51
+ ## OpenClaw Setup
52
+
53
+ > **Docker users:** Skip this section — `docker compose up` installs and configures
54
+ > OpenClaw automatically with the correct version and settings.
55
+
56
+ 1. Install the tested version: `npm i -g openclaw@2026.3.13`
57
+ 2. Run the setup wizard: `openclaw onboard` (choose your LLM provider and API key)
58
+ 3. Start the gateway: `openclaw gateway` (listens on `ws://127.0.0.1:18791`)
59
+ 4. Copy your auth token — you'll need it for `CLAWDBOT_AUTH_TOKEN` in `.env`
60
+
61
+ **Using an existing OpenClaw install?** See [OpenClaw Requirements](docs/openclaw-requirements.md)
62
+ for the full list of version and configuration requirements. OpenVoiceUI needs
63
+ specific gateway settings to work — don't skip this if you already have OpenClaw
64
+ running with other agents.
65
+
66
+ ---
67
+
68
+ ## Docker Quick Start
69
+
70
+ **Fastest path. Recommended for trying OpenVoiceUI.**
71
+
72
+ ```bash
73
+ git clone https://github.com/MCERQUA/OpenVoiceUI.git
74
+ cd OpenVoiceUI
75
+ cp .env.example .env
76
+ ```
77
+
78
+ ---
79
+
80
+ ### Do you already have OpenClaw running?
81
+
82
+ #### No — start everything fresh (recommended)
83
+
84
+ The compose stack starts three containers for you:
85
+ - **openclaw** — AI gateway on port 18791
86
+ - **openvoiceui** — the UI/API server on port 5001
87
+ - **supertonic** — local TTS engine
88
+
89
+ Edit `.env` and set at minimum:
90
+ ```bash
91
+ CLAWDBOT_AUTH_TOKEN=your-openclaw-token # from openclaw gateway config
92
+ GROQ_API_KEY=your-groq-key
93
+ SECRET_KEY=any-random-string-here
94
+ ```
95
+
96
+ **Optional: enable the coding-agent skill**
97
+
98
+ The coding-agent skill lets the AI write code, create files, and run commands
99
+ autonomously. It requires a coding CLI installed in the openclaw container.
100
+ Set `CODING_CLI` in your `.env` before building — same options as openclaw's
101
+ setup wizard:
102
+
103
+ ```bash
104
+ # Choose one (or leave unset to skip):
105
+ CODING_CLI=codex # OpenAI Codex — also needs OPENAI_API_KEY
106
+ CODING_CLI=claude # Anthropic Claude Code — also needs ANTHROPIC_API_KEY
107
+ CODING_CLI=opencode # OpenCode — bring your own provider key
108
+ CODING_CLI=pi # Pi coding agent — bring your own provider key
109
+ ```
110
+
111
+ > If you already ran openclaw's interactive setup wizard, it asked you this
112
+ > question — you don't need to set it here.
113
+
114
+ ```bash
115
+ docker compose up --build
116
+ ```
117
+
118
+ #### Yes — connect to your existing OpenClaw
119
+
120
+ > **Important:** OpenVoiceUI is tested with **openclaw@2026.3.13**. If your existing
121
+ > install is a different version, voice features may not work. See
122
+ > [OpenClaw Requirements](docs/openclaw-requirements.md) for the full compatibility
123
+ > checklist.
124
+
125
+ Point openvoiceui at your running OpenClaw gateway instead of starting a new one.
126
+
127
+ 1. Make sure your existing openclaw gateway has `bind: "lan"` (not `"loopback"`) so it
128
+ accepts connections from other containers, and the required auth settings:
129
+ ```json
130
+ "gateway": {
131
+ "bind": "lan",
132
+ "auth": { "mode": "token" },
133
+ "controlUi": {
134
+ "dangerouslyDisableDeviceAuth": true,
135
+ "dangerouslyAllowHostHeaderOriginFallback": true
136
+ }
137
+ }
138
+ ```
139
+
140
+ 2. Share the canvas-pages directory between your existing openclaw container and openvoiceui
141
+ (both need to read/write the same pages). Add a bind mount to **both** containers:
142
+ ```yaml
143
+ # your existing openclaw container (add to its volumes):
144
+ - ./canvas-pages:/path/to/openclaw/workspace/canvas-pages
145
+
146
+ # openvoiceui (already in docker-compose.yml):
147
+ - ./canvas-pages:/app/runtime/canvas-pages
148
+ ```
149
+ Pre-create the canvas manifest file before starting (Docker would otherwise create it as a directory):
150
+ ```bash
151
+ mkdir -p canvas-pages
152
+ echo '{"pages":{},"categories":{},"order":[]}' > canvas-manifest.json
153
+ ```
154
+
155
+ 3. Edit `.env`:
156
+ ```bash
157
+ CLAWDBOT_GATEWAY_URL=ws://<your-openclaw-host>:<port> # e.g. ws://192.168.1.10:18791
158
+ CLAWDBOT_AUTH_TOKEN=your-openclaw-token
159
+ GROQ_API_KEY=your-groq-key
160
+ SECRET_KEY=any-random-string-here
161
+ ```
162
+
163
+ 4. Start only the openvoiceui and supertonic services (skip the built-in openclaw):
164
+ ```bash
165
+ docker compose up --build openvoiceui supertonic
166
+ ```
167
+
168
+ ---
169
+
170
+ > Leave `CANVAS_PAGES_DIR` unset for Docker — it defaults correctly to the mounted volume.
171
+
172
+ Once the containers are running (whichever option you chose above), open [http://localhost:5001](http://localhost:5001) in your browser. Allow microphone access and speak.
173
+
174
+ **To stop:**
175
+ ```bash
176
+ docker compose down
177
+ ```
178
+
179
+ **Persistent data** (canvas pages, music, uploads, transcripts) lives in Docker named volumes and survives container restarts.
180
+
181
+ ---
182
+
183
+ ## VPS Setup
184
+
185
+ For a production install on a Linux VPS with nginx + SSL.
186
+
187
+ ### 1. Clone and configure
188
+
189
+ ```bash
190
+ git clone https://github.com/MCERQUA/OpenVoiceUI.git
191
+ cd OpenVoiceUI
192
+ cp .env.example .env
193
+ nano .env # or your preferred editor
194
+ ```
195
+
196
+ Set these in `.env`:
197
+ ```bash
198
+ PORT=5001
199
+ DOMAIN=your-domain.com
200
+ SECRET_KEY=paste-generated-key-here # generate with: python3 -c "import secrets; print(secrets.token_hex(32))"
201
+ CLAWDBOT_AUTH_TOKEN=your-openclaw-token
202
+ CLAWDBOT_GATEWAY_URL=ws://127.0.0.1:18791
203
+ GROQ_API_KEY=your-groq-key
204
+ CANVAS_PAGES_DIR=/var/www/openvoiceui/canvas-pages
205
+ ```
206
+
207
+ ### 2. Create Python virtual environment
208
+
209
+ ```bash
210
+ python3 -m venv venv
211
+ venv/bin/pip install -r requirements.txt
212
+ ```
213
+
214
+ ### 3. Test the server runs
215
+
216
+ ```bash
217
+ set -a && source .env && set +a
218
+ venv/bin/python3 server.py
219
+ ```
220
+
221
+ Open `http://your-server-ip:5001` to verify. Press Ctrl+C when done.
222
+
223
+ ### 4. Run the setup script (nginx + SSL + systemd)
224
+
225
+ Edit the top of `deploy/setup-sudo.sh` to set your domain and email, then:
226
+
227
+ ```bash
228
+ sudo bash deploy/setup-sudo.sh
229
+ ```
230
+
231
+ This creates:
232
+ - `/etc/nginx/sites-available/your-domain.com` — nginx reverse proxy config
233
+ - `/etc/systemd/system/openvoiceui.service` — systemd service
234
+ - `/var/www/openvoiceui/canvas-pages` — canvas page storage directory
235
+ - Let's Encrypt SSL certificate
236
+
237
+ ### 5. Verify
238
+
239
+ ```bash
240
+ sudo systemctl status openvoiceui
241
+ sudo journalctl -u openvoiceui -f
242
+ ```
243
+
244
+ Open `https://your-domain.com` in your browser.
245
+
246
+ ---
247
+
248
+ ## Local Development
249
+
250
+ For contributors running without Docker or a VPS.
251
+
252
+ ```bash
253
+ git clone https://github.com/MCERQUA/OpenVoiceUI.git
254
+ cd OpenVoiceUI
255
+ python3 -m venv venv
256
+ venv/bin/pip install -r requirements.txt
257
+ cp .env.example .env
258
+ # Edit .env — set CLAWDBOT_AUTH_TOKEN and GROQ_API_KEY at minimum
259
+ venv/bin/python3 server.py
260
+ ```
261
+
262
+ Open [http://localhost:5001](http://localhost:5001).
263
+
264
+ The system prompt (`prompts/voice-system-prompt.md`) hot-reloads — edit it without restarting the server.
265
+
266
+ ---
267
+
268
+ ## Configuration Reference
269
+
270
+ All configuration is via `.env`. Key variables:
271
+
272
+ | Variable | Required | Default | Description |
273
+ |---|---|---|---|
274
+ | `CLAWDBOT_AUTH_TOKEN` | **Yes** | — | OpenClaw gateway auth token |
275
+ | `CLAWDBOT_GATEWAY_URL` | No | `ws://127.0.0.1:18791` | OpenClaw WebSocket URL |
276
+ | `OPENCLAW_VERSION` | No | `2026.3.13` | Docker build arg: OpenClaw version to install |
277
+ | `GROQ_API_KEY` | Recommended | — | Groq Orpheus TTS |
278
+ | `SECRET_KEY` | Recommended | random | Flask session key |
279
+ | `PORT` | No | `5001` | Server port |
280
+ | `CANVAS_PAGES_DIR` | No | `runtime/canvas-pages/` in app dir | Where canvas HTML pages are stored |
281
+ | `GATEWAY_SESSION_KEY` | No | `voice-main-1` | Session prefix (change for multiple instances) |
282
+ | `SUPERTONIC_MODEL_PATH` | No | — | Path to local ONNX TTS model |
283
+ | `FAL_KEY` | No | — | fal.ai key for Qwen3-TTS |
284
+ | `HUME_API_KEY` | No | — | Hume EVI TTS |
285
+ | `HUME_SECRET_KEY` | No | — | Hume EVI TTS secret |
286
+ | `GEMINI_API_KEY` | No | — | Vision / screenshot analysis |
287
+ | `SUNO_API_KEY` | No | — | AI music generation |
288
+ | `CLERK_PUBLISHABLE_KEY` | No | — | Auth (leave unset for open access) |
289
+
290
+ ---
291
+
292
+ ## Useful Commands
293
+
294
+ ```bash
295
+ # VPS: view live logs
296
+ sudo journalctl -u openvoiceui -f
297
+
298
+ # VPS: restart
299
+ sudo systemctl restart openvoiceui
300
+
301
+ # VPS: status
302
+ systemctl status openvoiceui
303
+
304
+ # Docker: view logs
305
+ docker compose logs -f
306
+
307
+ # Docker: restart
308
+ docker compose restart
309
+
310
+ # Run tests
311
+ venv/bin/python -m pytest tests/
312
+ ```
313
+
314
+ ---
315
+
316
+ ## Troubleshooting
317
+
318
+ **Voice input not working**
319
+ - Allow microphone in browser (HTTPS required in production, HTTP localhost is fine for dev)
320
+ - Check browser console for WebSpeech API errors
321
+ - Chrome/Edge recommended; Firefox has limited WebSpeech support
322
+
323
+ **Agent not responding**
324
+ - Check OpenClaw is running: `ss -tlnp | grep 18791`
325
+ - Check `CLAWDBOT_AUTH_TOKEN` is set in `.env` and matches your OpenClaw token
326
+ - Check logs: `sudo journalctl -u openvoiceui -f` or `docker compose logs -f`
327
+ - Look for `### Persistent WS connected` in logs — if missing, gateway connection failed
328
+
329
+ **TTS audio not playing**
330
+ - Check `GROQ_API_KEY` is set and valid
331
+ - Try a different TTS provider in the Settings panel
332
+ - Check logs for `tts_error` events
333
+
334
+ **502 Bad Gateway (nginx)**
335
+ - Verify the server is running: `systemctl status openvoiceui`
336
+ - Verify PORT in `.env` matches nginx proxy port (default 5001)
337
+ - Check nginx error log: `sudo tail -f /var/log/nginx/error.log`
338
+
339
+ **Canvas pages not loading / black screen**
340
+ - Verify `CANVAS_PAGES_DIR` path exists and is writable by the server user
341
+ - Docker: leave `CANVAS_PAGES_DIR` unset so it uses the mounted volume
342
+ - Docker: both `openclaw` and `openvoiceui` share the `canvas-pages` named volume — if you
343
+ customised the compose file make sure both services mount it at the same paths as the
344
+ default `docker-compose.yml`
345
+ - Check logs for canvas route errors
346
+
347
+ **Permission errors on VPS**
348
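+ - Canvas dir and uploads must be owned by the user the service runs as: `sudo chown -R $USER /var/www/openvoiceui` (replace `$USER` with the service account if the systemd unit runs as a different user than your login shell)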
349
+
350
+ **Separate openclaw container (not using docker-compose)**
351
+ - If you run openclaw outside of this compose stack (e.g. an existing installation), make sure
352
+ openclaw's gateway `bind` is set to `"lan"` (not `"loopback"`) so openvoiceui can reach it:
353
+ ```json
354
+ "gateway": {
355
+ "bind": "lan",
356
+ "controlUi": { "dangerouslyAllowHostHeaderOriginFallback": true }
357
+ }
358
+ ```
359
+ - Share the canvas-pages directory between the two containers via a bind mount so openclaw
360
+ can write pages that openvoiceui serves.
package/app.py ADDED
@@ -0,0 +1,232 @@
1
+ """
2
+ Flask application factory for OpenVoiceUI (ai-eyes2).
3
+
4
+ Usage:
5
+ from app import create_app
6
+ app, sock = create_app()
7
+
8
+ This factory pattern allows:
9
+ - Blueprint registration (Phase 2 tasks P2-T2 through P2-T8)
10
+ - Test isolation via config_override
11
+ - Clean extension initialization
12
+
13
+ ADR-009 (simple manager pattern): factory returns app + sock tuple so
14
+ server.py module-level decorators (@app.route, @sock.route) keep working.
15
+ """
16
+ import logging
17
+ import os
18
+
19
+ from flask import Flask, jsonify, redirect, request
20
+ from flask_cors import CORS
21
+ from flask_limiter import Limiter
22
+ from flask_limiter.util import get_remote_address
23
+ from flask_sock import Sock
24
+ from werkzeug.middleware.proxy_fix import ProxyFix
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # Upload size cap, applied as Flask's MAX_CONTENT_LENGTH in create_app(). Matches static_files.py and nginx (100 MB) — bulk uploads need the full limit.
29
+ _MAX_UPLOAD_BYTES = 100 * 1024 * 1024 # 100 MB
30
+
31
+
32
def create_app(config_override: dict = None):
    """
    Create and configure the Flask application.

    Sets up, in order: core config (SECRET_KEY, upload size cap), caller
    overrides, ProxyFix, Flask-Sock, CORS, per-IP rate limiting, the optional
    Clerk auth gate, a JSON 413 handler, security headers, and a final
    after-request hook that strips CDN-blocking headers from canvas media.

    Args:
        config_override: Optional dict of Flask config values to apply.
            Primarily used in tests to inject TESTING=True etc.

    Returns:
        tuple: (app, sock) — configured Flask app and Flask-Sock instance.
    """
    # Local import: only the agent-key check below needs it.
    import hmac

    app = Flask(
        __name__,
        # Serve static files from project root (index.html etc.) via explicit routes
        static_folder=None,
    )

    # Core Flask config
    secret_key = os.getenv('SECRET_KEY')
    if not secret_key:
        import secrets as _secrets
        secret_key = _secrets.token_hex(32)
        logger.warning(
            'No SECRET_KEY set — generated a random key for this session. '
            'Sessions will NOT persist across restarts. '
            'Set SECRET_KEY in .env for production.'
        )
    app.config['SECRET_KEY'] = secret_key
    app.config['MAX_CONTENT_LENGTH'] = _MAX_UPLOAD_BYTES

    # Apply test / caller overrides last so they take precedence
    if config_override:
        app.config.update(config_override)

    # Trust one level of X-Forwarded-* headers (nginx / reverse proxy).
    # Without this, request.remote_addr is always 127.0.0.1 behind nginx,
    # breaking per-IP rate limiting (all users share one bucket).
    app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)

    # Initialize Flask-Sock for WebSocket support
    sock = Sock(app)

    # Configure CORS — allow your production domain and any localhost port for dev
    # Anchored regex prevents partial matches like http://localhostX.evil.com
    # Add extra origins via CORS_ORIGINS env var (comma-separated, e.g. https://yourdomain.com)
    _extra_origins = [o.strip() for o in os.getenv('CORS_ORIGINS', '').split(',') if o.strip()]
    CORS(app, origins=[
        r'^http://localhost:\d+$',
        *_extra_origins,
    ], supports_credentials=True)

    # ── Rate limiting ─────────────────────────────────────────────────────────
    # Per-IP limits protect expensive endpoints from abuse.
    # Override default via RATELIMIT_DEFAULT env var (e.g. "100 per minute").
    # Disable for tests: config_override={'RATELIMIT_ENABLED': False}.
    limiter = Limiter(
        get_remote_address,
        app=app,
        default_limits=[os.getenv('RATELIMIT_DEFAULT', '200 per minute')],
        storage_uri='memory://',
    )
    app.limiter = limiter

    # ── Clerk auth gate ────────────────────────────────────────────────────────
    # Auth is only active when CLERK_PUBLISHABLE_KEY is set in .env.
    # Without it, the app runs fully open (single-user / local mode).
    # The key is read once here; require_auth() closes over the result.
    _clerk_key = (os.getenv('CLERK_PUBLISHABLE_KEY') or os.getenv('VITE_CLERK_PUBLISHABLE_KEY', '')).strip()
    _auth_enabled = bool(_clerk_key)

    if not _auth_enabled:
        logger.info('No CLERK_PUBLISHABLE_KEY set — auth disabled (local mode)')

    # Routes that never require authentication. Defined unconditionally so
    # require_auth() never depends on which branch ran above.
    _PUBLIC_PREFIXES = (
        '/src/',          # static JS/CSS (needed to render the login screen)
        '/sounds/',
        '/music/',
        '/images/',       # canvas images (individual pages check their own flag)
        '/uploads/',      # uploaded/generated files — served from VPS filesystem (no secrets)
        '/static/',       # PWA icons, app icons
        '/pages/',        # canvas pages — served without auth (CANVAS_REQUIRE_AUTH opt-in)
        '/api/canvas/',   # canvas API — creation, manifest, context (no per-user auth needed)
        '/api/uploads',   # uploads list — files are already public at /uploads/, listing is fine
        '/api/profiles',  # read-only profile config — loaded before Clerk init
        '/api/chat',      # LLM proxy (Groq) — used by canvas pages for inline AI
        '/api/tts/',      # TTS provider list — loaded before Clerk init
        '/api/theme',     # theme config — loaded before Clerk init
        '/api/music',     # music track list — loaded before Clerk init
        '/api/faces',     # face list — loaded before Clerk init
        '/api/icons/',    # icon library + generated icons — static images, no secrets
    )
    _PUBLIC_EXACT = frozenset({
        '/',                   # main page — hosts the Clerk login gate itself
        '/pi',                 # Pi-optimized page — same login gate, different entry point
        '/health/live',
        '/health/ready',
        '/api/auth/check',     # Auth check endpoint — does its own token verification
        '/api/suno/callback',  # Suno's servers POST here from external IPs (no Clerk token)
        '/sw.js',              # PWA service worker — browser fetches this before auth
        '/manifest.json',      # PWA manifest — browser fetches this before auth
        '/favicon.ico',        # Browser favicon request — before auth
        '/ws/clawdbot',        # WebSocket — browsers can't send Clerk token in WS headers;
                               # handler secures itself via CLAWDBOT_AUTH_TOKEN to the gateway
        '/openclaw-ui',        # WebSocket upgrade for OpenClaw Control UI proxy;
                               # handler does its own Clerk auth via __session cookie
    })
    # Canvas proxy / website-dev routes have their own auth logic (public flag)
    # handled inside canvas_bp — let them through here. ('/pages/' is already
    # covered by _PUBLIC_PREFIXES.)
    _SELF_AUTHED_PREFIXES = ('/canvas-proxy', '/website-dev')

    # Internal agent API key — allows openclaw agents to call Flask APIs
    # without a Clerk JWT. Set AGENT_API_KEY in the container .env.
    _agent_api_key = os.getenv('AGENT_API_KEY', '').strip()

    @app.before_request
    def require_auth():
        """Block unauthenticated requests to all non-exempt routes.

        Skipped entirely when Clerk is not configured (no CLERK_PUBLISHABLE_KEY),
        matching the documented opt-in auth behaviour.

        Returns:
            None to let the request proceed; a JSON 401 response for API/XHR
            callers without a valid token; a redirect to '/' for page loads.
        """
        if not _auth_enabled:
            return None  # No Clerk configured — open access (single-user / self-hosted)

        path = request.path

        # Always allow health probes, static assets and self-authenticating routes
        if path in _PUBLIC_EXACT:
            return None
        if path.startswith(_PUBLIC_PREFIXES) or path.startswith(_SELF_AUTHED_PREFIXES):
            return None

        # Internal agent API key — openclaw agents calling Flask APIs from inside Docker network.
        # compare_digest avoids leaking the key through response-time differences;
        # both sides are encoded to bytes because it rejects non-ASCII str input.
        if _agent_api_key:
            supplied_key = request.headers.get('X-Agent-Key', '')
            if hmac.compare_digest(supplied_key.encode('utf-8'), _agent_api_key.encode('utf-8')):
                return None

        from services.auth import get_token_from_request, verify_clerk_token
        token = get_token_from_request()
        user_id = verify_clerk_token(token) if token else None

        if not user_id:
            # For API calls return JSON 401; for page navigations redirect to /
            if path.startswith('/api/') or request.headers.get('X-Requested-With'):
                return jsonify({'error': 'Unauthorized', 'code': 'auth_required'}), 401
            # HTML page request — redirect to root (login gate)
            return redirect('/')
        return None

    # ── JSON error handler for 413 (file too large) ────────────────────────
    @app.errorhandler(413)
    def handle_413(e):
        """Return a JSON body instead of the default HTML 413 page."""
        return jsonify({'error': 'File too large (100 MB max)'}), 413

    # ── Security headers (P7-T3 security audit) ──────────────────────────────
    @app.after_request
    def add_security_headers(response):
        """Add defensive HTTP security headers to every response.

        Uses setdefault so a view that already set a header keeps its value.
        """
        response.headers.setdefault('X-Content-Type-Options', 'nosniff')
        response.headers.setdefault('X-Frame-Options', 'SAMEORIGIN')
        response.headers.setdefault('X-XSS-Protection', '1; mode=block')
        response.headers.setdefault('Referrer-Policy', 'strict-origin-when-cross-origin')
        # Allow microphone and camera for voice/vision app; block geolocation
        response.headers.setdefault(
            'Permissions-Policy', 'camera=(self), microphone=(self), geolocation=()'
        )
        response.headers.setdefault(
            'Content-Security-Policy',
            "default-src 'self'; "
            "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://*.clerk.accounts.dev https://*.jam-bot.com; "
            "style-src 'self' 'unsafe-inline'; "
            "img-src 'self' data: blob: https://img.clerk.com https://images.clerk.dev https://*.clerk.accounts.dev https://lh3.googleusercontent.com https://avatars.githubusercontent.com; "
            "media-src 'self' blob:; "
            "connect-src 'self' wss: https:; "
            "frame-src 'self' https://*.clerk.accounts.dev https://*.jam-bot.com https:; "
            "worker-src 'self' blob:"
        )
        return response

    # ── CDN cache cleanup (MUST run after flask_cors) ──────────────────────
    # flask_cors adds Vary:Origin + Access-Control-* to ALL responses, which
    # causes Cloudflare to mark them cf-cache-status:DYNAMIC (uncacheable).
    # Canvas media files don't need CORS — strip those headers so CDN caches them.
    # Inserted at position 0 in after_request list so it runs LAST in LIFO order.
    _media_exts = ('.mp4', '.webm', '.mp3', '.wav', '.ogg', '.png', '.jpg',
                   '.jpeg', '.gif', '.svg', '.webp', '.pdf')
    _cdn_blocking_headers = ('Vary', 'Access-Control-Allow-Origin',
                             'Access-Control-Allow-Credentials',
                             'Content-Security-Policy', 'X-Frame-Options',
                             'Permissions-Policy', 'X-XSS-Protection',
                             'Referrer-Policy')

    def _strip_cdn_blocking_headers(response):
        """Drop CORS/security headers from canvas media under /pages/."""
        if request.path.startswith('/pages/') and request.path.endswith(_media_exts):
            for header_name in _cdn_blocking_headers:
                response.headers.pop(header_name, None)
        return response
    app.after_request_funcs.setdefault(None, []).insert(0, _strip_cdn_blocking_headers)

    return app, sock