openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
package/server.py
ADDED
|
@@ -0,0 +1,1510 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
OpenVoiceUI Server — Entry Point
|
|
4
|
+
|
|
5
|
+
Initialises the Flask application and registers all route blueprints.
|
|
6
|
+
Routes are split into focused blueprints under routes/; this file handles
|
|
7
|
+
startup wiring, session management, usage tracking, and standalone endpoints
|
|
8
|
+
that don't belong to a specific feature blueprint.
|
|
9
|
+
|
|
10
|
+
Start:
|
|
11
|
+
venv/bin/python3 server.py
|
|
12
|
+
|
|
13
|
+
See README.md for full setup instructions.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import base64
|
|
18
|
+
import faulthandler
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import queue
|
|
23
|
+
import re
|
|
24
|
+
import requests
|
|
25
|
+
import signal
|
|
26
|
+
import sqlite3
|
|
27
|
+
import subprocess
|
|
28
|
+
import tempfile
|
|
29
|
+
import threading
|
|
30
|
+
import time
|
|
31
|
+
import uuid
|
|
32
|
+
from datetime import datetime
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
import websockets
|
|
36
|
+
from dotenv import load_dotenv
|
|
37
|
+
from flask import Response, request, jsonify
|
|
38
|
+
|
|
39
|
+
faulthandler.enable() # print traceback on hard crashes (SIGSEGV etc.)
|
|
40
|
+
|
|
41
|
+
# Load environment variables before anything else
|
|
42
|
+
env_path = Path(__file__).parent / ".env"
|
|
43
|
+
load_dotenv(env_path, override=True)
|
|
44
|
+
|
|
45
|
+
logging.basicConfig(level=logging.INFO)
|
|
46
|
+
logger = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
SERVER_START_TIME = time.time()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# Faster-Whisper — lazy-loaded on first /api/stt/local request
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
_whisper_model = None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_whisper_model():
    """Return the shared Faster-Whisper model, creating it on first use.

    The ``faster_whisper`` import is deferred to the first call and the
    resulting instance is cached in the module-level ``_whisper_model``, so
    later calls return the same object without reloading.
    """
    global _whisper_model
    if _whisper_model is not None:
        return _whisper_model

    from faster_whisper import WhisperModel

    logger.info("Loading Faster-Whisper model (first STT request)...")
    _whisper_model = WhisperModel("tiny", device="cpu", compute_type="float32")
    logger.info("Faster-Whisper model ready.")
    return _whisper_model
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Flask app factory + blueprint registration
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
from app import create_app
|
|
73
|
+
app, sock = create_app()
|
|
74
|
+
|
|
75
|
+
from routes.music import music_bp
|
|
76
|
+
app.register_blueprint(music_bp)
|
|
77
|
+
|
|
78
|
+
from routes.canvas import (
|
|
79
|
+
canvas_bp,
|
|
80
|
+
canvas_context,
|
|
81
|
+
update_canvas_context,
|
|
82
|
+
extract_canvas_page_content,
|
|
83
|
+
get_canvas_context,
|
|
84
|
+
load_canvas_manifest,
|
|
85
|
+
save_canvas_manifest,
|
|
86
|
+
add_page_to_manifest,
|
|
87
|
+
sync_canvas_manifest,
|
|
88
|
+
CANVAS_MANIFEST_PATH,
|
|
89
|
+
CANVAS_PAGES_DIR,
|
|
90
|
+
CATEGORY_ICONS,
|
|
91
|
+
CATEGORY_COLORS,
|
|
92
|
+
)
|
|
93
|
+
app.register_blueprint(canvas_bp)
|
|
94
|
+
|
|
95
|
+
# Seed default pages into canvas-pages on startup (ships with the app image).
# Default pages are app infrastructure (e.g. desktop menu) — auth is skipped in canvas.py.
# Only files that are not already present in CANVAS_PAGES_DIR are copied, so
# existing pages with the same name are never overwritten.
from services.paths import DEFAULT_PAGES_DIR
if DEFAULT_PAGES_DIR.is_dir():
    import shutil
    CANVAS_PAGES_DIR.mkdir(parents=True, exist_ok=True)
    for src in DEFAULT_PAGES_DIR.iterdir():
        dest = CANVAS_PAGES_DIR / src.name
        if src.is_file() and not dest.exists():
            shutil.copy2(src, dest)
            logger.info("Seeded default page: %s", src.name)
|
|
108
|
+
|
|
109
|
+
from routes.static_files import static_files_bp, DJ_SOUNDS, SOUNDS_DIR
|
|
110
|
+
app.register_blueprint(static_files_bp)
|
|
111
|
+
|
|
112
|
+
from routes.admin import admin_bp
|
|
113
|
+
app.register_blueprint(admin_bp)
|
|
114
|
+
|
|
115
|
+
from routes.theme import theme_bp
|
|
116
|
+
app.register_blueprint(theme_bp)
|
|
117
|
+
|
|
118
|
+
from routes.conversation import conversation_bp, clean_for_tts
|
|
119
|
+
app.register_blueprint(conversation_bp)
|
|
120
|
+
|
|
121
|
+
from routes.profiles import profiles_bp
|
|
122
|
+
app.register_blueprint(profiles_bp)
|
|
123
|
+
|
|
124
|
+
from routes.elevenlabs_hybrid import elevenlabs_hybrid_bp
|
|
125
|
+
app.register_blueprint(elevenlabs_hybrid_bp)
|
|
126
|
+
|
|
127
|
+
from routes.instructions import instructions_bp
|
|
128
|
+
app.register_blueprint(instructions_bp)
|
|
129
|
+
|
|
130
|
+
from routes.greetings import greetings_bp
|
|
131
|
+
app.register_blueprint(greetings_bp)
|
|
132
|
+
|
|
133
|
+
from routes.suno import suno_bp
|
|
134
|
+
app.register_blueprint(suno_bp)
|
|
135
|
+
|
|
136
|
+
from routes.vision import vision_bp
|
|
137
|
+
app.register_blueprint(vision_bp)
|
|
138
|
+
|
|
139
|
+
from routes.transcripts import transcripts_bp
|
|
140
|
+
app.register_blueprint(transcripts_bp)
|
|
141
|
+
|
|
142
|
+
from routes.pi import pi_bp
|
|
143
|
+
app.register_blueprint(pi_bp)
|
|
144
|
+
|
|
145
|
+
from routes.onboarding import onboarding_bp
|
|
146
|
+
app.register_blueprint(onboarding_bp)
|
|
147
|
+
|
|
148
|
+
from routes.image_gen import image_gen_bp
|
|
149
|
+
app.register_blueprint(image_gen_bp)
|
|
150
|
+
|
|
151
|
+
from routes.chat import chat_bp
|
|
152
|
+
app.register_blueprint(chat_bp)
|
|
153
|
+
|
|
154
|
+
from routes.workspace import workspace_bp
|
|
155
|
+
app.register_blueprint(workspace_bp)
|
|
156
|
+
|
|
157
|
+
from routes.icons import icons_bp
|
|
158
|
+
from routes.report_issue import report_issue_bp
|
|
159
|
+
app.register_blueprint(icons_bp)
|
|
160
|
+
app.register_blueprint(report_issue_bp)
|
|
161
|
+
|
|
162
|
+
# Auto-sync the canvas manifest on startup so any pages written outside the
# API are picked up. A failure here is logged and does not stop the server.
try:
    sync_canvas_manifest()
except Exception as _e:
    logger.warning(f"Canvas manifest auto-sync failed (non-critical): {_e}")
else:
    logger.info("Canvas manifest synced on startup.")

# Start the canvas page version watcher (auto-saves versions when pages change).
# The import lives inside the try so an import failure is also non-fatal.
try:
    from services.canvas_versioning import start_version_watcher
    start_version_watcher()
except Exception as _e:
    logger.warning(f"Canvas version watcher failed to start (non-critical): {_e}")
else:
    logger.info("Canvas version watcher started.")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# Voice session management
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
from services.paths import VOICE_SESSION_FILE as _VSF_PATH, DB_PATH, UPLOADS_DIR
|
|
184
|
+
VOICE_SESSION_FILE = Path(_VSF_PATH)
|
|
185
|
+
_consecutive_empty_responses = 0
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _save_session_counter(counter: int) -> None:
    """Overwrite ``VOICE_SESSION_FILE`` with the decimal text of *counter*."""
    VOICE_SESSION_FILE.write_text(f"{counter}")
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_voice_session_key() -> str:
    """Return the current voice session key, e.g. 'voice-main-6'.

    The key is ``<prefix>-<counter>``: the prefix comes from the
    ``VOICE_SESSION_PREFIX`` environment variable (default ``voice-main``) and
    the counter is read from ``VOICE_SESSION_FILE``. When the file is missing
    or does not contain an integer, the counter is reset to 1 and written
    back before the key is returned.
    """
    prefix = os.getenv("VOICE_SESSION_PREFIX", "voice-main")
    try:
        stored = VOICE_SESSION_FILE.read_text()
        counter = int(stored.strip())
    except (FileNotFoundError, ValueError):
        counter = 1
        _save_session_counter(counter)
    return "-".join((prefix, str(counter)))
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def bump_voice_session() -> str:
    """Increment the session counter and return the new session key.

    Reads the stored counter from ``VOICE_SESSION_FILE`` (treating a missing
    or non-integer file as 1), adds one, writes the result back, and resets
    the module-level ``_consecutive_empty_responses`` to zero.
    """
    global _consecutive_empty_responses
    prefix = os.getenv("VOICE_SESSION_PREFIX", "voice-main")
    try:
        previous = int(VOICE_SESSION_FILE.read_text().strip())
    except (FileNotFoundError, ValueError):
        previous = 1
    counter = previous + 1
    _save_session_counter(counter)
    _consecutive_empty_responses = 0
    new_key = f"{prefix}-{counter}"
    logger.info(f"Session bumped → {new_key}")
    return new_key
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# ---------------------------------------------------------------------------
|
|
220
|
+
# User usage tracking (SQLite)
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
|
|
223
|
+
MONTHLY_LIMIT = int(os.getenv("MONTHLY_USAGE_LIMIT", "20"))
|
|
224
|
+
UNLIMITED_USERS: list = [
|
|
225
|
+
u.strip() for u in os.getenv("UNLIMITED_USER_IDS", "").split(",") if u.strip()
|
|
226
|
+
]
|
|
227
|
+
from services.db_pool import SQLitePool
|
|
228
|
+
db_pool = SQLitePool(DB_PATH, pool_size=5)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def init_db() -> None:
    """Create the SQLite tables used by this server if they do not exist.

    Opens a dedicated connection to ``DB_PATH``, switches the database to WAL
    journaling, and issues ``CREATE TABLE IF NOT EXISTS`` for the ``usage``,
    ``conversation_log`` and ``conversation_metrics`` tables.

    The connection is closed in a ``finally`` clause so that a failing
    statement does not leak the handle.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        c = conn.cursor()
        c.execute("""
            CREATE TABLE IF NOT EXISTS usage (
                user_id TEXT PRIMARY KEY,
                message_count INTEGER DEFAULT 0,
                month TEXT,
                updated_at TEXT
            )
        """)
        c.execute("""
            CREATE TABLE IF NOT EXISTS conversation_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT DEFAULT 'default',
                role TEXT NOT NULL,
                message TEXT NOT NULL,
                tts_provider TEXT,
                voice TEXT,
                created_at TEXT
            )
        """)
        c.execute("""
            CREATE TABLE IF NOT EXISTS conversation_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT DEFAULT 'default',
                profile TEXT,
                model TEXT,
                handshake_ms INTEGER,
                llm_inference_ms INTEGER,
                tts_generation_ms INTEGER,
                total_ms INTEGER,
                user_message_len INTEGER,
                response_len INTEGER,
                tts_text_len INTEGER,
                tts_provider TEXT,
                tts_success INTEGER DEFAULT 1,
                tts_error TEXT,
                tool_count INTEGER DEFAULT 0,
                fallback_used INTEGER DEFAULT 0,
                error TEXT,
                created_at TEXT
            )
        """)
        conn.commit()
    finally:
        conn.close()
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def get_current_month() -> str:
    """Return the current local year and month formatted as ``YYYY-MM``."""
    return f"{datetime.now():%Y-%m}"
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def get_user_usage(user_id: str) -> int:
    """Return the number of messages *user_id* has used in the current month.

    Looks up the user's row in the ``usage`` table. Returns 0 when the user
    has no row or when the stored month differs from ``get_current_month()``
    (i.e. the stored count belongs to an earlier month).

    The connection is closed in a ``finally`` clause so a failing query does
    not leak the handle.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        row = conn.execute(
            "SELECT message_count, month FROM usage WHERE user_id = ?",
            (user_id,),
        ).fetchone()
    finally:
        conn.close()

    if row is None:
        return 0
    count, month = row
    return count if month == get_current_month() else 0
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def increment_usage(user_id: str) -> None:
    """Record one more message for *user_id* in the current month.

    Resulting row state:
      * no existing row      -> row created with ``message_count = 1``
      * row from another month -> ``message_count`` reset to 1, month updated
      * row from this month  -> ``message_count`` incremented by 1

    The work is done as ``INSERT OR IGNORE`` followed by a single ``UPDATE``
    in one transaction, rather than SELECT-then-write. The first statement
    takes the write lock, so two concurrent first-time requests for the same
    user cannot both attempt the INSERT and hit a primary-key error. The
    connection is closed in a ``finally`` clause so a failure does not leak it.
    """
    current_month = get_current_month()
    now = datetime.now().isoformat()

    conn = sqlite3.connect(DB_PATH)
    try:
        c = conn.cursor()
        # Ensure a row exists; the count starts at 0 because the UPDATE below
        # always applies the +1 (or the reset to 1).
        c.execute(
            "INSERT OR IGNORE INTO usage (user_id, message_count, month, updated_at) "
            "VALUES (?, 0, ?, ?)",
            (user_id, current_month, now),
        )
        # A NULL or different stored month falls to the ELSE branch and resets.
        c.execute(
            "UPDATE usage SET "
            "message_count = CASE WHEN month = ? THEN message_count + 1 ELSE 1 END, "
            "month = ?, updated_at = ? WHERE user_id = ?",
            (current_month, current_month, now, user_id),
        )
        conn.commit()
    finally:
        conn.close()
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
init_db()
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# ---------------------------------------------------------------------------
|
|
327
|
+
# Upload directory
|
|
328
|
+
# ---------------------------------------------------------------------------
|
|
329
|
+
|
|
330
|
+
UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# ---------------------------------------------------------------------------
|
|
334
|
+
# Routes — index
|
|
335
|
+
# ---------------------------------------------------------------------------
|
|
336
|
+
|
|
337
|
+
@app.route("/")
def serve_index():
    """Serve index.html with injected runtime config.

    Set AGENT_SERVER_URL in .env to override the backend URL the frontend
    connects to. Defaults to window.location.origin (correct for same-origin
    deployments).

    A ``<script>window.AGENT_CONFIG={...};</script>`` element is inserted
    right after the first ``<head>`` tag. It carries ``serverUrl`` and, when
    the matching environment variables are set, ``clerkPublishableKey``
    (CLERK_PUBLISHABLE_KEY or VITE_CLERK_PUBLISHABLE_KEY), ``devsiteMap``
    (DEVSITE_MAP, parsed as JSON) and ``clientName`` (CLIENT_NAME).

    Every injected value is serialised with ``json.dumps`` so quotes or
    backslashes in an environment value cannot break the script, and
    CLIENT_NAME is HTML-escaped before it is placed in the title and
    ``content`` attributes. The response is sent with caching disabled.
    """
    from html import escape as _escape_html

    def _js_literal(value) -> str:
        # json.dumps yields a valid JavaScript literal; "</" is rewritten so a
        # value containing "</script>" cannot terminate the inline script.
        return json.dumps(value).replace("</", "<\\/")

    # NOTE(review): this path is resolved against the process working
    # directory, not this file's directory — confirm the server is always
    # started from the package root.
    page = Path("index.html").read_text(encoding="utf-8")

    server_url = os.environ.get("AGENT_SERVER_URL", "").strip().rstrip("/")
    clerk_key = (
        os.environ.get("CLERK_PUBLISHABLE_KEY")
        or os.environ.get("VITE_CLERK_PUBLISHABLE_KEY", "")
    ).strip()
    client_name = os.environ.get("CLIENT_NAME", "").strip()
    try:
        devsite_map = json.loads(os.environ.get("DEVSITE_MAP", "{}").strip())
    except Exception:
        # Best effort: an unparsable DEVSITE_MAP is treated as "not set".
        devsite_map = {}

    config_parts = [
        f"serverUrl:{_js_literal(server_url)}"
        if server_url
        else "serverUrl:window.location.origin"
    ]
    if clerk_key:
        config_parts.append(f"clerkPublishableKey:{_js_literal(clerk_key)}")
    if devsite_map:
        config_parts.append(f"devsiteMap:{_js_literal(devsite_map)}")
    if client_name:
        config_parts.append(f"clientName:{_js_literal(client_name)}")
    config_block = f'<script>window.AGENT_CONFIG={{{",".join(config_parts)}}};</script>'
    page = page.replace("<head>", f"<head>\n {config_block}", 1)

    # Replace PWA title and apple-mobile-web-app-title with client name
    if client_name:
        safe_name = _escape_html(client_name)  # also escapes quotes for the attribute
        page = page.replace("<title>OpenVoiceUI</title>", f"<title>{safe_name}</title>")
        page = page.replace('content="OpenVoiceUI"', f'content="{safe_name}"')

    resp = Response(page, mimetype="text/html")
    resp.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
    return resp
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
# ---------------------------------------------------------------------------
|
|
376
|
+
# Routes — health probes
|
|
377
|
+
# ---------------------------------------------------------------------------
|
|
378
|
+
|
|
379
|
+
from services.health import health_checker as _health_checker
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
@app.route("/health/live", methods=["GET"])
def health_live():
    """Liveness probe — always answers 200, whatever the checker reports."""
    status = _health_checker.liveness()
    payload = {
        "healthy": status.healthy,
        "message": status.message,
        "details": status.details,
    }
    return jsonify(payload), 200
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@app.route("/health/ready", methods=["GET"])
def health_ready():
    """Readiness probe — 200 when the checker reports healthy, otherwise 503."""
    status = _health_checker.readiness()
    payload = {
        "healthy": status.healthy,
        "message": status.message,
        "details": status.details,
    }
    if status.healthy:
        return jsonify(payload), 200
    return jsonify(payload), 503
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
@app.route("/api/memory-status", methods=["GET"])
def memory_status():
    """Process memory usage — for watchdog monitoring.

    Responds with ``{"process": {"current_mb": <float>}}``. The figure comes
    from ``ru_maxrss``, which is the *peak* resident set size of this
    process; the ``current_mb`` key name is kept for existing consumers.
    """
    import resource
    import sys

    peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS.
    units_per_mb = 1024 * 1024 if sys.platform == "darwin" else 1024
    current_mb = peak_rss / units_per_mb
    return jsonify({"process": {"current_mb": round(current_mb, 1)}})
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
# ---------------------------------------------------------------------------
|
|
407
|
+
# Routes — session
|
|
408
|
+
# ---------------------------------------------------------------------------
|
|
409
|
+
|
|
410
|
+
@app.route("/api/session", methods=["GET"])
def session_info():
    """Report the current voice session key and the consecutive-empty-response count."""
    payload = {
        "sessionKey": get_voice_session_key(),
        "consecutiveEmpty": _consecutive_empty_responses,
    }
    return jsonify(payload)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
@app.route("/api/session/reset", methods=["POST"])
def session_reset():
    """Reset the voice session context.

    Body (JSON, optional):
      { "mode": "soft" }  — bump session key only (default)
      { "mode": "hard" }  — bump session key and pre-warm the new session

    Returns a JSON object with the ``old`` and ``new`` session keys, the
    ``mode`` and a human-readable ``message``. Responds 400 when ``mode`` is
    not ``soft``/``hard`` or when the body is JSON but not an object (a list
    or scalar previously crashed on ``.get`` and produced a 500).
    """
    from services.gateway import gateway_connection

    # A missing, empty or unparsable body means "use the defaults".
    data = request.get_json(silent=True) or {}
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    mode = data.get("mode", "soft")
    if mode not in ("soft", "hard"):
        return jsonify({"error": f"Invalid mode '{mode}'. Use 'soft' or 'hard'."}), 400

    old_key = get_voice_session_key()
    new_key = bump_voice_session()

    if mode == "hard":
        def _prewarm():
            # Sends one throwaway prompt under the new key; the queue that
            # receives the streamed output is discarded.
            try:
                gateway_connection.stream_to_queue(
                    queue.Queue(),
                    "[SYSTEM: session pre-warm, reply with exactly: ok]",
                    new_key,
                    [],
                )
                logger.info(f"Pre-warm complete for {new_key}")
            except Exception as e:
                logger.warning(f"Pre-warm failed: {e}")

        # Daemon thread: the HTTP response does not wait for the pre-warm.
        threading.Thread(target=_prewarm, daemon=True).start()

    return jsonify({
        "old": old_key,
        "new": new_key,
        "mode": mode,
        "message": f"Session reset ({mode})." + (" Pre-warming new session..." if mode == "hard" else ""),
    })
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
# ---------------------------------------------------------------------------
|
|
460
|
+
# Routes — diagnostics
|
|
461
|
+
# ---------------------------------------------------------------------------
|
|
462
|
+
|
|
463
|
+
@app.route("/api/diagnostics", methods=["GET"])
def diagnostics():
    """Diagnostic dashboard — uptime, active config, recent timing metrics.

    The JSON response contains:
      * ``server`` — uptime, peak memory in MB, PID and start time
      * ``config`` — gateway URL, session key, TTS provider and port
      * ``recent_conversations`` — the 10 newest ``conversation_metrics`` rows
      * ``last_hour_stats`` — aggregates over rows from the last hour

    Metrics are best effort: if the database cannot be read, the error text
    is returned under ``metrics_error`` instead of failing the request. The
    SQLite connection is closed even when a query raises.
    """
    import resource
    import sys

    uptime_seconds = int(time.time() - SERVER_START_TIME)
    uptime_h = uptime_seconds // 3600
    uptime_m = (uptime_seconds % 3600) // 60

    # ru_maxrss is the peak RSS: kilobytes on Linux, bytes on macOS.
    peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    units_per_mb = 1024 * 1024 if sys.platform == "darwin" else 1024
    memory_mb = round(peak_rss / units_per_mb, 1)

    state = {
        "server": {
            "uptime": f"{uptime_h}h {uptime_m}m",
            "uptime_seconds": uptime_seconds,
            "memory_mb": memory_mb,
            "pid": os.getpid(),
            "started_at": datetime.fromtimestamp(SERVER_START_TIME).isoformat(),
        },
        "config": {
            "gateway_url": os.getenv("CLAWDBOT_GATEWAY_URL", "ws://127.0.0.1:18791"),
            "session_key": get_voice_session_key(),
            "tts_provider": os.getenv("DEFAULT_TTS_PROVIDER", "groq"),
            "port": os.getenv("PORT", "5001"),
        },
    }

    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            conn.row_factory = sqlite3.Row
            c = conn.cursor()
            c.execute("""
                SELECT profile, model, handshake_ms, llm_inference_ms,
                       tts_generation_ms, total_ms, user_message_len,
                       response_len, tts_text_len, tts_provider, tts_success,
                       tts_error, tool_count, fallback_used, error, created_at
                FROM conversation_metrics
                ORDER BY id DESC LIMIT 10
            """)
            state["recent_conversations"] = [dict(r) for r in c.fetchall()]

            # NOTE(review): created_at is a TEXT column compared as a string
            # against SQLite's UTC 'YYYY-MM-DD HH:MM:SS' value — confirm the
            # code that writes these rows stores the same format and timezone.
            c.execute("""
                SELECT COUNT(*) as total_conversations,
                       AVG(total_ms) as avg_total_ms,
                       AVG(llm_inference_ms) as avg_llm_ms,
                       AVG(tts_generation_ms) as avg_tts_ms,
                       AVG(handshake_ms) as avg_handshake_ms,
                       SUM(CASE WHEN tts_success = 0 THEN 1 ELSE 0 END) as tts_failures,
                       SUM(CASE WHEN error IS NOT NULL THEN 1 ELSE 0 END) as errors,
                       MAX(total_ms) as max_total_ms,
                       MIN(total_ms) as min_total_ms
                FROM conversation_metrics
                WHERE created_at > datetime('now', '-1 hour')
            """)
            stats = dict(c.fetchone() or {})
            # AVG() yields floats (or NULL with no rows); report whole milliseconds.
            for key in ("avg_total_ms", "avg_llm_ms", "avg_tts_ms", "avg_handshake_ms"):
                if stats.get(key) is not None:
                    stats[key] = round(stats[key])
            state["last_hour_stats"] = stats
        finally:
            conn.close()
    except Exception as e:
        state["metrics_error"] = str(e)

    return jsonify(state)
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
# ---------------------------------------------------------------------------
|
|
529
|
+
# Routes — Hume EVI token (used by src/adapters/hume-evi.js)
|
|
530
|
+
# ---------------------------------------------------------------------------
|
|
531
|
+
|
|
532
|
+
@app.route("/api/hume/token", methods=["GET"])
def get_hume_token():
    """Return a Hume access token for EVI WebSocket connections.

    The token is obtained from Hume's OAuth client-credentials endpoint using
    HUME_API_KEY and HUME_SECRET_KEY as HTTP Basic credentials.

    Returns 403 when Hume credentials are not configured — the frontend
    adapter treats this as 'Hume unavailable' rather than an error. Any
    non-200 answer from Hume, or any exception while requesting the token,
    produces a 500 with ``available: False``.
    """
    api_key = os.getenv("HUME_API_KEY")
    secret_key = os.getenv("HUME_SECRET_KEY")
    if not (api_key and secret_key):
        return jsonify({"error": "Hume API credentials not configured", "available": False}), 403

    try:
        basic_auth = base64.b64encode(f"{api_key}:{secret_key}".encode()).decode()
        request_headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Authorization": f"Basic {basic_auth}",
        }
        resp = requests.post(
            "https://api.hume.ai/oauth2-cc/token",
            headers=request_headers,
            data={"grant_type": "client_credentials"},
            timeout=10,
        )
        if resp.status_code != 200:
            logger.error(f"Hume token request failed: {resp.status_code} — {resp.text}")
            return jsonify({"error": "Failed to get Hume access token", "available": False}), 500

        token_data = resp.json()
        payload = {
            "access_token": token_data.get("access_token"),
            "expires_in": token_data.get("expires_in", 3600),
            "config_id": os.getenv("HUME_CONFIG_ID"),
            "available": True,
        }
        return jsonify(payload)
    except Exception as e:
        logger.error(f"Hume token error: {e}")
        return jsonify({"error": "Failed to retrieve token", "available": False}), 500
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
# ---------------------------------------------------------------------------
|
|
573
|
+
# Routes — STT (Speech-to-Text)
|
|
574
|
+
# ---------------------------------------------------------------------------
|
|
575
|
+
|
|
576
|
+
@app.route("/api/stt/groq", methods=["POST"])
def groq_stt():
    """Transcribe an uploaded audio clip with Groq's whisper-large-v3-turbo.

    Expects a multipart upload containing an ``audio`` file part. The
    transcript is filtered in two passes: first per segment (segments with
    ``no_speech_prob`` >= 0.2 or ``avg_logprob`` < -1.0 are dropped), then
    as a whole against phrases Whisper is known to hallucinate, very short
    output, and heavily repetitive output.

    Returns:
        JSON ``{"transcript": ..., "success": True}``. A filtered result
        has an empty transcript plus ``"filtered": True``. Responds 400
        when no audio part is present and 500 when the Groq client is
        unavailable or transcription raises.
    """
    from services.tts import get_groq_client as _get_groq_client

    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files["audio"]
    groq = _get_groq_client()
    if not groq:
        return jsonify({"error": "Groq client not available — check GROQ_API_KEY"}), 500

    try:
        payload = audio_file.read()
        upload = (
            audio_file.filename or "audio.webm",
            payload,
            audio_file.content_type or "audio/webm",
        )
        transcription = groq.audio.transcriptions.create(
            file=upload,
            model="whisper-large-v3-turbo",
            response_format="verbose_json",
            language="en",
            temperature=0,
            prompt="",
        )

        import re as _re

        # --- Pass 1: per-segment confidence filtering ---
        segments = getattr(transcription, 'segments', None)
        if not segments:
            text = (transcription.text or "").strip()
        else:
            kept = []
            for segment in segments:
                # Segments may be plain dicts or attribute-style objects.
                as_dict = isinstance(segment, dict)
                no_speech = segment.get('no_speech_prob', 0) if as_dict else getattr(segment, 'no_speech_prob', 0)
                avg_logprob = segment.get('avg_logprob', 0) if as_dict else getattr(segment, 'avg_logprob', 0)
                raw_text = segment.get('text', '') if as_dict else segment.text
                segment_text = raw_text.strip()

                if no_speech >= 0.2:
                    logger.debug(f"Groq STT: dropping segment (no_speech_prob={no_speech:.2f}): {segment_text!r}")
                elif avg_logprob < -1.0:
                    logger.debug(f"Groq STT: dropping segment (avg_logprob={avg_logprob:.2f}): {segment_text!r}")
                else:
                    kept.append(segment_text)
            text = ' '.join(kept).strip()
        logger.info(f"Groq STT: {text!r}")

        # --- Pass 2: whole-transcript hallucination filtering ---
        known_phrases = {
            "thank you", "thanks for watching", "thanks for listening",
            "i'm here with closed captioning", "closed captioning",
            "subscribe", "please subscribe", "like and subscribe",
            "you", "bye", "the end", "subtitles by", "translated by",
            "voice command for ai assistant", "voice command for ai",
            "alright", "all right", "okay", "ok", "yeah", "yes",
            "um", "uh", "hmm", "huh", "oh", "ah",
            "so", "well", "right", "sure", "hey",
            "thanks", "thank you so much",
            "i don't know", "i'm sorry",
        }
        # Fragments that mark prompt-echo or known garbage anywhere in the text.
        known_fragments = [
            "voice command for ai",
            "thanks for watching", "thanks for listening",
            "like and subscribe", "please subscribe",
            "subtitles by", "translated by", "closed captioning",
            "coupo foundation",  # known recurring hallucination
        ]
        normalized = text.lower().rstrip('.!?,;:')
        alnum_only = _re.sub(r'[^a-zA-Z0-9]', '', text)

        def _looks_repetitive(sample):
            # True when one word accounts for at least half of a 4+ word transcript.
            tokens = _re.findall(r'[a-zA-Z]+', sample)
            if len(tokens) < 4:
                return False
            from collections import Counter
            top_count = Counter(tok.lower() for tok in tokens).most_common(1)[0][1]
            return top_count / len(tokens) >= 0.5

        is_garbage = (
            normalized in known_phrases                       # exact known phrase
            or len(alnum_only) < 3                            # too short to be real speech
            or any(fragment in normalized for fragment in known_fragments)
            or _looks_repetitive(text)
        )
        if is_garbage:
            logger.info(f"Groq STT: FILTERED hallucination/garbage: {text!r}")
            return jsonify({"transcript": "", "success": True, "filtered": True})

        return jsonify({"transcript": text, "success": True})
    except Exception as e:
        logger.error(f"Groq STT error: {e}")
        return jsonify({"error": "Speech-to-text failed"}), 500
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
@app.route("/api/stt/deepgram/token", methods=["GET"])
def deepgram_stt_token():
    """Hand the configured Deepgram API key to the browser.

    The value of DEEPGRAM_API_KEY is returned verbatim as ``{"token": ...}``
    so the browser can open its own WebSocket to Deepgram's live
    transcription API. Responds 500 when the variable is unset or empty.

    NOTE: per the original design notes, the frontend is expected to pass
    the key via the WebSocket sub-protocol header (keeping it out of URLs
    and logs), and this endpoint is assumed to sit behind the UI's
    authentication — neither is enforced in this function. For tighter
    security, Deepgram supports scoped / short-lived project keys: create
    one with only 'usage:write' permission and rotate it.
    """
    configured_key = os.environ.get("DEEPGRAM_API_KEY", "")
    if configured_key:
        return jsonify({"token": configured_key})
    return jsonify({"error": "DEEPGRAM_API_KEY not configured"}), 500
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
@app.route("/api/stt/deepgram", methods=["POST"])
def deepgram_stt():
    """Transcribe an uploaded audio clip with Deepgram's nova-2 model.

    Expects a multipart upload containing an ``audio`` file part and
    DEEPGRAM_API_KEY in the environment. The first alternative of the first
    channel is used. Transcripts with confidence below 0.3, known
    hallucinated phrases, and output with fewer than three alphanumeric
    characters are filtered out.

    Returns:
        JSON ``{"transcript": ..., "success": True, "confidence": ...}``.
        A filtered result has an empty transcript plus ``"filtered": True``;
        a reply with no channels has an empty transcript. Responds 400 when
        no audio part is present, 500 when the key is missing or an
        exception is raised, and 502 when Deepgram answers with a non-200
        status.
    """
    import re as _re

    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    api_key = os.environ.get("DEEPGRAM_API_KEY", "")
    if not api_key:
        return jsonify({"error": "DEEPGRAM_API_KEY not configured"}), 500

    audio_file = request.files["audio"]
    try:
        audio_bytes = audio_file.read()
        content_type = audio_file.content_type or "audio/webm"

        import requests as _requests
        resp = _requests.post(
            "https://api.deepgram.com/v1/listen",
            params={
                "model": "nova-2",
                "language": "en",
                "smart_format": "true",
                "punctuate": "true",
            },
            headers={
                "Authorization": f"Token {api_key}",
                "Content-Type": content_type,
            },
            data=audio_bytes,
            timeout=15,
        )

        if resp.status_code != 200:
            logger.error(f"Deepgram API error {resp.status_code}: {resp.text[:300]}")
            return jsonify({"error": f"Deepgram API error: {resp.status_code}"}), 502

        result = resp.json()
        # `or` fallbacks also cover keys that are present but null/empty.
        channels = (result.get("results") or {}).get("channels") or []
        if not channels:
            return jsonify({"transcript": "", "success": True})

        # An empty "alternatives" list used to raise IndexError (-> 500);
        # treat it like an empty transcript instead.
        alternatives = channels[0].get("alternatives") or [{}]
        alt = alternatives[0]
        text = (alt.get("transcript") or "").strip()
        confidence = alt.get("confidence") or 0

        logger.info(f"Deepgram STT: {text!r} (confidence={confidence:.2f})")

        # Low confidence filter
        if confidence < 0.3 and text:
            logger.info(f"Deepgram STT: FILTERED low confidence ({confidence:.2f}): {text!r}")
            return jsonify({"transcript": "", "success": True, "filtered": True})

        # Hallucination filtering (a shorter phrase list than the Groq route uses)
        _HALLUCINATIONS = {
            "thank you", "thanks for watching", "thanks for listening",
            "subscribe", "please subscribe", "like and subscribe",
            "the end", "subtitles by", "translated by", "closed captioning",
            "voice command for ai assistant", "voice command for ai",
            "thanks", "thank you so much",
        }
        _HALLUCINATION_SUBSTRINGS = [
            "voice command for ai", "thanks for watching", "thanks for listening",
            "like and subscribe", "please subscribe",
            "subtitles by", "translated by", "closed captioning",
        ]
        text_lower = text.lower().rstrip('.!?,;:')
        meaningful = _re.sub(r'[^a-zA-Z0-9]', '', text)

        if text_lower in _HALLUCINATIONS:
            logger.info(f"Deepgram STT: FILTERED hallucination: {text!r}")
            return jsonify({"transcript": "", "success": True, "filtered": True})
        if len(meaningful) < 3:
            logger.info(f"Deepgram STT: FILTERED too short: {text!r}")
            return jsonify({"transcript": "", "success": True, "filtered": True})
        if any(sub in text_lower for sub in _HALLUCINATION_SUBSTRINGS):
            logger.info(f"Deepgram STT: FILTERED hallucination substring: {text!r}")
            return jsonify({"transcript": "", "success": True, "filtered": True})

        return jsonify({"transcript": text, "success": True, "confidence": confidence})
    except Exception as e:
        logger.error(f"Deepgram STT error: {e}")
        return jsonify({"error": "Speech-to-text failed"}), 500
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
@app.route("/api/stt/local", methods=["POST"])
def local_stt():
    """Transcribe an uploaded audio clip with the local Whisper model.

    The upload (multipart ``audio`` part) is written to a temporary
    ``.webm`` file and converted by ffmpeg to 16 kHz mono WAV. If the
    conversion exits non-zero, the original file is transcribed instead.
    Transcription runs with the VAD filter enabled. Both temporary files
    are removed afterwards.

    Returns:
        JSON ``{"transcript": ..., "success": True}``; 400 when no audio
        part is present; 500 when conversion or transcription raises.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files["audio"]
    audio_bytes = audio_file.read()

    with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name

    # Swap only the file extension. str.replace(".webm", ".wav") would also
    # rewrite any ".webm" occurring earlier in the temp directory path.
    converted_path = os.path.splitext(tmp_path)[0] + ".wav"

    try:
        wav_path = converted_path
        result = subprocess.run(
            ["ffmpeg", "-y", "-i", tmp_path, "-ar", "16000", "-ac", "1", "-f", "wav", wav_path],
            capture_output=True,
            timeout=10,
        )
        if result.returncode != 0:
            # Decode leniently: non-UTF-8 ffmpeg output must not abort the
            # fallback to the original file.
            stderr_text = result.stderr.decode(errors="replace")
            logger.warning(f"FFmpeg conversion failed, transcribing original: {stderr_text}")
            wav_path = tmp_path

        segments, info = get_whisper_model().transcribe(
            wav_path,
            language="en",
            vad_filter=True,
            vad_parameters={"min_silence_duration_ms": 500, "threshold": 0.5},
        )
        transcript = " ".join(seg.text for seg in segments).strip()
        logger.info(f"Local STT: {transcript!r} ({info.duration:.1f}s)")
        return jsonify({"transcript": transcript, "success": True})
    except Exception as e:
        logger.error(f"Local STT error: {e}")
        return jsonify({"error": "Speech-to-text failed"}), 500
    finally:
        # Best-effort cleanup; the WAV may not exist if ffmpeg never ran.
        for leftover in (tmp_path, converted_path):
            try:
                os.unlink(leftover)
            except OSError:
                pass
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
# ---------------------------------------------------------------------------
|
|
836
|
+
# Routes — web search
|
|
837
|
+
# ---------------------------------------------------------------------------
|
|
838
|
+
|
|
839
|
+
@app.route("/api/search/brave", methods=["GET"])
def brave_search():
    """Proxy a web search to the Brave Search API.

    Query string:
        q: search terms (required; surrounding whitespace is stripped).

    Requires BRAVE_API_KEY in the environment. Ten results are requested
    (``search_lang="en"``, ``freshness="pw"``) and the first five are
    returned.

    Returns:
        JSON ``{"query", "results", "success"}`` where each result has
        ``title``, ``url`` and ``description``. Responds 400 for an empty
        query and 500 when the key is missing or the request fails.
    """
    query = request.args.get("q", "").strip()
    if not query:
        return jsonify({"error": "No query provided"}), 400

    brave_api_key = os.getenv("BRAVE_API_KEY")
    if not brave_api_key:
        return jsonify({"error": "BRAVE_API_KEY not configured"}), 500

    try:
        response = requests.get(
            "https://api.search.brave.com/res/v1/web/search",
            headers={"Accept": "application/json", "X-Subscription-Token": brave_api_key},
            params={"q": query, "count": 10, "search_lang": "en", "freshness": "pw"},
            timeout=10,
        )
        response.raise_for_status()

        top_hits = response.json().get("web", {}).get("results", [])[:5]
        results = []
        for hit in top_hits:
            results.append({
                "title": hit.get("title", ""),
                "url": hit.get("url", ""),
                "description": hit.get("description", ""),
            })
        return jsonify({"query": query, "results": results, "success": True})
    except Exception as e:
        logger.error(f"Brave Search error: {e}")
        return jsonify({"error": "Search failed"}), 500
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
@app.route("/api/search", methods=["GET", "POST"])
def web_search():
    """Web search by scraping DuckDuckGo's HTML endpoint (no API key needed).

    The query comes from the JSON body field ``query`` on POST, or from the
    ``query`` URL parameter on GET.

    Returns:
        JSON ``{"query", "results", "success"}`` with at most five results,
        each a dict of ``url``, ``title`` and ``snippet``. Responds 400 when
        the query is missing and 500 when fetching or parsing fails.
    """
    import urllib.request
    import urllib.parse
    from html.parser import HTMLParser

    query = (
        (request.get_json() or {}).get("query")
        if request.method == "POST"
        else request.args.get("query")
    )
    if not query:
        return jsonify({"error": "query required"}), 400

    class _DDGParser(HTMLParser):
        """Collect (url, title, snippet) triples from DuckDuckGo result anchors."""

        def __init__(self):
            super().__init__()
            self.results = []
            self._current = {}      # result currently being assembled
            self._capture = False   # True while inside an anchor of interest
            self._text = ""         # text accumulated inside that anchor

        def handle_starttag(self, tag, attrs):
            if tag != "a":
                return
            attributes = dict(attrs)
            css_class = attributes.get("class")
            if css_class == "result__a":
                # Title link: begins a new result.
                self._current = {"url": attributes.get("href", ""), "title": "", "snippet": ""}
                self._capture = True
            elif css_class == "result__snippet":
                self._capture = True

        def handle_endtag(self, tag):
            if tag != "a" or not self._capture:
                return
            captured = self._text.strip()
            entry = self._current
            if entry and not entry.get("title"):
                entry["title"] = captured
            elif entry.get("title") and not entry.get("snippet"):
                # The snippet anchor completes the result.
                entry["snippet"] = captured
                if entry["title"] and entry["url"]:
                    self.results.append(entry)
                self._current = {}
            self._capture = False
            self._text = ""

        def handle_data(self, data):
            if self._capture:
                self._text += data

    try:
        search_url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote_plus(query)}"
        req = urllib.request.Request(
            search_url,
            headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            html = resp.read().decode("utf-8")

        parser = _DDGParser()
        parser.feed(html)
        return jsonify({"query": query, "results": parser.results[:5], "success": True})
    except Exception as e:
        logger.error(f"DuckDuckGo search error: {e}")
        return jsonify({"error": "Search failed"}), 500
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
# ---------------------------------------------------------------------------
|
|
938
|
+
# Routes — usage quotas
|
|
939
|
+
# ---------------------------------------------------------------------------
|
|
940
|
+
|
|
941
|
+
@app.route("/api/usage/<user_id>", methods=["GET"])
def check_usage(user_id):
    """Report a user's usage count against the monthly quota.

    Returns:
        JSON with ``user_id``, ``used``, ``limit``, ``remaining`` and
        ``allowed``. Members of UNLIMITED_USERS get ``limit`` and
        ``remaining`` of -1, ``allowed: True`` and ``unlimited: True``.
    """
    used = get_user_usage(user_id)

    if user_id in UNLIMITED_USERS:
        report = {
            "user_id": user_id,
            "used": used,
            "limit": -1,
            "remaining": -1,
            "allowed": True,
            "unlimited": True,
        }
    else:
        report = {
            "user_id": user_id,
            "used": used,
            "limit": MONTHLY_LIMIT,
            "remaining": max(0, MONTHLY_LIMIT - used),
            "allowed": used < MONTHLY_LIMIT,
        }
    return jsonify(report)
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
@app.route("/api/usage/<user_id>/increment", methods=["POST"])
def track_usage(user_id):
    """Record one unit of usage for a user (called after each agent response).

    Members of UNLIMITED_USERS are always incremented and receive ``limit``
    and ``remaining`` of -1. Other users are incremented only while below
    MONTHLY_LIMIT; at or above it, nothing is recorded and the response is
    429.

    Returns:
        JSON with ``user_id``, ``used``, ``limit`` and ``remaining``, or an
        error payload with status 429.
    """
    if user_id in UNLIMITED_USERS:
        increment_usage(user_id)
        unlimited_report = {
            "user_id": user_id,
            "used": get_user_usage(user_id),
            "limit": -1,
            "remaining": -1,
            "unlimited": True,
        }
        return jsonify(unlimited_report)

    used_before = get_user_usage(user_id)
    if used_before < MONTHLY_LIMIT:
        increment_usage(user_id)
        # The new total is derived locally rather than re-read from storage.
        used_after = used_before + 1
        return jsonify({
            "user_id": user_id,
            "used": used_after,
            "limit": MONTHLY_LIMIT,
            "remaining": max(0, MONTHLY_LIMIT - used_after),
        })

    return jsonify({"error": "Monthly limit reached", "used": used_before, "limit": MONTHLY_LIMIT}), 429
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
# ---------------------------------------------------------------------------
|
|
989
|
+
# Routes — server commands (whitelisted)
|
|
990
|
+
# ---------------------------------------------------------------------------
|
|
991
|
+
|
|
992
|
+
# Whitelist of server commands: name -> argv list ("cmd") and a
# human-readable description ("desc").
ALLOWED_COMMANDS = {
    name: {"cmd": argv, "desc": description}
    for name, argv, description in (
        ("git_status", ["git", "status"], "Git working tree status"),
        ("git_log", ["git", "log", "--oneline", "-10"], "Last 10 commits"),
        ("disk_usage", ["df", "-h", "/"], "Disk usage"),
        ("memory", ["free", "-h"], "Memory usage"),
        ("uptime", ["uptime"], "System uptime"),
        ("date", ["date"], "Current date/time"),
        ("whoami", ["whoami"], "Current user"),
        ("nginx_status", ["systemctl", "status", "nginx", "--no-pager"], "Nginx status"),
        ("service_status", ["systemctl", "status", "openvoiceui", "--no-pager"], "OpenVoiceUI service status"),
        ("network", ["ss", "-tuln"], "Active network listeners"),
        ("processes", ["ps", "aux", "--sort=-%cpu"], "Running processes by CPU"),
        ("hostname", ["hostname"], "Server hostname"),
        ("ip_address", ["hostname", "-I"], "Server IP addresses"),
    )
}

# Natural-language keyword -> ALLOWED_COMMANDS key. Insertion order is
# significant: lookups take the first keyword that matches.
_COMMAND_KEYWORDS = {
    "git": "git_status",
    "commit": "git_log",
    "disk": "disk_usage",
    "space": "disk_usage",
    "memory": "memory",
    "ram": "memory",
    "time": "date",
    "date": "date",
    "nginx": "nginx_status",
    "web": "nginx_status",
    "service": "service_status",
    "openvoiceui": "service_status",
    "network": "network",
    "ports": "network",
    "process": "processes",
    "cpu": "processes",
    "running": "processes",
    "host": "hostname",
    "ip": "ip_address",
    "address": "ip_address",
    "uptime": "uptime",
}
|
|
1018
|
+
|
|
1019
|
+
|
|
1020
|
+
@app.route("/api/command", methods=["GET", "POST"])
def run_command():
    """Run a whitelisted server command. Accepts a command key or natural language.

    The command comes from the JSON body field ``command`` on POST, or from
    the ``command`` URL parameter on GET. With no command, the list of
    available commands is returned.

    Resolution:
        1. The input is lowercased and spaces/hyphens become underscores; an
           exact ALLOWED_COMMANDS key wins.
        2. Otherwise the input is split into alphanumeric words and the
           first entry of _COMMAND_KEYWORDS that is a prefix of any word is
           used. Matching on word prefixes (rather than raw substrings)
           keeps "uptime" from being captured by the "time" keyword and
           "ip" from matching inside unrelated words, while still accepting
           plurals such as "processes".

    Returns:
        JSON with ``command``, ``description``, ``output`` (stdout, or
        stderr when stdout is empty, capped at 1500 characters) and
        ``return_code``. Responds 400 for a non-string or unrecognised
        command, 504 on timeout and 500 on any other execution failure.
    """
    if request.method == "POST":
        command = (request.get_json() or {}).get("command")
    else:
        command = request.args.get("command")

    if not command:
        return jsonify({
            "available_commands": [{"name": k, "description": v["desc"]} for k, v in ALLOWED_COMMANDS.items()],
        })

    # A JSON body may carry any type; only strings can be resolved.
    if not isinstance(command, str):
        return jsonify({"error": "command not in whitelist", "available": list(ALLOWED_COMMANDS.keys())}), 400

    key = command.lower().replace(" ", "_").replace("-", "_")
    if key in ALLOWED_COMMANDS:
        matched = key
    else:
        words = re.findall(r"[a-z0-9]+", key)
        matched = next(
            (
                target
                for keyword, target in _COMMAND_KEYWORDS.items()
                if any(word.startswith(keyword) for word in words)
            ),
            None,
        )

    if not matched:
        return jsonify({"error": "command not in whitelist", "available": list(ALLOWED_COMMANDS.keys())}), 400

    cmd_info = ALLOWED_COMMANDS[matched]
    try:
        # argv list, no shell: user input only selects a whitelist entry and
        # is never interpolated into the command line.
        result = subprocess.run(
            cmd_info["cmd"],
            capture_output=True,
            text=True,
            timeout=30,
            cwd=str(Path(__file__).parent),
        )
        output = (result.stdout.strip() or result.stderr.strip())[:1500]
        return jsonify({
            "command": matched,
            "description": cmd_info["desc"],
            "output": output,
            "return_code": result.returncode,
        })
    except subprocess.TimeoutExpired:
        return jsonify({"error": f"'{matched}' timed out after 30s"}), 504
    except Exception as e:
        logger.error(f"Command error ({matched}): {e}")
        return jsonify({"error": "Command execution failed"}), 500
|
|
1062
|
+
|
|
1063
|
+
|
|
1064
|
+
@app.route("/api/commands", methods=["GET"])
def list_commands():
    """Return the name and description of every whitelisted command."""
    catalogue = []
    for name, spec in ALLOWED_COMMANDS.items():
        catalogue.append({"name": name, "description": spec["desc"]})
    return jsonify({"commands": catalogue})
|
|
1070
|
+
|
|
1071
|
+
|
|
1072
|
+
# ---------------------------------------------------------------------------
|
|
1073
|
+
# Routes — file upload
|
|
1074
|
+
# ---------------------------------------------------------------------------
|
|
1075
|
+
|
|
1076
|
+
@app.route("/api/upload", methods=["POST"])
def upload_file():
    """Upload a file for the voice agent (images, text, code, etc.).

    Expects a multipart ``file`` part whose extension is on the allow-list.
    The stored name is a timestamp followed by the sanitised original name
    (characters other than word characters, ``-`` and ``.`` become ``_``),
    capped at 80 characters while keeping the extension intact.

    Returns:
        JSON with ``filename``, ``original_name``, ``path``, ``type``
        ("image" or "text"), ``size`` and ``url``. Non-image, non-PDF files
        also get ``content_preview`` (first 2000 characters) when the file
        can be read. Responds 400 when the part is missing, has no
        filename, or has a disallowed extension.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files["file"]
    if not file.filename:
        return jsonify({"error": "No filename"}), 400

    allowed_exts = {
        ".png", ".jpg", ".jpeg", ".gif", ".webp",
        ".pdf", ".txt", ".md", ".json", ".csv",
        ".html", ".js", ".py", ".ts", ".css",
    }
    original_suffix = Path(file.filename).suffix
    ext = original_suffix.lower()
    if ext not in allowed_exts:
        return jsonify({"error": f"File type '{ext}' not allowed"}), 400

    safe_name = re.sub(r"[^\w\-.]", "_", file.filename)
    if len(safe_name) > 80:
        # Truncate the stem, not the tail: a plain [:80] slice could cut off
        # the extension that was just validated.
        stem = safe_name[: len(safe_name) - len(original_suffix)]
        safe_name = stem[: 80 - len(original_suffix)] + original_suffix
    save_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{safe_name}"
    save_path = UPLOADS_DIR / save_name
    file.save(save_path)

    is_image = ext in {".png", ".jpg", ".jpeg", ".gif", ".webp"}
    result = {
        "filename": save_name,
        "original_name": file.filename,
        "path": str(save_path),
        "type": "image" if is_image else "text",
        "size": save_path.stat().st_size,
        "url": f"/uploads/{save_name}",
    }
    if not is_image and ext != ".pdf":
        try:
            result["content_preview"] = save_path.read_text(encoding="utf-8", errors="replace")[:2000]
        except Exception as e:
            # The preview is best-effort: the upload itself already succeeded.
            logger.warning(f"Upload: could not read preview for {save_path}: {e}")

    logger.info(f"Upload: {file.filename} → {save_path} ({result['size']} bytes)")
    return jsonify(result)
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
@app.route("/api/uploads", methods=["GET"])
def list_uploads():
    """List files in the uploads directory, newest first.

    Query string:
        type: optional filter; only entries whose kind ("image", "text" or
            "other") equals this value are returned.

    Directories and dot-files are skipped.

    Returns:
        JSON ``{"files": [...], "count": N}`` where each file has
        ``filename``, ``url``, ``type``, ``size`` and ``modified`` (mtime).
    """
    wanted_kind = request.args.get("type")  # "image", "text", or None for all
    image_exts = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
    text_exts = {".pdf", ".txt", ".md", ".json", ".csv", ".html", ".js", ".py", ".ts", ".css"}

    def _kind_of(suffix):
        if suffix in image_exts:
            return "image"
        if suffix in text_exts:
            return "text"
        return "other"

    newest_first = sorted(UPLOADS_DIR.iterdir(), key=lambda x: x.stat().st_mtime, reverse=True)

    files = []
    for entry in newest_first:
        if entry.is_dir() or entry.name.startswith("."):
            continue
        kind = _kind_of(entry.suffix.lower())
        if wanted_kind and kind != wanted_kind:
            continue
        info = entry.stat()
        files.append({
            "filename": entry.name,
            "url": f"/uploads/{entry.name}",
            "type": kind,
            "size": info.st_size,
            "modified": info.st_mtime,
        })

    return jsonify({"files": files, "count": len(files)})
|
|
1145
|
+
|
|
1146
|
+
|
|
1147
|
+
# ---------------------------------------------------------------------------
|
|
1148
|
+
# WebSocket — Gateway proxy (/ws/clawdbot)
|
|
1149
|
+
# ---------------------------------------------------------------------------
|
|
1150
|
+
|
|
1151
|
+
from services.tts import generate_tts_b64 as _generate_tts_b64
|
|
1152
|
+
|
|
1153
|
+
|
|
1154
|
+
def _tts_bytes(text: str) -> bytes:
    """Synthesize *text* with voice "M1" and return the decoded audio bytes.

    Raises:
        RuntimeError: if the TTS backend returns ``None``.
    """
    encoded_audio = _generate_tts_b64(text, voice="M1")
    if encoded_audio is not None:
        return base64.b64decode(encoded_audio)
    raise RuntimeError("TTS generation returned no audio")
|
|
1160
|
+
|
|
1161
|
+
|
|
1162
|
+
@sock.route("/ws/clawdbot")
def clawdbot_websocket(ws):
    """WebSocket proxy between the frontend and the OpenClaw Gateway.

    Connects to CLAWDBOT_GATEWAY_URL, performs the protocol-3 handshake,
    then bridges messages bidirectionally — generating TTS audio for every
    assistant response before forwarding to the client.

    Flow:
        1. Reject the socket unless the request carries a valid Clerk token.
        2. Reject the socket unless CLAWDBOT_AUTH_TOKEN is configured.
        3. Connect to the gateway, read its challenge frame, send a
           ``connect`` request and require an ``ok`` response.
        4. Relay client ``chat.send`` frames to the gateway, and gateway
           ``agent.message`` / ``agent.stream.delta`` / ``agent.stream.end``
           events to the client.

    Args:
        ws: the server-side WebSocket for the browser connection; this
            function uses its ``send``, ``receive`` and ``close`` methods.
    """
    # --- Clerk auth check (uses same cookie/header as HTTP routes) ---
    from services.auth import verify_clerk_token, get_token_from_request
    token = get_token_from_request()
    user_id = verify_clerk_token(token) if token else None
    if not user_id:
        logger.warning("WebSocket rejected — no valid Clerk token")
        ws.send(json.dumps({"type": "error", "message": "Unauthorized"}))
        ws.close()
        return
    logger.info(f"WebSocket authenticated: user_id={user_id}")

    gateway_url = os.getenv("CLAWDBOT_GATEWAY_URL", "ws://127.0.0.1:18791")
    auth_token = os.getenv("CLAWDBOT_AUTH_TOKEN")

    # The gateway token is mandatory; without it the handshake cannot be built.
    if not auth_token:
        logger.error("CLAWDBOT_AUTH_TOKEN not set — WebSocket rejected")
        ws.send(json.dumps({"type": "error", "message": "Server configuration error"}))
        ws.close()
        return

    async def _run():
        """Open the gateway connection, handshake, then run both relay loops."""
        try:
            async with websockets.connect(gateway_url) as gw:
                logger.info(f"WebSocket connected to Gateway at {gateway_url}")

                # Handshake
                # The gateway speaks first; only the frame's "event" field is
                # inspected here (for debug logging).
                challenge = json.loads(await asyncio.wait_for(gw.recv(), timeout=10.0))
                logger.debug(f"Gateway challenge: {challenge.get('event')}")

                from services.gateways.compat import build_connect_params
                connect_params = build_connect_params(
                    auth_token=auth_token,
                    client_id="webchat",
                    client_mode="webchat",
                    platform="web",
                    user_agent="openvoice-ui-webchat/1.0.0",
                    caps=[],
                )
                await gw.send(json.dumps({
                    "type": "req",
                    "id": f"connect-{uuid.uuid4()}",
                    "method": "connect",
                    "params": connect_params,
                }))

                # The connect reply must be a "res" frame with ok truthy.
                resp = json.loads(await asyncio.wait_for(gw.recv(), timeout=10.0))
                if resp.get("type") != "res" or not resp.get("ok"):
                    logger.error(f"Gateway handshake failed: {resp}")
                    ws.send(json.dumps({"type": "error", "message": "Gateway handshake failed"}))
                    ws.close()
                    return

                logger.info("Gateway handshake OK")
                ws.send(json.dumps({"type": "connected", "message": "Connected to OpenClaw Gateway"}))

                async def _from_client():
                    """Relay browser → gateway; only "chat.send" frames are forwarded."""
                    while True:
                        # NOTE(review): ws.receive() is called without await
                        # inside a coroutine. If it blocks until a client frame
                        # arrives (presumably it does), the event loop — and so
                        # _from_gateway — cannot run meanwhile. Confirm, and
                        # consider moving the call to an executor.
                        msg = ws.receive()
                        if not msg:
                            # A falsy frame ends the client loop.
                            break
                        data = json.loads(msg)
                        if data.get("type") == "chat.send":
                            await gw.send(json.dumps({
                                "type": "req",
                                "id": f"chat-{uuid.uuid4()}",
                                "method": "chat.send",
                                "params": {
                                    "content": data.get("content", ""),
                                    "sessionKey": data.get("sessionKey", "main"),
                                },
                            }))

                async def _from_gateway():
                    """Relay gateway events → browser, attaching TTS audio to messages."""
                    while True:
                        try:
                            data = json.loads(await asyncio.wait_for(gw.recv(), timeout=120.0))
                        except asyncio.TimeoutError:
                            # 120s of gateway silence ends this loop. The client
                            # is told, but ws is not closed here.
                            logger.warning("Gateway recv() timed out after 120s — closing connection")
                            ws.send(json.dumps({"type": "error", "message": "Gateway connection timed out"}))
                            return
                        # Frames other than events (e.g. replies to requests) are ignored.
                        if data.get("type") != "event":
                            continue
                        event = data.get("event")
                        payload = data.get("payload", {})

                        if event == "agent.message":
                            content = payload.get("content", "")
                            if content:
                                try:
                                    # _tts_bytes is a synchronous call made from
                                    # within this coroutine.
                                    audio_b64 = base64.b64encode(_tts_bytes(content)).decode()
                                    ws.send(json.dumps({
                                        "type": "assistant_message",
                                        "text": content,
                                        "audio": audio_b64,
                                    }))
                                except Exception as e:
                                    # TTS is optional: fall back to a text-only message.
                                    logger.error(f"TTS failed in WebSocket handler: {e}")
                                    ws.send(json.dumps({"type": "assistant_message", "text": content}))

                        elif event == "agent.stream.delta":
                            ws.send(json.dumps({
                                "type": "text_delta",
                                "delta": payload.get("delta", ""),
                            }))

                        elif event == "agent.stream.end":
                            ws.send(json.dumps({"type": "stream_end"}))

                # Both relay loops must finish before the gateway connection
                # is released.
                await asyncio.gather(_from_client(), _from_gateway())

        except (ConnectionRefusedError, OSError) as e:
            logger.error(f"Cannot reach Gateway at {gateway_url}: {e}")
            ws.send(json.dumps({"type": "error", "message": "Cannot connect to Gateway"}))
            ws.close()
        except Exception as e:
            logger.error(f"WebSocket error: {e}")
            ws.send(json.dumps({"type": "error", "message": "Connection error"}))
            ws.close()

    # A fresh event loop is created per connection. Errors raised by _run's
    # own error handlers (e.g. sending on a closed socket) land here.
    try:
        asyncio.run(_run())
    except Exception as e:
        logger.error(f"Fatal WebSocket error: {e}")
|
|
1293
|
+
|
|
1294
|
+
|
|
1295
|
+
# ---------------------------------------------------------------------------
|
|
1296
|
+
# OpenClaw Control UI — WebSocket proxy
|
|
1297
|
+
# ---------------------------------------------------------------------------
|
|
1298
|
+
# Accepts browser WS (Clerk-authed via __session cookie), connects to the
|
|
1299
|
+
# internal openclaw gateway, and relays all frames bidirectionally.
|
|
1300
|
+
# Transparently injects the gateway auth token into the connect handshake
|
|
1301
|
+
# so the user never has to enter it.
|
|
1302
|
+
|
|
1303
|
+
def _inject_gateway_auth(msg, auth_token):
    """Return *msg* with the gateway auth token added to a connect request.

    A frame is rewritten only when it parses as a JSON object with
    ``type == "req"`` and ``method == "connect"``; its ``params.auth`` is
    then set to ``{"token": auth_token}`` and the frame is re-serialized.
    Every other frame (non-JSON, non-object JSON, other request types, or
    a connect request whose ``params`` is not an object) is returned
    unchanged so it can be relayed as-is.
    """
    try:
        data = json.loads(msg)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError as well as the
        # UnicodeDecodeError raised for binary frames that are not UTF-8.
        return msg
    if not isinstance(data, dict):
        return msg
    if data.get("type") != "req" or data.get("method") != "connect":
        return msg
    params = data.setdefault("params", {})
    if not isinstance(params, dict):
        return msg
    params["auth"] = {"token": auth_token}
    return json.dumps(data)


@sock.route("/openclaw-ui")
def openclaw_ui_websocket(ws):
    """WebSocket proxy for OpenClaw Control UI behind Clerk auth.

    Rejects the connection (error frame, then close) when no valid Clerk
    token accompanies the request or when CLAWDBOT_AUTH_TOKEN is unset.
    Otherwise connects to the gateway at CLAWDBOT_GATEWAY_URL and relays
    frames in both directions until one side ends, injecting the gateway
    auth token into the browser's connect request.
    """
    from services.auth import verify_clerk_token, get_token_from_request
    token = get_token_from_request()
    user_id = verify_clerk_token(token) if token else None
    if not user_id:
        logger.warning("OpenClaw UI WebSocket rejected — no valid Clerk token")
        ws.send(json.dumps({"type": "error", "message": "Unauthorized"}))
        ws.close()
        return
    logger.info(f"OpenClaw UI WebSocket authenticated: user_id={user_id}")

    gateway_url = os.getenv("CLAWDBOT_GATEWAY_URL", "ws://127.0.0.1:18791")
    auth_token = os.getenv("CLAWDBOT_AUTH_TOKEN")

    if not auth_token:
        logger.error("CLAWDBOT_AUTH_TOKEN not set — OpenClaw UI WebSocket rejected")
        ws.send(json.dumps({"type": "error", "message": "Server configuration error"}))
        ws.close()
        return

    async def _run():
        try:
            async with websockets.connect(gateway_url) as gw:
                logger.info(f"OpenClaw UI: connected to Gateway at {gateway_url}")
                loop = asyncio.get_running_loop()

                async def _from_client():
                    """Relay browser → openclaw, injecting auth token on connect."""
                    while True:
                        # ws.receive() is a blocking call. Run it in a worker
                        # thread so the event loop stays free to relay
                        # gateway frames and to fire the recv timeout.
                        # NOTE(review): assumes the ws object tolerates
                        # receive() in one thread and send() in another —
                        # confirm against the WebSocket library in use.
                        msg = await loop.run_in_executor(None, ws.receive)
                        if not msg:
                            break
                        await gw.send(_inject_gateway_auth(msg, auth_token))

                async def _from_gateway():
                    """Relay openclaw → browser (raw, no transformation)."""
                    while True:
                        try:
                            msg = await asyncio.wait_for(gw.recv(), timeout=300.0)
                            ws.send(msg)
                        except asyncio.TimeoutError:
                            logger.warning("OpenClaw UI: gateway recv() timed out")
                            ws.send(json.dumps({"type": "error", "message": "Gateway timeout"}))
                            return

                await asyncio.gather(_from_client(), _from_gateway())

        except (ConnectionRefusedError, OSError) as e:
            logger.error(f"OpenClaw UI: cannot reach Gateway at {gateway_url}: {e}")
            ws.send(json.dumps({"type": "error", "message": "Cannot connect to Gateway"}))
            ws.close()
        except Exception as e:
            logger.error(f"OpenClaw UI WebSocket error: {e}")
            ws.send(json.dumps({"type": "error", "message": "Connection error"}))
            ws.close()

    try:
        asyncio.run(_run())
    except Exception as e:
        # Last-resort boundary: includes failures while reporting an error
        # to a browser socket that is already closed.
        logger.error(f"OpenClaw UI: fatal WebSocket error: {e}")
|
|
1373
|
+
|
|
1374
|
+
|
|
1375
|
+
# ---------------------------------------------------------------------------
|
|
1376
|
+
# Game Library API
|
|
1377
|
+
# ---------------------------------------------------------------------------
|
|
1378
|
+
# Serves game catalog data to canvas pages (same-origin, no CORS needed).
|
|
1379
|
+
# Reads /app/runtime/game-catalog.json (mounted from /mnt/game-drive/catalog.json).
|
|
1380
|
+
# Falls back to HTTP proxy at host:6360 if file not mounted.
|
|
1381
|
+
_GAME_CATALOG_PATH = os.getenv("GAME_CATALOG_PATH", "/app/runtime/game-catalog.json")
_GAME_SERVER_URL = os.getenv("GAME_SERVER_URL", "http://172.19.0.1:6360")


def _load_game_catalog():
    """Load the game catalog from the first usable JSON file.

    Tries ``_GAME_CATALOG_PATH`` first, then the copy under the uploads
    directory. A file that is missing or unreadable is skipped silently;
    a file that exists but is not valid JSON, or whose top-level value is
    not a list, is skipped with a warning so a corrupt catalog is visible
    in the logs.

    Returns:
        The catalog as a list, or None when no candidate file is usable.
    """
    # Check primary path, then uploads fallback (uploads/ is always mounted)
    for path in (_GAME_CATALOG_PATH, "/app/runtime/uploads/game-catalog.json"):
        try:
            # Explicit UTF-8: the locale default can be ASCII, which would
            # reject non-ASCII game titles.
            with open(path, encoding="utf-8") as f:
                catalog = json.load(f)
        except OSError:
            continue
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            logger.warning("Game catalog %s is not valid JSON: %s", path, e)
            continue
        if isinstance(catalog, list):
            return catalog
        logger.warning("Game catalog %s is not a JSON list — ignoring", path)
    return None
|
|
1394
|
+
|
|
1395
|
+
@app.route("/api/games", methods=["GET"])
def games_api():
    """Return downloaded games, optionally filtered by query parameters.

    Query parameters (all optional, compared case-insensitively):
        system: exact match on the game's ``system`` field.
        genre:  exact match on the game's ``genre`` field.
        search: substring match against ``title`` or ``description``.

    When the catalog file is available the filtering happens here and only
    entries whose ``status`` is ``"downloaded"`` are returned. Otherwise the
    request is forwarded to the game server at ``_GAME_SERVER_URL``; a 502
    JSON error is returned if that request fails.
    """
    catalog = _load_game_catalog()
    if catalog is not None:
        def _field(game, key):
            # A key that is present but null yields None from .get(), so
            # normalize to "" before lowercasing.
            return str(game.get(key) or "").lower()

        system = request.args.get("system", "").lower()
        genre = request.args.get("genre", "").lower()
        search = request.args.get("search", "").lower()
        games = [g for g in catalog if g.get("status") == "downloaded"]
        if system:
            games = [g for g in games if _field(g, "system") == system]
        if genre:
            games = [g for g in games if _field(g, "genre") == genre]
        if search:
            games = [g for g in games if
                     search in _field(g, "title") or
                     search in _field(g, "description")]
        return jsonify(games)
    try:
        import requests as _req
        r = _req.get(f"{_GAME_SERVER_URL}/api/games", params=dict(request.args), timeout=10)
        return Response(r.content, status=r.status_code, mimetype="application/json")
    except Exception as e:
        return jsonify({"error": f"Game server unreachable: {e}"}), 502
|
|
1418
|
+
|
|
1419
|
+
@app.route("/api/games/stats", methods=["GET"])
def games_stats_api():
    """Return catalog totals and per-system / per-genre download counts.

    With a local catalog the response has the keys ``total``,
    ``downloaded``, ``pending``, ``failed``, ``bySystem`` and ``byGenre``;
    the two breakdowns count only entries whose status is ``"downloaded"``.
    Without one, the request is forwarded to ``/api/stats`` on the game
    server, and a 502 JSON error is returned if that request fails.
    """
    catalog = _load_game_catalog()
    if catalog is not None:
        downloaded = pending = failed = 0
        by_system, by_genre = {}, {}
        # Single pass over the catalog instead of one pass per counter.
        for g in catalog:
            status = g.get("status")
            if status == "downloaded":
                downloaded += 1
                # A missing or null field is grouped under "unknown"; a None
                # key cannot be sorted next to string keys when the JSON
                # encoder sorts keys.
                s = g.get("system")
                if s is None:
                    s = "unknown"
                by_system[s] = by_system.get(s, 0) + 1
                gr = g.get("genre")
                if gr is None:
                    gr = "unknown"
                by_genre[gr] = by_genre.get(gr, 0) + 1
            elif status == "pending":
                pending += 1
            elif status == "failed":
                failed += 1
        return jsonify({"total": len(catalog), "downloaded": downloaded,
                        "pending": pending, "failed": failed,
                        "bySystem": by_system, "byGenre": by_genre})
    try:
        import requests as _req
        r = _req.get(f"{_GAME_SERVER_URL}/api/stats", timeout=10)
        return Response(r.content, status=r.status_code, mimetype="application/json")
    except Exception as e:
        return jsonify({"error": f"Game server unreachable: {e}"}), 502
|
|
1443
|
+
|
|
1444
|
+
@app.route("/api/games/systems", methods=["GET"])
def games_systems_api():
    """Return the sorted, de-duplicated list of systems in the catalog.

    Entries with a missing or falsy ``system`` are left out. If no local
    catalog can be loaded, the request is forwarded to ``/api/systems`` on
    the game server, and a 502 JSON error is returned if that fails.
    """
    catalog = _load_game_catalog()
    if catalog is not None:
        unique_systems = {g.get("system", "") for g in catalog if g.get("system")}
        return jsonify(sorted(unique_systems))
    try:
        import requests as _req
        upstream = _req.get(f"{_GAME_SERVER_URL}/api/systems", timeout=10)
        return Response(
            upstream.content,
            status=upstream.status_code,
            mimetype="application/json",
        )
    except Exception as e:
        return jsonify({"error": f"Game server unreachable: {e}"}), 502
|
|
1456
|
+
|
|
1457
|
+
|
|
1458
|
+
# ---------------------------------------------------------------------------
|
|
1459
|
+
# Per-endpoint rate limits (applied after all routes are registered)
|
|
1460
|
+
# ---------------------------------------------------------------------------
|
|
1461
|
+
# limiter.limit() returns a wrapped function — must assign it back into
|
|
1462
|
+
# app.view_functions or the limit is silently discarded.
|
|
1463
|
+
_limiter = getattr(app, 'limiter', None)
if _limiter:
    # (endpoint name, rate) pairs; each registered view function is swapped
    # for its rate-limited wrapper in app.view_functions.
    for _endpoint, _rate in (
        ('conversation.conversation', '30/minute'),
        ('conversation.tts_generate', '10/minute'),
        ('conversation.tts_preview', '10/minute'),
        ('upload_file', '5/minute'),
        ('groq_stt', '60/minute'),
        ('local_stt', '60/minute'),
    ):
        _view_fn = app.view_functions.get(_endpoint)
        if not _view_fn:
            logger.warning("Rate limit: endpoint %r not found — skipping", _endpoint)
            continue
        app.view_functions[_endpoint] = _limiter.limit(_rate)(_view_fn)
|
|
1478
|
+
|
|
1479
|
+
|
|
1480
|
+
# ---------------------------------------------------------------------------
|
|
1481
|
+
# Entry point
|
|
1482
|
+
# ---------------------------------------------------------------------------
|
|
1483
|
+
|
|
1484
|
+
if __name__ == "__main__":
    port = int(os.getenv("PORT", 5001))

    # Clean SIGTERM shutdown so systemd stop/restart works correctly.
    # Restart=on-failure only triggers on non-zero exit — os._exit(0) prevents that.
    def _handle_sigterm(signum, frame):
        """Log the shutdown request and exit immediately with status 0."""
        logger.info("SIGTERM received — shutting down.")
        os._exit(0)

    signal.signal(signal.SIGTERM, _handle_sigterm)
    # signal.SIGHUP is only defined on Unix; referencing it unconditionally
    # raises AttributeError at startup on Windows.
    if hasattr(signal, "SIGHUP"):
        signal.signal(signal.SIGHUP, signal.SIG_IGN)

    # The tested-version constant is informational only; start without it
    # if the gateway module cannot be imported.
    try:
        from services.gateways.openclaw import OPENCLAW_TESTED_VERSION
        _oc_ver = OPENCLAW_TESTED_VERSION
    except ImportError:
        _oc_ver = "unknown"

    logger.info(f"OpenVoiceUI starting on port {port}")
    logger.info(f"  Frontend  → http://localhost:{port}/")
    logger.info(f"  Health    → http://localhost:{port}/health/ready")
    logger.info(f"  Admin     → http://localhost:{port}/src/admin.html")
    logger.info(f"  Gateway   → {os.getenv('CLAWDBOT_GATEWAY_URL', 'ws://127.0.0.1:18791')}")
    logger.info(f"  Tested OpenClaw version: {_oc_ver}")

    host = os.getenv("HOST", "127.0.0.1")  # Docker sets HOST=0.0.0.0; VPS stays loopback
    app.run(host=host, port=port, debug=False, threaded=True)
|