openvoiceui 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +104 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +638 -0
- package/SETUP.md +360 -0
- package/app.py +232 -0
- package/auto-approve-devices.js +111 -0
- package/cli/index.js +372 -0
- package/config/__init__.py +4 -0
- package/config/default.yaml +43 -0
- package/config/flags.yaml +67 -0
- package/config/loader.py +203 -0
- package/config/providers.yaml +71 -0
- package/config/speech_normalization.yaml +182 -0
- package/config/theme.json +4 -0
- package/data/greetings.json +25 -0
- package/default-pages/ai-image-creator.html +915 -0
- package/default-pages/bulk-image-uploader.html +492 -0
- package/default-pages/desktop.html +2865 -0
- package/default-pages/file-explorer.html +854 -0
- package/default-pages/interactive-map.html +655 -0
- package/default-pages/style-guide.html +1005 -0
- package/default-pages/website-setup.html +1623 -0
- package/deploy/openclaw/Dockerfile +46 -0
- package/deploy/openvoiceui.service +30 -0
- package/deploy/setup-nginx.sh +50 -0
- package/deploy/setup-sudo.sh +306 -0
- package/deploy/skill-runner/Dockerfile +19 -0
- package/deploy/skill-runner/requirements.txt +14 -0
- package/deploy/skill-runner/server.py +269 -0
- package/deploy/supertonic/Dockerfile +22 -0
- package/deploy/supertonic/server.py +79 -0
- package/docker-compose.pinokio.yml +11 -0
- package/docker-compose.yml +59 -0
- package/greetings.json +25 -0
- package/index.html +65 -0
- package/inject-device-identity.js +142 -0
- package/package.json +82 -0
- package/profiles/default.json +114 -0
- package/profiles/manager.py +354 -0
- package/profiles/schema.json +337 -0
- package/prompts/voice-system-prompt.md +149 -0
- package/providers/__init__.py +39 -0
- package/providers/base.py +63 -0
- package/providers/llm/__init__.py +12 -0
- package/providers/llm/base.py +71 -0
- package/providers/llm/clawdbot_provider.py +112 -0
- package/providers/llm/zai_provider.py +115 -0
- package/providers/registry.py +320 -0
- package/providers/stt/__init__.py +12 -0
- package/providers/stt/base.py +58 -0
- package/providers/stt/webspeech_provider.py +49 -0
- package/providers/stt/whisper_provider.py +100 -0
- package/providers/tts/__init__.py +20 -0
- package/providers/tts/base.py +91 -0
- package/providers/tts/groq_provider.py +74 -0
- package/providers/tts/supertonic_provider.py +72 -0
- package/requirements.txt +38 -0
- package/routes/__init__.py +10 -0
- package/routes/admin.py +515 -0
- package/routes/canvas.py +1315 -0
- package/routes/chat.py +51 -0
- package/routes/conversation.py +2158 -0
- package/routes/elevenlabs_hybrid.py +306 -0
- package/routes/greetings.py +98 -0
- package/routes/icons.py +279 -0
- package/routes/image_gen.py +364 -0
- package/routes/instructions.py +190 -0
- package/routes/music.py +838 -0
- package/routes/onboarding.py +43 -0
- package/routes/pi.py +62 -0
- package/routes/profiles.py +215 -0
- package/routes/report_issue.py +68 -0
- package/routes/static_files.py +533 -0
- package/routes/suno.py +664 -0
- package/routes/theme.py +81 -0
- package/routes/transcripts.py +199 -0
- package/routes/vision.py +348 -0
- package/routes/workspace.py +288 -0
- package/server.py +1510 -0
- package/services/__init__.py +1 -0
- package/services/auth.py +143 -0
- package/services/canvas_versioning.py +239 -0
- package/services/db_pool.py +107 -0
- package/services/gateway.py +16 -0
- package/services/gateway_manager.py +333 -0
- package/services/gateways/__init__.py +12 -0
- package/services/gateways/base.py +110 -0
- package/services/gateways/compat.py +264 -0
- package/services/gateways/openclaw.py +1134 -0
- package/services/health.py +100 -0
- package/services/memory_client.py +455 -0
- package/services/paths.py +26 -0
- package/services/speech_normalizer.py +285 -0
- package/services/tts.py +270 -0
- package/setup-config.js +262 -0
- package/sounds/air_horn.mp3 +0 -0
- package/sounds/bruh.mp3 +0 -0
- package/sounds/crowd_cheer.mp3 +0 -0
- package/sounds/gunshot.mp3 +0 -0
- package/sounds/impact.mp3 +0 -0
- package/sounds/lets_go.mp3 +0 -0
- package/sounds/record_stop.mp3 +0 -0
- package/sounds/rewind.mp3 +0 -0
- package/sounds/sad_trombone.mp3 +0 -0
- package/sounds/scratch_long.mp3 +0 -0
- package/sounds/yeah.mp3 +0 -0
- package/src/adapters/ClawdBotAdapter.js +264 -0
- package/src/adapters/_template.js +133 -0
- package/src/adapters/elevenlabs-classic.js +841 -0
- package/src/adapters/elevenlabs-hybrid.js +812 -0
- package/src/adapters/hume-evi.js +676 -0
- package/src/admin.html +1339 -0
- package/src/app.js +8802 -0
- package/src/core/Config.js +173 -0
- package/src/core/EmotionEngine.js +307 -0
- package/src/core/EventBridge.js +180 -0
- package/src/core/EventBus.js +117 -0
- package/src/core/VoiceSession.js +607 -0
- package/src/face/BaseFace.js +259 -0
- package/src/face/EyeFace.js +208 -0
- package/src/face/HaloSmokeFace.js +509 -0
- package/src/face/manifest.json +27 -0
- package/src/face/previews/eyes.svg +16 -0
- package/src/face/previews/orb.svg +29 -0
- package/src/features/MusicPlayer.js +620 -0
- package/src/features/Soundboard.js +128 -0
- package/src/providers/DeepgramSTT.js +472 -0
- package/src/providers/DeepgramStreamingSTT.js +766 -0
- package/src/providers/GroqSTT.js +559 -0
- package/src/providers/TTSPlayer.js +323 -0
- package/src/providers/WebSpeechSTT.js +479 -0
- package/src/providers/tts/BaseTTSProvider.js +81 -0
- package/src/providers/tts/HumeProvider.js +77 -0
- package/src/providers/tts/SupertonicProvider.js +174 -0
- package/src/providers/tts/index.js +140 -0
- package/src/shell/adapter-registry.js +154 -0
- package/src/shell/caller-bridge.js +35 -0
- package/src/shell/camera-bridge.js +28 -0
- package/src/shell/canvas-bridge.js +32 -0
- package/src/shell/commercial-bridge.js +44 -0
- package/src/shell/face-bridge.js +44 -0
- package/src/shell/music-bridge.js +60 -0
- package/src/shell/orchestrator.js +233 -0
- package/src/shell/profile-discovery.js +303 -0
- package/src/shell/sounds-bridge.js +28 -0
- package/src/shell/transcript-bridge.js +61 -0
- package/src/shell/waveform-bridge.js +33 -0
- package/src/styles/base.css +2862 -0
- package/src/styles/face.css +417 -0
- package/src/styles/pi-overrides.css +89 -0
- package/src/styles/theme-dark.css +67 -0
- package/src/test-tts.html +175 -0
- package/src/ui/AppShell.js +544 -0
- package/src/ui/ProfileSwitcher.js +228 -0
- package/src/ui/SessionControl.js +240 -0
- package/src/ui/face/FacePicker.js +195 -0
- package/src/ui/face/FaceRenderer.js +309 -0
- package/src/ui/settings/PlaylistEditor.js +366 -0
- package/src/ui/settings/SettingsPanel.css +684 -0
- package/src/ui/settings/SettingsPanel.js +419 -0
- package/src/ui/settings/TTSVoicePreview.js +210 -0
- package/src/ui/themes/ThemeManager.js +213 -0
- package/src/ui/visualizers/BaseVisualizer.js +29 -0
- package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
- package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
- package/static/emulators/jsdos/js-dos.css +1 -0
- package/static/emulators/jsdos/js-dos.js +22 -0
- package/static/favicon.svg +55 -0
- package/static/icons/apple-touch-icon.png +0 -0
- package/static/icons/favicon-32.png +0 -0
- package/static/icons/icon-192.png +0 -0
- package/static/icons/icon-512.png +0 -0
- package/static/install.html +449 -0
- package/static/manifest.json +26 -0
- package/static/sw.js +21 -0
- package/tts_providers/__init__.py +136 -0
- package/tts_providers/base_provider.py +319 -0
- package/tts_providers/groq_provider.py +155 -0
- package/tts_providers/hume_provider.py +226 -0
- package/tts_providers/providers_config.json +119 -0
- package/tts_providers/qwen3_provider.py +371 -0
- package/tts_providers/resemble_provider.py +315 -0
- package/tts_providers/supertonic_provider.py +557 -0
- package/tts_providers/supertonic_tts.py +399 -0
package/routes/theme.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
routes/theme.py — Theme API Blueprint (P4-T4)
|
|
3
|
+
|
|
4
|
+
Provides server-side persistence for user theme preferences.
|
|
5
|
+
|
|
6
|
+
GET /api/theme — return current saved theme colors
|
|
7
|
+
POST /api/theme — save theme colors (primary + accent)
|
|
8
|
+
POST /api/theme/reset — reset to default theme
|
|
9
|
+
|
|
10
|
+
Theme is stored in config/theme.json
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import logging
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from flask import Blueprint, jsonify, request
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
theme_bp = Blueprint('theme', __name__)
|
|
22
|
+
|
|
23
|
+
_PROJECT_ROOT = Path(__file__).parent.parent
|
|
24
|
+
THEME_FILE = _PROJECT_ROOT / 'config' / 'theme.json'
|
|
25
|
+
|
|
26
|
+
DEFAULT_THEME = {
|
|
27
|
+
'primary': '#0088ff',
|
|
28
|
+
'accent': '#00ffff',
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _load_theme():
    """Load the saved theme from THEME_FILE.

    Returns:
        dict: The parsed file contents when the file exists, holds a JSON
        object, and both its 'primary' and 'accent' values pass
        ``_valid_hex``. In every other case (missing file, unreadable file,
        invalid JSON, invalid colors) a fresh copy of DEFAULT_THEME.
    """
    try:
        if THEME_FILE.exists():
            data = json.loads(THEME_FILE.read_text(encoding='utf-8'))
            # A hand-edited file may hold a list/string/number; only a JSON
            # object can carry the color keys, so check before calling .get().
            if (isinstance(data, dict)
                    and _valid_hex(data.get('primary'))
                    and _valid_hex(data.get('accent'))):
                return data
    except (OSError, ValueError) as e:
        # OSError: file unreadable. ValueError covers json.JSONDecodeError
        # and UnicodeDecodeError (both are ValueError subclasses).
        logger.warning('Failed to load theme: %s', e)
    return dict(DEFAULT_THEME)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _save_theme(theme):
    """Write *theme* to THEME_FILE as indented JSON, creating the parent directory first."""
    config_dir = THEME_FILE.parent
    config_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(theme, indent=2)
    THEME_FILE.write_text(serialized)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _valid_hex(color):
|
|
52
|
+
"""Return True if color is a valid #rrggbb hex string."""
|
|
53
|
+
if not isinstance(color, str):
|
|
54
|
+
return False
|
|
55
|
+
import re
|
|
56
|
+
return bool(re.fullmatch(r'#[0-9a-fA-F]{6}', color))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@theme_bp.get('/api/theme')
def get_theme():
    """GET /api/theme — respond with the theme returned by ``_load_theme`` as JSON."""
    current = _load_theme()
    return jsonify(current)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@theme_bp.post('/api/theme')
def set_theme():
    """POST /api/theme — validate and persist the primary and accent colors.

    Expects a JSON object with 'primary' and 'accent' keys, each a
    ``#rrggbb`` string.

    Returns:
        200 with the saved ``{'primary', 'accent'}`` object on success;
        400 with an 'error' message when either color is missing or invalid
        (including when the body is not a JSON object);
        500 with an 'error' message when the theme file cannot be written.
    """
    data = request.get_json(silent=True)
    # Valid JSON that is not an object (a list, string, number) has no .get();
    # treat it like a missing body so it is rejected with 400 instead of
    # raising AttributeError and surfacing as a 500.
    if not isinstance(data, dict):
        data = {}
    primary = data.get('primary', '')
    accent = data.get('accent', '')

    if not _valid_hex(primary) or not _valid_hex(accent):
        return jsonify({'error': 'Invalid color format. Use #rrggbb hex values.'}), 400

    theme = {'primary': primary, 'accent': accent}
    try:
        _save_theme(theme)
    except OSError as e:
        # Keep the API contract JSON-only even when the disk write fails.
        logger.error('Failed to save theme: %s', e)
        return jsonify({'error': 'Failed to save theme'}), 500
    return jsonify(theme)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@theme_bp.post('/api/theme/reset')
def reset_theme():
    """POST /api/theme/reset — overwrite the saved theme with a copy of DEFAULT_THEME and return the defaults."""
    defaults = dict(DEFAULT_THEME)
    _save_theme(defaults)
    return jsonify(DEFAULT_THEME)
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Transcript storage — saves listen-mode transcriptions to disk.
|
|
3
|
+
|
|
4
|
+
Files are organized as:
|
|
5
|
+
transcripts/
|
|
6
|
+
YYYY-MM-DD/
|
|
7
|
+
HH-MM-SS_<slug>.txt
|
|
8
|
+
|
|
9
|
+
POST /api/transcripts/save — save a transcript
|
|
10
|
+
GET /api/transcripts — list saved transcripts (newest first)
|
|
11
|
+
GET /api/transcripts/<date>/<filename> — read one transcript
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
import json
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from flask import Blueprint, jsonify, request
|
|
20
|
+
|
|
21
|
+
transcripts_bp = Blueprint('transcripts', __name__)
|
|
22
|
+
|
|
23
|
+
from services.paths import TRANSCRIPTS_DIR as _TRANSCRIPTS_DIR_PATH
|
|
24
|
+
|
|
25
|
+
TRANSCRIPTS_DIR = str(_TRANSCRIPTS_DIR_PATH)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _slug(title: str) -> str:
|
|
29
|
+
"""Turn a title into a safe filename slug."""
|
|
30
|
+
s = title.strip().lower()
|
|
31
|
+
s = re.sub(r'[^\w\s-]', '', s)
|
|
32
|
+
s = re.sub(r'[\s_]+', '-', s)
|
|
33
|
+
s = s.strip('-')
|
|
34
|
+
return s[:60] or 'untitled'
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@transcripts_bp.route('/api/transcripts/save', methods=['POST'])
def save_transcript():
    """POST /api/transcripts/save — write a listen-mode transcript to disk.

    Expects a JSON object with an optional 'title' and a required 'text'.
    The file is written to ``TRANSCRIPTS_DIR/YYYY-MM-DD/HH-MM-SS_<slug>.txt``
    with a small ``Title:/Date:/Words:`` header, a ``---`` separator, then
    the text. If that filename is already taken, ``-2``, ``-3``, ... is
    appended to the stem so an earlier transcript is never overwritten.

    Returns:
        200 with ``{'saved', 'path', 'date', 'filename', 'words'}`` on success;
        400 with an 'error' message when the text is missing or a field is
        not a string.
    """
    data = request.get_json(force=True, silent=True)
    # A JSON list/string/number has no .get(); treat it as an empty body.
    if not isinstance(data, dict):
        data = {}

    raw_title = data.get('title') or 'Untitled'
    raw_text = data.get('text') or ''
    if not isinstance(raw_title, str) or not isinstance(raw_text, str):
        return jsonify({'error': 'title and text must be strings'}), 400

    # Collapse all whitespace (including newlines) so the title stays on one
    # header line and cannot inject extra "Words:"/"Date:" lines that
    # list_transcripts would later parse as metadata.
    title = ' '.join(raw_title.split()) or 'Untitled'
    text = raw_text.strip()

    if not text:
        return jsonify({'error': 'No transcript text provided'}), 400

    now = datetime.now()
    date_dir = now.strftime('%Y-%m-%d')
    time_part = now.strftime('%H-%M-%S')
    stem = f'{time_part}_{_slug(title)}'

    save_dir = os.path.join(TRANSCRIPTS_DIR, date_dir)
    os.makedirs(save_dir, exist_ok=True)

    word_count = len(text.split())
    content = (
        f'Title: {title}\n'
        f'Date: {now.strftime("%Y-%m-%d %H:%M:%S")}\n'
        f'Words: {word_count}\n'
        f'\n---\n\n'
        f'{text}\n'
    )

    # Exclusive-create ('x') so two saves with the same title within the same
    # second get distinct files instead of the second clobbering the first.
    filename = f'{stem}.txt'
    attempt = 1
    while True:
        filepath = os.path.join(save_dir, filename)
        try:
            with open(filepath, 'x', encoding='utf-8') as f:
                f.write(content)
            break
        except FileExistsError:
            attempt += 1
            filename = f'{stem}-{attempt}.txt'

    return jsonify({
        'saved': True,
        'path': f'transcripts/{date_dir}/{filename}',
        'date': date_dir,
        'filename': filename,
        'words': word_count,
    })
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@transcripts_bp.route('/api/transcripts', methods=['GET'])
def list_transcripts():
    """GET /api/transcripts — list saved ``.txt`` transcripts, newest first.

    Walks ``TRANSCRIPTS_DIR/<date>/`` in reverse-sorted order and, for each
    ``.txt`` file, reads the header lines up to the ``---`` separator to
    fill in title, timestamp and word count.

    Returns:
        A JSON array of ``{'title', 'date', 'filename', 'words'}`` objects
        (plus 'timestamp' when the file has a ``Date:`` line). An empty
        array when the transcripts directory does not exist.
    """
    entries = []
    if not os.path.isdir(TRANSCRIPTS_DIR):
        return jsonify([])

    for date_dir in sorted(os.listdir(TRANSCRIPTS_DIR), reverse=True):
        day_path = os.path.join(TRANSCRIPTS_DIR, date_dir)
        if not os.path.isdir(day_path):
            continue
        for fname in sorted(os.listdir(day_path), reverse=True):
            if not fname.endswith('.txt'):
                continue
            fpath = os.path.join(day_path, fname)
            # Defaults derived from the filename; header values override them.
            meta = {'title': fname, 'date': date_dir, 'filename': fname, 'words': 0}
            try:
                with open(fpath, 'r', encoding='utf-8') as f:
                    for line in f:
                        if line.startswith('Title:'):
                            meta['title'] = line[6:].strip()
                        elif line.startswith('Date:'):
                            meta['timestamp'] = line[5:].strip()
                        elif line.startswith('Words:'):
                            try:
                                meta['words'] = int(line[6:].strip())
                            except ValueError:
                                # Malformed count: keep the default of 0 but
                                # carry on reading the rest of the header.
                                pass
                        elif line.strip() == '---':
                            break
            except (OSError, UnicodeDecodeError):
                # Unreadable or non-UTF-8 file: still list it, using whatever
                # metadata was collected before the failure.
                pass
            entries.append(meta)

    return jsonify(entries)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@transcripts_bp.route('/api/transcripts/<date_dir>/<filename>', methods=['GET'])
def get_transcript(date_dir, filename):
    """GET /api/transcripts/<date>/<filename> — return one transcript file as plain text.

    The requested path is resolved and must stay inside TRANSCRIPTS_DIR;
    anything that escapes it is answered with 400, a missing file with 404.
    """
    root = Path(TRANSCRIPTS_DIR).resolve()
    try:
        target = (root / date_dir / filename).resolve()
    except (ValueError, OSError):
        return jsonify({'error': 'Invalid path'}), 400

    # Containment check: the resolved target is the root itself or lies below it.
    inside_root = target == root or root in target.parents
    if not inside_root:
        return jsonify({'error': 'Invalid path'}), 400

    if not target.is_file():
        return jsonify({'error': 'Not found'}), 404

    body = target.read_text(encoding='utf-8')
    headers = {'Content-Type': 'text/plain; charset=utf-8'}
    return body, 200, headers
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
import logging as _transcript_logger
|
|
128
|
+
|
|
129
|
+
def save_conversation_turn(
    user_msg: str,
    ai_response: str,
    session_id: str = 'default',
    session_key: str = None,
    tts_provider: str = None,
    voice: str = None,
    duration_ms: int = None,
    actions: list = None,
    identified_person: dict = None,
) -> 'str | None':
    """Save one conversation turn as a JSON transcript file.

    Organized as: transcripts/YYYY-MM-DD/HH-MM-SS_<session_key>_<session_id>.json
    (directory and filename use the server's local time).

    The payload's 'timestamp' is an ISO-8601 UTC instant with millisecond
    precision and a trailing 'Z'; 'date' and 'time' remain local time.
    Only actions with type 'tool' and phase 'result' are recorded, each
    with its result truncated to 120 characters.

    Returns the relative file path on success, or None on failure.
    Never raises — errors are logged at debug level so callers are never broken.
    """
    try:
        # Local import: the module only imports `datetime` from the datetime package.
        from datetime import timezone

        now = datetime.now()
        date_str = now.strftime('%Y-%m-%d')
        time_str = now.strftime('%H-%M-%S')

        # 'Z' designates UTC, so convert the local instant before stamping it.
        # astimezone() on a naive datetime treats it as system local time.
        utc_now = now.astimezone(timezone.utc)
        ms_str = f'{utc_now.microsecond // 1000:03d}'
        ts_iso = f'{utc_now.strftime("%Y-%m-%dT%H:%M:%S")}.{ms_str}Z'

        # Extract brief tool summaries from captured actions (phase=result only)
        tools = []
        for action in actions or []:
            # Skip malformed entries rather than losing the whole turn.
            if not isinstance(action, dict):
                continue
            if action.get('type') == 'tool' and action.get('phase') == 'result':
                name = action.get('name', 'unknown')
                result = action.get('result', '')
                summary = str(result)[:120] if result else ''
                tools.append({'name': name, 'phase': 'result', 'summary': summary})

        user_words = len(user_msg.split()) if user_msg else 0
        ai_words = len(ai_response.split()) if ai_response else 0
        key = session_key or 'unknown'

        payload = {
            'schema': 'v1',
            'session_id': session_id,
            'session_key': key,
            'timestamp': ts_iso,
            'date': date_str,
            'time': now.strftime('%H:%M:%S'),
            'tts_provider': tts_provider,
            'voice': voice,
            'duration_ms': duration_ms,
            'user': user_msg,
            'assistant': ai_response,
            'tools': tools,
            'identified_person': identified_person,
            'word_count': {'user': user_words, 'assistant': ai_words},
        }

        save_dir = os.path.join(TRANSCRIPTS_DIR, date_str)
        os.makedirs(save_dir, exist_ok=True)

        # Path separators in the identifiers would turn the filename into a
        # nested (possibly escaping) path; flatten them. The payload above
        # still records the original, unmodified values.
        safe_key = re.sub(r'[\\/]', '_', str(key))
        safe_session = re.sub(r'[\\/]', '_', str(session_id))
        filename = f'{time_str}_{safe_key}_{safe_session}.json'
        filepath = os.path.join(save_dir, filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)

        return f'transcripts/{date_str}/{filename}'

    except Exception as exc:
        # Deliberately broad: transcript persistence must never break a caller.
        _transcript_logger.getLogger(__name__).debug(
            'save_conversation_turn failed (non-critical): %s', exc
        )
        return None
|
package/routes/vision.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""
|
|
2
|
+
routes/vision.py — Camera / Vision / Facial Recognition Blueprint
|
|
3
|
+
|
|
4
|
+
Endpoints:
|
|
5
|
+
POST /api/vision — analyze camera frame with vision LLM
|
|
6
|
+
POST /api/frame — receive live frame (stored as latest_frame)
|
|
7
|
+
POST /api/identify — identify person from camera frame (DeepFace)
|
|
8
|
+
GET /api/faces — list registered faces
|
|
9
|
+
POST /api/faces/<name> — register a face photo
|
|
10
|
+
DELETE /api/faces/<name> — delete a registered face
|
|
11
|
+
|
|
12
|
+
Face recognition: DeepFace (local, free, runs on-server — no API calls).
|
|
13
|
+
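Vision analysis ("look at"): requests are currently always sent to Groq's Llama 4 Scout model (see _call_vision); the profile/env model setting (default: glm-4.6v) is only reported back to clients.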
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import base64
|
|
17
|
+
import json
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import re
|
|
21
|
+
import tempfile
|
|
22
|
+
import threading
|
|
23
|
+
import time
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
import requests
|
|
27
|
+
from flask import Blueprint, jsonify, request
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
vision_bp = Blueprint('vision', __name__)
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Storage
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
from services.paths import KNOWN_FACES_DIR as FACES_DIR
|
|
38
|
+
|
|
39
|
+
FACES_DIR.mkdir(parents=True, exist_ok=True)
|
|
40
|
+
|
|
41
|
+
# Latest frame received from browser (in-memory, ephemeral)
|
|
42
|
+
_latest_frame: dict = {'image': None, 'ts': 0}
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# DeepFace — lazy load (heavy import, downloads models on first use)
|
|
46
|
+
# Serialize all face recognition calls — concurrent TF/h5py calls crash the process.
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
|
|
49
|
+
_deepface = None
|
|
50
|
+
_deepface_lock = threading.Lock()
|
|
51
|
+
|
|
52
|
+
def _get_deepface():
    """Return the DeepFace class, importing the ``deepface`` package on the first call.

    The imported class is cached in the module-level ``_deepface`` so later
    calls skip the import.
    """
    global _deepface
    if _deepface is not None:
        return _deepface
    from deepface import DeepFace as loaded_class
    _deepface = loaded_class
    return _deepface
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _clear_deepface_cache():
    """Remove every ``*.pkl`` file directly inside FACES_DIR.

    These are DeepFace's cached face index files; deleting them makes newly
    registered or deleted faces take effect. Files that cannot be removed
    are skipped.
    """
    cached_indexes = FACES_DIR.glob('*.pkl')
    for index_file in cached_indexes:
        try:
            index_file.unlink()
        except OSError:
            # Best effort: leave files we cannot delete and keep going.
            continue
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Vision model config
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
# Known vision-capable models (shown in admin UI dropdown)
|
|
73
|
+
VISION_MODELS = [
|
|
74
|
+
{'id': 'glm-4.6v', 'label': 'GLM-4.6V (128K · Paid)', 'provider': 'zai'},
|
|
75
|
+
{'id': 'glm-4v-plus', 'label': 'GLM-4V Plus (Legacy · Paid)', 'provider': 'zai'},
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
DEFAULT_VISION_MODEL = os.environ.get('VISION_MODEL', 'glm-4.6v')
|
|
79
|
+
DEFAULT_VISION_PROVIDER = 'zai'
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _get_vision_model() -> tuple[str, str]:
    """Return ``(model_id, provider)`` for vision analysis.

    Values come from the active profile's 'vision' section when available;
    a missing or empty value, a missing profile, or any error while reading
    the profile falls back to DEFAULT_VISION_MODEL / DEFAULT_VISION_PROVIDER.
    """
    fallback = (DEFAULT_VISION_MODEL, DEFAULT_VISION_PROVIDER)
    try:
        from profiles.manager import get_profile_manager

        profile = get_profile_manager().get_active_profile()
        if not profile:
            return fallback
        vision_cfg = profile.to_dict().get('vision', {})
        chosen_model = vision_cfg.get('model') or DEFAULT_VISION_MODEL
        chosen_provider = vision_cfg.get('provider') or DEFAULT_VISION_PROVIDER
        return chosen_model, chosen_provider
    except Exception as exc:
        logger.debug('Could not read vision config from profile: %s', exc)
    return fallback
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _call_vision(image_b64: str, prompt: str, model: str | None = None) -> str:
|
|
99
|
+
"""
|
|
100
|
+
Send an image + prompt to the configured vision model and return the text response.
|
|
101
|
+
|
|
102
|
+
Uses Groq's Llama 4 Scout vision model — fast, free, and accurate.
|
|
103
|
+
Z.AI/GLM vision through api.z.ai is broken (returns hallucinated descriptions).
|
|
104
|
+
|
|
105
|
+
image_b64 may be a raw base64 string or a data-URI (data:image/jpeg;base64,...).
|
|
106
|
+
"""
|
|
107
|
+
# Strip data-URI prefix if present
|
|
108
|
+
if image_b64.startswith('data:'):
|
|
109
|
+
image_b64 = image_b64.split(',', 1)[1]
|
|
110
|
+
|
|
111
|
+
api_key = os.environ.get('GROQ_API_KEY', '')
|
|
112
|
+
if not api_key:
|
|
113
|
+
raise ValueError('GROQ_API_KEY is not set — cannot call vision model')
|
|
114
|
+
|
|
115
|
+
vision_model = 'meta-llama/llama-4-scout-17b-16e-instruct'
|
|
116
|
+
|
|
117
|
+
payload = {
|
|
118
|
+
'model': vision_model,
|
|
119
|
+
'messages': [{
|
|
120
|
+
'role': 'user',
|
|
121
|
+
'content': [
|
|
122
|
+
{'type': 'image_url',
|
|
123
|
+
'image_url': {'url': f'data:image/png;base64,{image_b64}'}},
|
|
124
|
+
{'type': 'text', 'text': prompt},
|
|
125
|
+
],
|
|
126
|
+
}],
|
|
127
|
+
'max_tokens': 600,
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
resp = requests.post(
|
|
131
|
+
'https://api.groq.com/openai/v1/chat/completions',
|
|
132
|
+
headers={
|
|
133
|
+
'Authorization': f'Bearer {api_key}',
|
|
134
|
+
'Content-Type': 'application/json',
|
|
135
|
+
},
|
|
136
|
+
json=payload,
|
|
137
|
+
timeout=30,
|
|
138
|
+
)
|
|
139
|
+
resp.raise_for_status()
|
|
140
|
+
return resp.json()['choices'][0]['message']['content'].strip()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ---------------------------------------------------------------------------
|
|
144
|
+
# POST /api/vision — agent "look at" tool
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
@vision_bp.route('/api/vision', methods=['POST'])
def vision_analyze():
    """Analyze a camera frame with the vision model.

    Expects a JSON object with 'image' (base64 or data-URI string), an
    optional 'prompt', and an optional 'model' override.

    Returns:
        200 with ``{'description', 'model'}`` on success;
        400 when the image is missing or image/prompt are not strings;
        500 with a generic error when the vision call fails.
    """
    data = request.get_json(silent=True)
    # A JSON list/string/number has no .get(); treat it as an empty body so
    # the request is rejected with 400 instead of raising AttributeError.
    if not isinstance(data, dict):
        data = {}
    image = data.get('image', '')
    prompt = data.get('prompt', 'Describe what you see in this image in detail.')
    model = data.get('model')  # optional override

    if not image:
        return jsonify({'error': 'No image provided'}), 400
    if not isinstance(image, str) or not isinstance(prompt, str):
        return jsonify({'error': 'image and prompt must be strings'}), 400

    try:
        description = _call_vision(image, prompt, model)
        return jsonify({'description': description, 'model': model or _get_vision_model()[0]})
    except Exception as exc:
        # Details stay in the server log; the client gets a generic message.
        logger.error('Vision analysis failed: %s', exc)
        return jsonify({'error': 'Internal server error'}), 500
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
# POST /api/frame — receive live frame stream from browser
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
|
|
170
|
+
_FRAME_MAX_BYTES = 5 * 1024 * 1024 # 5 MB max per frame
|
|
171
|
+
|
|
172
|
+
@vision_bp.route('/api/frame', methods=['POST'])
def receive_frame():
    """Store the latest camera frame in memory for use by other endpoints.

    Expects a JSON object with an 'image' string. Requests whose declared
    Content-Length or whose image string exceeds _FRAME_MAX_BYTES are
    rejected with 413. A body without an image is accepted and ignored.

    Returns:
        200 ``{'ok': True}``; 400 when 'image' is not a string;
        413 when the frame is too large.
    """
    if request.content_length and request.content_length > _FRAME_MAX_BYTES:
        return jsonify({'ok': False, 'error': 'Frame too large'}), 413
    data = request.get_json(silent=True)
    # A JSON list/string/number has no .get(); treat it as an empty body.
    if not isinstance(data, dict):
        data = {}
    image = data.get('image', '')
    if image:
        # Only strings may be stored: /api/identify later splits and
        # base64-decodes this value.
        if not isinstance(image, str):
            return jsonify({'ok': False, 'error': 'Frame must be a base64 string'}), 400
        if len(image) > _FRAME_MAX_BYTES:
            return jsonify({'ok': False, 'error': 'Frame too large'}), 413
        _latest_frame['image'] = image
        _latest_frame['ts'] = time.time()
    return jsonify({'ok': True})
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ---------------------------------------------------------------------------
|
|
188
|
+
# POST /api/identify — facial recognition
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
@vision_bp.route('/api/identify', methods=['POST'])
def identify_face():
    """
    Identify who is in the camera frame using DeepFace (local, free, no API calls).

    Uses the SFace model — fast on CPU, ~100ms after first load.
    Face database: known_faces/<PersonName>/*.jpg

    The image comes from the request's 'image' field, falling back to the
    most recent frame stored by /api/frame. Every outcome — including bad
    input and internal failures — is answered with HTTP 200 and a JSON
    object holding 'name' ('unknown' when no match), 'confidence' (0-100)
    and, when there is no match, a 'message'.
    """
    data = request.get_json(silent=True)
    # A JSON list/string/number has no .get(); treat it as an empty body.
    if not isinstance(data, dict):
        data = {}
    image = data.get('image', '')
    if not image:
        image = _latest_frame.get('image', '')
    if not image:
        return jsonify({'name': 'unknown', 'confidence': 0, 'message': 'No image'}), 200
    if not isinstance(image, str):
        return jsonify({'name': 'unknown', 'confidence': 0,
                        'message': 'Invalid image data'}), 200

    # Check if any faces are registered
    known_people = [d.name for d in FACES_DIR.iterdir()
                    if d.is_dir() and any(d.iterdir())]
    if not known_people:
        return jsonify({'name': 'unknown', 'confidence': 0,
                        'message': 'No faces registered yet'}), 200

    # Decode and save to temp file (DeepFace needs a file path)
    image_data = image
    if ',' in image_data:
        image_data = image_data.split(',', 1)[1]
    try:
        image_bytes = base64.b64decode(image_data)
    except ValueError:
        # binascii.Error is a ValueError subclass; answer in the same 200/JSON
        # style as every other failure on this endpoint instead of a 500.
        return jsonify({'name': 'unknown', 'confidence': 0,
                        'message': 'Invalid image data'}), 200

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp:
            tmp.write(image_bytes)
            tmp_path = tmp.name

        DeepFace = _get_deepface()
        # Serialized: concurrent TF/h5py calls crash the process.
        with _deepface_lock:
            results = DeepFace.find(
                img_path=tmp_path,
                db_path=str(FACES_DIR),
                model_name='SFace',
                enforce_detection=False,
                silent=True,
            )

        if results and len(results) > 0 and len(results[0]) > 0:
            df = results[0]
            best = df.iloc[0]
            identity_path = best['identity']
            distance = float(best['distance'])
            # Photos live in known_faces/<PersonName>/, so the parent
            # directory name is the person's name.
            person_name = Path(identity_path).parent.name

            # SFace cosine distance threshold ~0.5; convert to confidence %
            confidence = max(0, round((1 - distance / 0.7) * 100, 1))

            if distance < 0.5:
                return jsonify({'name': person_name, 'confidence': confidence})
            else:
                return jsonify({'name': 'unknown', 'confidence': confidence,
                                'message': 'Face detected but not recognized'})
        else:
            return jsonify({'name': 'unknown', 'confidence': 0,
                            'message': 'No face detected in frame'})

    except Exception as exc:
        logger.error('Face identification failed: %s', exc)
        return jsonify({'name': 'unknown', 'confidence': 0, 'message': 'Face identification failed'}), 200
    finally:
        # The temp file was created with delete=False, so remove it here.
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
# GET /api/faces — list registered faces
|
|
267
|
+
# ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
def _list_faces_data():
    """Build the list of registered faces.

    Returns one ``{'name', 'photo_count'}`` dict per sub-directory of
    FACES_DIR, in sorted order; the count covers ``*.jpg``, ``*.jpeg`` and
    ``*.png`` files directly inside that directory.
    """
    image_patterns = ('*.jpg', '*.jpeg', '*.png')
    faces = []
    for person_dir in sorted(FACES_DIR.iterdir()):
        if person_dir.is_dir():
            photo_total = sum(
                len(list(person_dir.glob(pattern))) for pattern in image_patterns
            )
            faces.append({'name': person_dir.name, 'photo_count': photo_total})
    return faces
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
@vision_bp.route('/api/faces', methods=['GET'])
def list_faces():
    """GET /api/faces — return the registered faces under the 'faces' key."""
    registered = _list_faces_data()
    return jsonify({'faces': registered})
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# ---------------------------------------------------------------------------
|
|
286
|
+
# POST /api/faces/<name> — register a face photo
|
|
287
|
+
# ---------------------------------------------------------------------------
|
|
288
|
+
|
|
289
|
+
@vision_bp.route('/api/faces/<name>', methods=['POST'])
def register_face(name):
    """Save a face photo for a named person.

    The name is reduced to letters, digits, '_', '-' and spaces. The JSON
    body must carry an 'image' string (raw base64 or a data-URI). The photo
    is stored as ``FACES_DIR/<name>/photo_NNN.jpg`` and DeepFace's cached
    index is cleared so the new photo is picked up.

    Returns:
        200 with ``{'ok', 'name', 'file'}`` on success;
        400 when the name is empty after sanitizing, or the image is
        missing, not a string, or not decodable base64.
    """
    # Sanitize name
    safe_name = re.sub(r'[^a-zA-Z0-9_\- ]', '', name).strip()
    if not safe_name:
        return jsonify({'error': 'Invalid name'}), 400

    data = request.get_json(silent=True)
    # A JSON list/string/number has no .get(); treat it as an empty body.
    if not isinstance(data, dict):
        data = {}
    image_data = data.get('image', '')
    if not image_data:
        return jsonify({'error': 'No image provided'}), 400
    if not isinstance(image_data, str):
        return jsonify({'error': 'Invalid image data'}), 400

    # Strip data-URI prefix; partition yields '' when the comma is missing.
    if image_data.startswith('data:'):
        image_data = image_data.partition(',')[2]

    # Decode BEFORE touching the filesystem so a bad upload does not leave an
    # empty person directory behind. binascii.Error is a ValueError subclass.
    try:
        image_bytes = base64.b64decode(image_data)
    except ValueError:
        return jsonify({'error': 'Invalid image data'}), 400
    if not image_bytes:
        return jsonify({'error': 'Invalid image data'}), 400

    face_dir = FACES_DIR / safe_name
    face_dir.mkdir(exist_ok=True)

    # Save with incrementing filename. Start from the photo count, then skip
    # forward past any name already in use (numbering gaps would otherwise
    # make the count point at an existing file and overwrite it).
    idx = len(list(face_dir.glob('*.jpg'))) + 1
    out_path = face_dir / f'photo_{idx:03d}.jpg'
    while out_path.exists():
        idx += 1
        out_path = face_dir / f'photo_{idx:03d}.jpg'
    out_path.write_bytes(image_bytes)

    logger.info('Registered face photo: %s (%s)', safe_name, out_path.name)

    # Clear DeepFace's cached index so the new face is picked up immediately
    with _deepface_lock:
        _clear_deepface_cache()

    return jsonify({'ok': True, 'name': safe_name, 'file': out_path.name})
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
# DELETE /api/faces/<name> — remove a registered face
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
|
|
327
|
+
@vision_bp.route('/api/faces/<name>', methods=['DELETE'])
def delete_face(name):
    """Delete a registered person's photo directory.

    The name is sanitized the same way as in face registration. On success
    the directory ``FACES_DIR/<name>`` is removed recursively and DeepFace's
    cached index is cleared.

    Returns:
        200 with ``{'ok', 'deleted'}`` on success;
        400 when the name is empty after sanitizing;
        404 when no directory exists for that name.
    """
    safe_name = re.sub(r'[^a-zA-Z0-9_\- ]', '', name).strip()
    # A name made only of stripped characters (e.g. "..", "!!!") sanitizes to
    # '' and FACES_DIR / '' is FACES_DIR itself — without this guard the
    # rmtree below would wipe the entire face database.
    if not safe_name:
        return jsonify({'error': 'Invalid name'}), 400

    face_dir = FACES_DIR / safe_name
    # Only directories are registered faces; rmtree on a plain file would raise.
    if not face_dir.is_dir():
        return jsonify({'error': 'Face not found'}), 404

    import shutil
    shutil.rmtree(face_dir)
    with _deepface_lock:
        _clear_deepface_cache()
    return jsonify({'ok': True, 'deleted': safe_name})
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
# ---------------------------------------------------------------------------
|
|
342
|
+
# GET /api/vision/models — list available vision models (for admin UI)
|
|
343
|
+
# ---------------------------------------------------------------------------
|
|
344
|
+
|
|
345
|
+
@vision_bp.route('/api/vision/models', methods=['GET'])
def list_vision_models():
    """GET /api/vision/models — return VISION_MODELS plus the model id reported by ``_get_vision_model``."""
    active_model = _get_vision_model()[0]
    return jsonify({'models': VISION_MODELS, 'active': active_model})
|