openvoiceui 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/.env.example +104 -0
  2. package/Dockerfile +30 -0
  3. package/LICENSE +21 -0
  4. package/README.md +638 -0
  5. package/SETUP.md +360 -0
  6. package/app.py +232 -0
  7. package/auto-approve-devices.js +111 -0
  8. package/cli/index.js +372 -0
  9. package/config/__init__.py +4 -0
  10. package/config/default.yaml +43 -0
  11. package/config/flags.yaml +67 -0
  12. package/config/loader.py +203 -0
  13. package/config/providers.yaml +71 -0
  14. package/config/speech_normalization.yaml +182 -0
  15. package/config/theme.json +4 -0
  16. package/data/greetings.json +25 -0
  17. package/default-pages/ai-image-creator.html +915 -0
  18. package/default-pages/bulk-image-uploader.html +492 -0
  19. package/default-pages/desktop.html +2865 -0
  20. package/default-pages/file-explorer.html +854 -0
  21. package/default-pages/interactive-map.html +655 -0
  22. package/default-pages/style-guide.html +1005 -0
  23. package/default-pages/website-setup.html +1623 -0
  24. package/deploy/openclaw/Dockerfile +46 -0
  25. package/deploy/openvoiceui.service +30 -0
  26. package/deploy/setup-nginx.sh +50 -0
  27. package/deploy/setup-sudo.sh +306 -0
  28. package/deploy/skill-runner/Dockerfile +19 -0
  29. package/deploy/skill-runner/requirements.txt +14 -0
  30. package/deploy/skill-runner/server.py +269 -0
  31. package/deploy/supertonic/Dockerfile +22 -0
  32. package/deploy/supertonic/server.py +79 -0
  33. package/docker-compose.pinokio.yml +11 -0
  34. package/docker-compose.yml +59 -0
  35. package/greetings.json +25 -0
  36. package/index.html +65 -0
  37. package/inject-device-identity.js +142 -0
  38. package/package.json +82 -0
  39. package/profiles/default.json +114 -0
  40. package/profiles/manager.py +354 -0
  41. package/profiles/schema.json +337 -0
  42. package/prompts/voice-system-prompt.md +149 -0
  43. package/providers/__init__.py +39 -0
  44. package/providers/base.py +63 -0
  45. package/providers/llm/__init__.py +12 -0
  46. package/providers/llm/base.py +71 -0
  47. package/providers/llm/clawdbot_provider.py +112 -0
  48. package/providers/llm/zai_provider.py +115 -0
  49. package/providers/registry.py +320 -0
  50. package/providers/stt/__init__.py +12 -0
  51. package/providers/stt/base.py +58 -0
  52. package/providers/stt/webspeech_provider.py +49 -0
  53. package/providers/stt/whisper_provider.py +100 -0
  54. package/providers/tts/__init__.py +20 -0
  55. package/providers/tts/base.py +91 -0
  56. package/providers/tts/groq_provider.py +74 -0
  57. package/providers/tts/supertonic_provider.py +72 -0
  58. package/requirements.txt +38 -0
  59. package/routes/__init__.py +10 -0
  60. package/routes/admin.py +515 -0
  61. package/routes/canvas.py +1315 -0
  62. package/routes/chat.py +51 -0
  63. package/routes/conversation.py +2158 -0
  64. package/routes/elevenlabs_hybrid.py +306 -0
  65. package/routes/greetings.py +98 -0
  66. package/routes/icons.py +279 -0
  67. package/routes/image_gen.py +364 -0
  68. package/routes/instructions.py +190 -0
  69. package/routes/music.py +838 -0
  70. package/routes/onboarding.py +43 -0
  71. package/routes/pi.py +62 -0
  72. package/routes/profiles.py +215 -0
  73. package/routes/report_issue.py +68 -0
  74. package/routes/static_files.py +533 -0
  75. package/routes/suno.py +664 -0
  76. package/routes/theme.py +81 -0
  77. package/routes/transcripts.py +199 -0
  78. package/routes/vision.py +348 -0
  79. package/routes/workspace.py +288 -0
  80. package/server.py +1510 -0
  81. package/services/__init__.py +1 -0
  82. package/services/auth.py +143 -0
  83. package/services/canvas_versioning.py +239 -0
  84. package/services/db_pool.py +107 -0
  85. package/services/gateway.py +16 -0
  86. package/services/gateway_manager.py +333 -0
  87. package/services/gateways/__init__.py +12 -0
  88. package/services/gateways/base.py +110 -0
  89. package/services/gateways/compat.py +264 -0
  90. package/services/gateways/openclaw.py +1134 -0
  91. package/services/health.py +100 -0
  92. package/services/memory_client.py +455 -0
  93. package/services/paths.py +26 -0
  94. package/services/speech_normalizer.py +285 -0
  95. package/services/tts.py +270 -0
  96. package/setup-config.js +262 -0
  97. package/sounds/air_horn.mp3 +0 -0
  98. package/sounds/bruh.mp3 +0 -0
  99. package/sounds/crowd_cheer.mp3 +0 -0
  100. package/sounds/gunshot.mp3 +0 -0
  101. package/sounds/impact.mp3 +0 -0
  102. package/sounds/lets_go.mp3 +0 -0
  103. package/sounds/record_stop.mp3 +0 -0
  104. package/sounds/rewind.mp3 +0 -0
  105. package/sounds/sad_trombone.mp3 +0 -0
  106. package/sounds/scratch_long.mp3 +0 -0
  107. package/sounds/yeah.mp3 +0 -0
  108. package/src/adapters/ClawdBotAdapter.js +264 -0
  109. package/src/adapters/_template.js +133 -0
  110. package/src/adapters/elevenlabs-classic.js +841 -0
  111. package/src/adapters/elevenlabs-hybrid.js +812 -0
  112. package/src/adapters/hume-evi.js +676 -0
  113. package/src/admin.html +1339 -0
  114. package/src/app.js +8802 -0
  115. package/src/core/Config.js +173 -0
  116. package/src/core/EmotionEngine.js +307 -0
  117. package/src/core/EventBridge.js +180 -0
  118. package/src/core/EventBus.js +117 -0
  119. package/src/core/VoiceSession.js +607 -0
  120. package/src/face/BaseFace.js +259 -0
  121. package/src/face/EyeFace.js +208 -0
  122. package/src/face/HaloSmokeFace.js +509 -0
  123. package/src/face/manifest.json +27 -0
  124. package/src/face/previews/eyes.svg +16 -0
  125. package/src/face/previews/orb.svg +29 -0
  126. package/src/features/MusicPlayer.js +620 -0
  127. package/src/features/Soundboard.js +128 -0
  128. package/src/providers/DeepgramSTT.js +472 -0
  129. package/src/providers/DeepgramStreamingSTT.js +766 -0
  130. package/src/providers/GroqSTT.js +559 -0
  131. package/src/providers/TTSPlayer.js +323 -0
  132. package/src/providers/WebSpeechSTT.js +479 -0
  133. package/src/providers/tts/BaseTTSProvider.js +81 -0
  134. package/src/providers/tts/HumeProvider.js +77 -0
  135. package/src/providers/tts/SupertonicProvider.js +174 -0
  136. package/src/providers/tts/index.js +140 -0
  137. package/src/shell/adapter-registry.js +154 -0
  138. package/src/shell/caller-bridge.js +35 -0
  139. package/src/shell/camera-bridge.js +28 -0
  140. package/src/shell/canvas-bridge.js +32 -0
  141. package/src/shell/commercial-bridge.js +44 -0
  142. package/src/shell/face-bridge.js +44 -0
  143. package/src/shell/music-bridge.js +60 -0
  144. package/src/shell/orchestrator.js +233 -0
  145. package/src/shell/profile-discovery.js +303 -0
  146. package/src/shell/sounds-bridge.js +28 -0
  147. package/src/shell/transcript-bridge.js +61 -0
  148. package/src/shell/waveform-bridge.js +33 -0
  149. package/src/styles/base.css +2862 -0
  150. package/src/styles/face.css +417 -0
  151. package/src/styles/pi-overrides.css +89 -0
  152. package/src/styles/theme-dark.css +67 -0
  153. package/src/test-tts.html +175 -0
  154. package/src/ui/AppShell.js +544 -0
  155. package/src/ui/ProfileSwitcher.js +228 -0
  156. package/src/ui/SessionControl.js +240 -0
  157. package/src/ui/face/FacePicker.js +195 -0
  158. package/src/ui/face/FaceRenderer.js +309 -0
  159. package/src/ui/settings/PlaylistEditor.js +366 -0
  160. package/src/ui/settings/SettingsPanel.css +684 -0
  161. package/src/ui/settings/SettingsPanel.js +419 -0
  162. package/src/ui/settings/TTSVoicePreview.js +210 -0
  163. package/src/ui/themes/ThemeManager.js +213 -0
  164. package/src/ui/visualizers/BaseVisualizer.js +29 -0
  165. package/src/ui/visualizers/PartyFXVisualizer.css +291 -0
  166. package/src/ui/visualizers/PartyFXVisualizer.js +637 -0
  167. package/static/emulators/jsdos/js-dos.css +1 -0
  168. package/static/emulators/jsdos/js-dos.js +22 -0
  169. package/static/favicon.svg +55 -0
  170. package/static/icons/apple-touch-icon.png +0 -0
  171. package/static/icons/favicon-32.png +0 -0
  172. package/static/icons/icon-192.png +0 -0
  173. package/static/icons/icon-512.png +0 -0
  174. package/static/install.html +449 -0
  175. package/static/manifest.json +26 -0
  176. package/static/sw.js +21 -0
  177. package/tts_providers/__init__.py +136 -0
  178. package/tts_providers/base_provider.py +319 -0
  179. package/tts_providers/groq_provider.py +155 -0
  180. package/tts_providers/hume_provider.py +226 -0
  181. package/tts_providers/providers_config.json +119 -0
  182. package/tts_providers/qwen3_provider.py +371 -0
  183. package/tts_providers/resemble_provider.py +315 -0
  184. package/tts_providers/supertonic_provider.py +557 -0
  185. package/tts_providers/supertonic_tts.py +399 -0
@@ -0,0 +1,81 @@
1
+ """
2
+ routes/theme.py — Theme API Blueprint (P4-T4)
3
+
4
+ Provides server-side persistence for user theme preferences.
5
+
6
+ GET /api/theme — return current saved theme colors
7
+ POST /api/theme — save theme colors (primary + accent)
8
+ POST /api/theme/reset — reset to default theme
9
+
10
+ Theme is stored in config/theme.json
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from pathlib import Path
16
+
17
+ from flask import Blueprint, jsonify, request
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ theme_bp = Blueprint('theme', __name__)
22
+
23
# Project root directory (this module lives in routes/, one level below it).
_PROJECT_ROOT = Path(__file__).parent.parent
# Persisted theme location: config/theme.json under the project root.
THEME_FILE = _PROJECT_ROOT / 'config' / 'theme.json'

# Fallback colors served when no valid theme file exists on disk.
DEFAULT_THEME = {
    'primary': '#0088ff',
    'accent': '#00ffff',
}
30
+
31
+
32
def _load_theme():
    """Read the saved theme from disk; fall back to a copy of the defaults."""
    try:
        if THEME_FILE.exists():
            stored = json.loads(THEME_FILE.read_text())
            # Only trust the file when both colors are well-formed hex values.
            if _valid_hex(stored.get('primary')) and _valid_hex(stored.get('accent')):
                return stored
    except Exception as e:
        logger.warning('Failed to load theme: %s', e)
    # Return a copy so callers cannot mutate the module-level defaults.
    return dict(DEFAULT_THEME)
43
+
44
+
45
def _save_theme(theme):
    """Persist *theme* to THEME_FILE, creating the config directory if needed."""
    serialized = json.dumps(theme, indent=2)
    THEME_FILE.parent.mkdir(parents=True, exist_ok=True)
    THEME_FILE.write_text(serialized)
49
+
50
+
51
+ def _valid_hex(color):
52
+ """Return True if color is a valid #rrggbb hex string."""
53
+ if not isinstance(color, str):
54
+ return False
55
+ import re
56
+ return bool(re.fullmatch(r'#[0-9a-fA-F]{6}', color))
57
+
58
+
59
@theme_bp.get('/api/theme')
def get_theme():
    """Return the currently saved theme (or defaults) as JSON."""
    current = _load_theme()
    return jsonify(current)
62
+
63
+
64
@theme_bp.post('/api/theme')
def set_theme():
    """Validate and persist a {primary, accent} theme payload.

    Returns the saved theme, or 400 when either color is not #rrggbb.
    """
    body = request.get_json(silent=True) or {}
    primary = body.get('primary', '')
    accent = body.get('accent', '')

    # Both colors must be strict #rrggbb strings before we touch disk.
    if not (_valid_hex(primary) and _valid_hex(accent)):
        return jsonify({'error': 'Invalid color format. Use #rrggbb hex values.'}), 400

    saved = {'primary': primary, 'accent': accent}
    _save_theme(saved)
    return jsonify(saved)
76
+
77
+
78
@theme_bp.post('/api/theme/reset')
def reset_theme():
    """Overwrite the saved theme with the defaults and return them."""
    defaults = dict(DEFAULT_THEME)
    _save_theme(defaults)
    return jsonify(DEFAULT_THEME)
@@ -0,0 +1,199 @@
1
+ """
2
+ Transcript storage — saves listen-mode transcriptions to disk.
3
+
4
+ Files are organized as:
5
+ transcripts/
6
+ YYYY-MM-DD/
7
+ HH-MM-SS_<slug>.txt
8
+
9
+ POST /api/transcripts/save — save a transcript
10
+ GET /api/transcripts — list saved transcripts (newest first)
11
+ GET /api/transcripts/<date>/<filename> — read one transcript
12
+ """
13
+
14
+ import os
15
+ import re
16
+ import json
17
+ from datetime import datetime
18
+ from pathlib import Path
19
+ from flask import Blueprint, jsonify, request
20
+
21
+ transcripts_bp = Blueprint('transcripts', __name__)
22
+
23
+ from services.paths import TRANSCRIPTS_DIR as _TRANSCRIPTS_DIR_PATH
24
+
25
+ TRANSCRIPTS_DIR = str(_TRANSCRIPTS_DIR_PATH)
26
+
27
+
28
+ def _slug(title: str) -> str:
29
+ """Turn a title into a safe filename slug."""
30
+ s = title.strip().lower()
31
+ s = re.sub(r'[^\w\s-]', '', s)
32
+ s = re.sub(r'[\s_]+', '-', s)
33
+ s = s.strip('-')
34
+ return s[:60] or 'untitled'
35
+
36
+
37
@transcripts_bp.route('/api/transcripts/save', methods=['POST'])
def save_transcript():
    """Save a listen-mode transcript to transcripts/YYYY-MM-DD/HH-MM-SS_<slug>.txt.

    Expects JSON {title, text}. Returns metadata about the saved file,
    or 400 when no text was provided.
    """
    data = request.get_json(force=True, silent=True) or {}
    title = (data.get('title') or 'Untitled').strip()
    text = (data.get('text') or '').strip()

    if not text:
        return jsonify({'error': 'No transcript text provided'}), 400

    now = datetime.now()
    date_dir = now.strftime('%Y-%m-%d')
    time_part = now.strftime('%H-%M-%S')
    slug = _slug(title)
    filename = f'{time_part}_{slug}.txt'

    save_dir = os.path.join(TRANSCRIPTS_DIR, date_dir)
    os.makedirs(save_dir, exist_ok=True)

    filepath = os.path.join(save_dir, filename)

    # Simple human-readable header block, then '---', then the raw text.
    word_count = len(text.split())
    content = (
        f'Title: {title}\n'
        f'Date: {now.strftime("%Y-%m-%d %H:%M:%S")}\n'
        f'Words: {word_count}\n'
        f'\n---\n\n'
        f'{text}\n'
    )

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)

    return jsonify({
        'saved': True,
        # Bug fix: 'path' previously contained the literal string '(unknown)'
        # instead of the actual file name, so clients could not locate the file.
        'path': f'transcripts/{date_dir}/{filename}',
        'date': date_dir,
        'filename': filename,
        'words': word_count,
    })
76
+
77
+
78
@transcripts_bp.route('/api/transcripts', methods=['GET'])
def list_transcripts():
    """List saved transcripts (newest first) with header metadata per file."""
    if not os.path.isdir(TRANSCRIPTS_DIR):
        return jsonify([])

    entries = []
    for day in sorted(os.listdir(TRANSCRIPTS_DIR), reverse=True):
        day_dir = os.path.join(TRANSCRIPTS_DIR, day)
        if not os.path.isdir(day_dir):
            continue
        for name in sorted(os.listdir(day_dir), reverse=True):
            if not name.endswith('.txt'):
                continue
            # Defaults in case the header cannot be read or parsed.
            meta = {'title': name, 'date': day, 'filename': name, 'words': 0}
            try:
                with open(os.path.join(day_dir, name), 'r', encoding='utf-8') as fh:
                    for line in fh:
                        if line.startswith('Title:'):
                            meta['title'] = line[6:].strip()
                        elif line.startswith('Date:'):
                            meta['timestamp'] = line[5:].strip()
                        elif line.startswith('Words:'):
                            meta['words'] = int(line[6:].strip())
                        elif line.strip() == '---':
                            break  # header finished; don't scan the body
            except Exception:
                pass  # best-effort: keep the filename-based defaults
            entries.append(meta)

    return jsonify(entries)
110
+
111
+
112
@transcripts_bp.route('/api/transcripts/<date_dir>/<filename>', methods=['GET'])
def get_transcript(date_dir, filename):
    """Return one transcript as plain text, rejecting paths outside TRANSCRIPTS_DIR."""
    base = Path(TRANSCRIPTS_DIR).resolve()
    try:
        target = (base / date_dir / filename).resolve()
    except (ValueError, OSError):
        return jsonify({'error': 'Invalid path'}), 400

    # Containment check: the resolved target must be the base itself or sit
    # under it; anything else (e.g. '..'-escapes) is rejected.
    if target != base and base not in target.parents:
        return jsonify({'error': 'Invalid path'}), 400
    if not target.is_file():
        return jsonify({'error': 'Not found'}), 404
    return target.read_text(encoding='utf-8'), 200, {'Content-Type': 'text/plain; charset=utf-8'}
125
+
126
+
127
import logging as _transcript_logger


def save_conversation_turn(
    user_msg: str,
    ai_response: str,
    session_id: str = 'default',
    session_key: str = None,
    tts_provider: str = None,
    voice: str = None,
    duration_ms: int = None,
    actions: list = None,
    identified_person: dict = None,
) -> 'str | None':
    """Save one conversation turn as a JSON transcript file.

    Organized as: transcripts/YYYY-MM-DD/HH-MM-SS_<session_key>_<session_id>.json

    Returns the relative file path on success, or None on failure.
    Never raises — errors are logged at debug level so callers are never broken.
    """
    try:
        now = datetime.now()
        date_str = now.strftime('%Y-%m-%d')
        time_str = now.strftime('%H-%M-%S')
        ms_str = f'{now.microsecond // 1000:03d}'
        # NOTE(review): local time is labeled with a 'Z' (UTC) suffix here —
        # confirm whether UTC was intended before relying on this timestamp.
        ts_iso = f'{now.strftime("%Y-%m-%dT%H:%M:%S")}.{ms_str}Z'

        # Extract brief tool summaries from captured actions (phase=result only)
        tools = []
        if actions:
            for action in actions:
                if action.get('type') == 'tool' and action.get('phase') == 'result':
                    name = action.get('name', 'unknown')
                    result = action.get('result', '')
                    summary = str(result)[:120] if result else ''
                    tools.append({'name': name, 'phase': 'result', 'summary': summary})

        user_words = len(user_msg.split()) if user_msg else 0
        ai_words = len(ai_response.split()) if ai_response else 0
        key = session_key or 'unknown'

        payload = {
            'schema': 'v1',
            'session_id': session_id,
            'session_key': key,
            'timestamp': ts_iso,
            'date': date_str,
            'time': now.strftime('%H:%M:%S'),
            'tts_provider': tts_provider,
            'voice': voice,
            'duration_ms': duration_ms,
            'user': user_msg,
            'assistant': ai_response,
            'tools': tools,
            'identified_person': identified_person,
            'word_count': {'user': user_words, 'assistant': ai_words},
        }

        save_dir = os.path.join(TRANSCRIPTS_DIR, date_str)
        os.makedirs(save_dir, exist_ok=True)
        filename = f'{time_str}_{key}_{session_id}.json'
        filepath = os.path.join(save_dir, filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)

        # Bug fix: previously returned the literal '(unknown)' instead of the
        # actual file name, so the returned path never pointed at the file.
        return f'transcripts/{date_str}/{filename}'

    except Exception as exc:
        _transcript_logger.getLogger(__name__).debug(
            f'save_conversation_turn failed (non-critical): {exc}'
        )
        return None
@@ -0,0 +1,348 @@
1
+ """
2
+ routes/vision.py — Camera / Vision / Facial Recognition Blueprint
3
+
4
+ Endpoints:
5
+ POST /api/vision — analyze camera frame with vision LLM
6
+ POST /api/frame — receive live frame (stored as latest_frame)
7
+ POST /api/identify — identify person from camera frame (DeepFace)
8
+ GET /api/faces — list registered faces
9
+ POST /api/faces/<name> — register a face photo
10
+ DELETE /api/faces/<name> — delete a registered face
11
+
12
+ Face recognition: DeepFace (local, free, runs on-server — no API calls).
13
+ Vision analysis ("look at"): configurable vision LLM (default: glm-4.6v).
14
+ """
15
+
16
+ import base64
17
+ import json
18
+ import logging
19
+ import os
20
+ import re
21
+ import tempfile
22
+ import threading
23
+ import time
24
+ from pathlib import Path
25
+
26
+ import requests
27
+ from flask import Blueprint, jsonify, request
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ vision_bp = Blueprint('vision', __name__)
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Storage
35
+ # ---------------------------------------------------------------------------
36
+
37
+ from services.paths import KNOWN_FACES_DIR as FACES_DIR
38
+
39
+ FACES_DIR.mkdir(parents=True, exist_ok=True)
40
+
41
+ # Latest frame received from browser (in-memory, ephemeral)
42
+ _latest_frame: dict = {'image': None, 'ts': 0}
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # DeepFace — lazy load (heavy import, downloads models on first use)
46
+ # Serialize all face recognition calls — concurrent TF/h5py calls crash the process.
47
+ # ---------------------------------------------------------------------------
48
+
49
+ _deepface = None
50
+ _deepface_lock = threading.Lock()
51
+
52
def _get_deepface():
    """Import and memoize the DeepFace module.

    The import is deferred because it is heavy and downloads model weights on
    first use; after the first call the cached module object is returned.
    """
    global _deepface
    if _deepface is None:
        from deepface import DeepFace
        _deepface = DeepFace
    return _deepface
58
+
59
+
60
def _clear_deepface_cache():
    """Delete DeepFace's cached face index so newly registered/deleted faces are picked up."""
    for cache_file in FACES_DIR.glob('*.pkl'):
        try:
            cache_file.unlink()
        except OSError:
            # Best-effort cleanup; a stale index file is not fatal.
            pass
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Vision model config
70
+ # ---------------------------------------------------------------------------
71
+
72
# Known vision-capable models (shown in admin UI dropdown)
VISION_MODELS = [
    {'id': 'glm-4.6v', 'label': 'GLM-4.6V (128K · Paid)', 'provider': 'zai'},
    {'id': 'glm-4v-plus', 'label': 'GLM-4V Plus (Legacy · Paid)', 'provider': 'zai'},
]

# Fallback model/provider used when the active profile does not specify one.
# The model id can be overridden via the VISION_MODEL environment variable.
DEFAULT_VISION_MODEL = os.environ.get('VISION_MODEL', 'glm-4.6v')
DEFAULT_VISION_PROVIDER = 'zai'
80
+
81
+
82
def _get_vision_model() -> tuple[str, str]:
    """Return (model_id, provider) from active profile or env defaults."""
    try:
        from profiles.manager import get_profile_manager
        profile = get_profile_manager().get_active_profile()
        if profile:
            vision_cfg = profile.to_dict().get('vision', {})
            return (vision_cfg.get('model') or DEFAULT_VISION_MODEL,
                    vision_cfg.get('provider') or DEFAULT_VISION_PROVIDER)
    except Exception as exc:
        # Profile lookup is optional; fall through to the defaults.
        logger.debug('Could not read vision config from profile: %s', exc)
    return DEFAULT_VISION_MODEL, DEFAULT_VISION_PROVIDER
96
+
97
+
98
def _call_vision(image_b64: str, prompt: str, model: str | None = None) -> str:
    """
    Send an image + prompt to the configured vision model and return the text response.

    Uses Groq's Llama 4 Scout vision model — fast, free, and accurate.
    Z.AI/GLM vision through api.z.ai is broken (returns hallucinated descriptions).

    image_b64 may be a raw base64 string or a data-URI (data:image/jpeg;base64,...).
    The *model* argument is accepted for interface compatibility but the Groq
    model below is always the one called.
    """
    # Strip data-URI prefix if present
    if image_b64.startswith('data:'):
        image_b64 = image_b64.split(',', 1)[1]

    api_key = os.environ.get('GROQ_API_KEY', '')
    if not api_key:
        raise ValueError('GROQ_API_KEY is not set — cannot call vision model')

    vision_model = 'meta-llama/llama-4-scout-17b-16e-instruct'

    # OpenAI-style multimodal message: image part first, then the text prompt.
    image_part = {
        'type': 'image_url',
        'image_url': {'url': f'data:image/png;base64,{image_b64}'},
    }
    text_part = {'type': 'text', 'text': prompt}
    request_body = {
        'model': vision_model,
        'messages': [{'role': 'user', 'content': [image_part, text_part]}],
        'max_tokens': 600,
    }

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }
    resp = requests.post(
        'https://api.groq.com/openai/v1/chat/completions',
        headers=headers,
        json=request_body,
        timeout=30,
    )
    resp.raise_for_status()
    first_choice = resp.json()['choices'][0]
    return first_choice['message']['content'].strip()
141
+
142
+
143
+ # ---------------------------------------------------------------------------
144
+ # POST /api/vision — agent "look at" tool
145
+ # ---------------------------------------------------------------------------
146
+
147
@vision_bp.route('/api/vision', methods=['POST'])
def vision_analyze():
    """Analyze a camera frame with the configured vision model."""
    body = request.get_json(silent=True) or {}
    image = body.get('image', '')
    prompt = body.get('prompt', 'Describe what you see in this image in detail.')
    model = body.get('model')  # optional override

    if not image:
        return jsonify({'error': 'No image provided'}), 400

    try:
        description = _call_vision(image, prompt, model)
        active = model or _get_vision_model()[0]
        return jsonify({'description': description, 'model': active})
    except Exception as exc:
        # Details stay server-side; the client gets a generic 500.
        logger.error('Vision analysis failed: %s', exc)
        return jsonify({'error': 'Internal server error'}), 500
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # POST /api/frame — receive live frame stream from browser
168
+ # ---------------------------------------------------------------------------
169
+
170
_FRAME_MAX_BYTES = 5 * 1024 * 1024  # 5 MB max per frame


@vision_bp.route('/api/frame', methods=['POST'])
def receive_frame():
    """Store the latest camera frame in memory for use by other endpoints."""
    # Reject oversized uploads before parsing the JSON body.
    declared = request.content_length
    if declared and declared > _FRAME_MAX_BYTES:
        return jsonify({'ok': False, 'error': 'Frame too large'}), 413

    payload = request.get_json(silent=True) or {}
    image = payload.get('image', '')
    if image:
        # Double-check the decoded string length as well.
        if len(image) > _FRAME_MAX_BYTES:
            return jsonify({'ok': False, 'error': 'Frame too large'}), 413
        _latest_frame['image'] = image
        _latest_frame['ts'] = time.time()
    return jsonify({'ok': True})
185
+
186
+
187
+ # ---------------------------------------------------------------------------
188
+ # POST /api/identify — facial recognition
189
+ # ---------------------------------------------------------------------------
190
+
191
@vision_bp.route('/api/identify', methods=['POST'])
def identify_face():
    """
    Identify who is in the camera frame using DeepFace (local, free, no API calls).

    Uses the SFace model — fast on CPU, ~100ms after first load.
    Face database: known_faces/<PersonName>/*.jpg

    Accepts an optional base64/data-URI image in the JSON body; falls back to
    the most recent frame posted to /api/frame. Always responds 200 with a
    {name, confidence[, message]} payload — errors never surface as 5xx.
    """
    data = request.get_json(silent=True) or {}
    image = data.get('image', '')
    if not image:
        # Fall back to the last frame streamed from the browser.
        image = _latest_frame.get('image', '')
    if not image:
        return jsonify({'name': 'unknown', 'confidence': 0, 'message': 'No image'}), 200

    # Check if any faces are registered (only non-empty person directories count)
    known_people = [d.name for d in FACES_DIR.iterdir()
                    if d.is_dir() and any(d.iterdir())]
    if not known_people:
        return jsonify({'name': 'unknown', 'confidence': 0,
                        'message': 'No faces registered yet'}), 200

    # Decode and save to temp file (DeepFace needs a file path)
    image_data = image
    if ',' in image_data:
        # Strip a data-URI prefix such as 'data:image/jpeg;base64,'
        image_data = image_data.split(',', 1)[1]
    image_bytes = base64.b64decode(image_data)

    tmp_path = None
    try:
        # delete=False so the file survives the with-block for DeepFace.find;
        # cleanup happens in the finally clause below.
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp:
            tmp.write(image_bytes)
            tmp_path = tmp.name

        DeepFace = _get_deepface()
        # Serialize recognition calls — concurrent TF/h5py use crashes the process.
        with _deepface_lock:
            results = DeepFace.find(
                img_path=tmp_path,
                db_path=str(FACES_DIR),
                model_name='SFace',
                enforce_detection=False,
                silent=True,
            )

        if results and len(results) > 0 and len(results[0]) > 0:
            df = results[0]
            # Rows appear distance-sorted, so iloc[0] is the closest match —
            # TODO confirm against the DeepFace.find contract.
            best = df.iloc[0]
            identity_path = best['identity']
            distance = float(best['distance'])
            # Person name is the parent directory: known_faces/<PersonName>/photo.jpg
            person_name = Path(identity_path).parent.name

            # SFace cosine distance threshold ~0.5; convert to confidence %
            confidence = max(0, round((1 - distance / 0.7) * 100, 1))

            if distance < 0.5:
                return jsonify({'name': person_name, 'confidence': confidence})
            else:
                return jsonify({'name': 'unknown', 'confidence': confidence,
                                'message': 'Face detected but not recognized'})
        else:
            return jsonify({'name': 'unknown', 'confidence': 0,
                            'message': 'No face detected in frame'})

    except Exception as exc:
        # Deliberately returns 200 so the client UI degrades gracefully.
        logger.error('Face identification failed: %s', exc)
        return jsonify({'name': 'unknown', 'confidence': 0, 'message': 'Face identification failed'}), 200
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
263
+
264
+
265
+ # ---------------------------------------------------------------------------
266
+ # GET /api/faces — list registered faces
267
+ # ---------------------------------------------------------------------------
268
+
269
def _list_faces_data():
    """Collect {name, photo_count} entries for every registered face directory."""
    faces = []
    for person_dir in sorted(FACES_DIR.iterdir()):
        if not person_dir.is_dir():
            continue
        photo_count = sum(
            len(list(person_dir.glob(pattern)))
            for pattern in ('*.jpg', '*.jpeg', '*.png')
        )
        faces.append({'name': person_dir.name, 'photo_count': photo_count})
    return faces
278
+
279
+
280
@vision_bp.route('/api/faces', methods=['GET'])
def list_faces():
    """Return all registered faces with their photo counts."""
    faces = _list_faces_data()
    return jsonify({'faces': faces})
283
+
284
+
285
+ # ---------------------------------------------------------------------------
286
+ # POST /api/faces/<name> — register a face photo
287
+ # ---------------------------------------------------------------------------
288
+
289
@vision_bp.route('/api/faces/<name>', methods=['POST'])
def register_face(name):
    """Save a face photo for a named person."""
    # Keep only filesystem-safe characters in the person's name.
    safe_name = re.sub(r'[^a-zA-Z0-9_\- ]', '', name).strip()
    if not safe_name:
        return jsonify({'error': 'Invalid name'}), 400

    payload = request.get_json(silent=True) or {}
    image_data = payload.get('image', '')
    if not image_data:
        return jsonify({'error': 'No image provided'}), 400

    person_dir = FACES_DIR / safe_name
    person_dir.mkdir(exist_ok=True)

    # Strip data-URI prefix
    if image_data.startswith('data:'):
        image_data = image_data.split(',', 1)[1]

    # Save with incrementing filename
    next_idx = len(list(person_dir.glob('*.jpg'))) + 1
    out_path = person_dir / f'photo_{next_idx:03d}.jpg'
    out_path.write_bytes(base64.b64decode(image_data))

    logger.info('Registered face photo: %s (%s)', safe_name, out_path.name)

    # Clear DeepFace's cached index so the new face is picked up immediately
    with _deepface_lock:
        _clear_deepface_cache()

    return jsonify({'ok': True, 'name': safe_name, 'file': out_path.name})
321
+
322
+
323
+ # ---------------------------------------------------------------------------
324
+ # DELETE /api/faces/<name> — remove a registered face
325
+ # ---------------------------------------------------------------------------
326
+
327
@vision_bp.route('/api/faces/<name>', methods=['DELETE'])
def delete_face(name):
    """Delete a registered person's face directory and clear the cached index.

    Returns 400 for an invalid name, 404 if the person is not registered.
    """
    safe_name = re.sub(r'[^a-zA-Z0-9_\- ]', '', name).strip()
    # Bug fix (matches the guard in register_face): an all-punctuation name
    # sanitizes to '', making face_dir equal FACES_DIR itself — the rmtree
    # below would then delete EVERY registered face.
    if not safe_name:
        return jsonify({'error': 'Invalid name'}), 400

    face_dir = FACES_DIR / safe_name
    if not face_dir.exists():
        return jsonify({'error': 'Face not found'}), 404

    import shutil
    shutil.rmtree(face_dir)
    # Drop DeepFace's cached index so the deleted face stops matching.
    with _deepface_lock:
        _clear_deepface_cache()
    return jsonify({'ok': True, 'deleted': safe_name})
339
+
340
+
341
+ # ---------------------------------------------------------------------------
342
+ # GET /api/vision/models — list available vision models (for admin UI)
343
+ # ---------------------------------------------------------------------------
344
+
345
@vision_bp.route('/api/vision/models', methods=['GET'])
def list_vision_models():
    """Return the known vision models and the currently active model id."""
    active_model, _provider = _get_vision_model()
    return jsonify({'models': VISION_MODELS, 'active': active_model})