nixorb 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nixorb/__init__.py +6 -0
- nixorb/action/__init__.py +0 -0
- nixorb/action/clipboard.py +51 -0
- nixorb/action/executor.py +209 -0
- nixorb/asr/__init__.py +0 -0
- nixorb/asr/wake_word.py +93 -0
- nixorb/asr/whisper_engine.py +191 -0
- nixorb/cli.py +416 -0
- nixorb/core/__init__.py +0 -0
- nixorb/core/aur_checker.py +44 -0
- nixorb/core/event_bus.py +178 -0
- nixorb/core/vram_manager.py +185 -0
- nixorb/llm/__init__.py +0 -0
- nixorb/llm/backends.py +338 -0
- nixorb/main.py +299 -0
- nixorb/memory/__init__.py +0 -0
- nixorb/memory/vector_store.py +59 -0
- nixorb/plugins/__init__.py +0 -0
- nixorb/plugins/builtin/__init__.py +0 -0
- nixorb/plugins/builtin/kdeconnect_plugin.py +95 -0
- nixorb/plugins/builtin/systemd_plugin.py +45 -0
- nixorb/plugins/loader.py +63 -0
- nixorb/settings.py +95 -0
- nixorb/tts/__init__.py +0 -0
- nixorb/tts/hf_tts.py +69 -0
- nixorb/tts/openai_tts.py +56 -0
- nixorb/tts/piper_tts.py +84 -0
- nixorb/tts/tts_factory.py +22 -0
- nixorb/ui/__init__.py +0 -0
- nixorb/ui/hotkey.py +54 -0
- nixorb/ui/orb_window.py +183 -0
- nixorb/ui/settings_window.py +350 -0
- nixorb/ui/tray_icon.py +65 -0
- nixorb/utils/__init__.py +0 -0
- nixorb/utils/audio.py +18 -0
- nixorb/utils/crypto.py +82 -0
- nixorb/utils/hypernix_client.py +54 -0
- nixorb/utils/web_search.py +80 -0
- nixorb/vision/__init__.py +0 -0
- nixorb/vision/screen_capture.py +101 -0
- nixorb-0.1.0.data/data/share/applications/nixorb.desktop +12 -0
- nixorb-0.1.0.data/data/share/nixorb/assets/generate_icon.py +28 -0
- nixorb-0.1.0.data/data/share/nixorb/assets/nixorb_256.png +0 -0
- nixorb-0.1.0.data/data/share/nixorb/assets/orb.qml +109 -0
- nixorb-0.1.0.data/data/share/nixorb/assets/shaders/orb_glow.frag +108 -0
- nixorb-0.1.0.data/data/share/nixorb/assets/shaders/orb_glow.vert +20 -0
- nixorb-0.1.0.data/data/share/nixorb/assets/tray_icon.png +0 -0
- nixorb-0.1.0.data/data/share/nixorb/config/default.toml +66 -0
- nixorb-0.1.0.dist-info/METADATA +278 -0
- nixorb-0.1.0.dist-info/RECORD +53 -0
- nixorb-0.1.0.dist-info/WHEEL +4 -0
- nixorb-0.1.0.dist-info/entry_points.txt +5 -0
- nixorb-0.1.0.dist-info/licenses/LICENSE +21 -0
nixorb/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""nixorb/action/clipboard.py — Wayland clipboard integration."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import logging
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
|
|
9
|
+
# Logger named after this module.
log = logging.getLogger(__name__)

# Look the wl-clipboard helpers up on PATH once, at import time.
_HAS_WL_PASTE = shutil.which("wl-paste") is not None
_HAS_WL_COPY = shutil.which("wl-copy") is not None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def read_clipboard(timeout: float = 5.0) -> str | None:
    """Read the current Wayland clipboard text via ``wl-paste``.

    Args:
        timeout: Seconds to wait for ``wl-paste`` before giving up.

    Returns:
        The clipboard text with leading/trailing whitespace stripped, or
        ``None`` when ``wl-paste`` is not on PATH, the output is empty,
        the helper timed out, or any other error occurred (errors are
        logged, never raised).
    """
    if not _HAS_WL_PASTE:
        log.warning("wl-paste not found — install wl-clipboard")
        return None

    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "wl-paste", "--no-newline",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.DEVNULL,
        )
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        return stdout.decode(errors="replace").strip() or None
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so log an explicit message instead.
        log.error("Clipboard read failed: wl-paste timed out after %.1f s", timeout)
        return None
    except Exception as exc:
        log.error("Clipboard read failed: %s", exc)
        return None
    finally:
        # wait_for() only cancels communicate(); the child keeps running
        # unless we kill it, and must be waited on to be reaped.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass
            else:
                await proc.wait()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def write_clipboard(text: str, timeout: float = 5.0) -> bool:
    """Write *text* to the Wayland clipboard via ``wl-copy``.

    Args:
        text: The text to place on the clipboard (sent UTF-8 encoded on
            the helper's stdin).
        timeout: Seconds to wait for ``wl-copy`` before giving up.

    Returns:
        ``True`` when ``wl-copy`` exited with status 0; ``False`` when it
        is not on PATH, exited non-zero, timed out, or any other error
        occurred (errors are logged, never raised).
    """
    if not _HAS_WL_COPY:
        log.warning("wl-copy not found — install wl-clipboard")
        return False

    proc = None
    try:
        proc = await asyncio.create_subprocess_exec(
            "wl-copy",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        await asyncio.wait_for(
            proc.communicate(input=text.encode("utf-8")), timeout=timeout
        )
        return proc.returncode == 0
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so log an explicit message instead.
        log.error("Clipboard write failed: wl-copy timed out after %.1f s", timeout)
        return False
    except Exception as exc:
        log.error("Clipboard write failed: %s", exc)
        return False
    finally:
        # wait_for() only cancels communicate(); the child keeps running
        # unless we kill it, and must be waited on to be reaped.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass
            else:
                await proc.wait()
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nixorb/action/executor.py
|
|
3
|
+
|
|
4
|
+
Sandboxed bash/system command executor.
|
|
5
|
+
|
|
6
|
+
Security model:
|
|
7
|
+
1. Hard-deny list blocks destructive patterns unconditionally.
|
|
8
|
+
2. Sensitive prefixes require interactive user confirmation via EventBus.
|
|
9
|
+
3. All commands run as the current user in a subprocess with timeout.
|
|
10
|
+
4. Optional bubblewrap (bwrap) filesystem sandbox when available.
|
|
11
|
+
5. NixOrb refuses to run as root.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
import shutil
|
|
20
|
+
import subprocess
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from typing import TYPE_CHECKING
|
|
23
|
+
|
|
24
|
+
from nixorb.core.event_bus import Event, EventPayload, bus
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from nixorb.settings import Settings
|
|
28
|
+
|
|
29
|
+
log = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# Matches <ACTION>…</ACTION> spans; tags are case-insensitive and the
# payload may span several lines.
ACTION_PATTERN = re.compile(
    r"<ACTION>(.*?)</ACTION>",
    flags=re.IGNORECASE | re.DOTALL,
)

# Per-command execution timeout, in seconds.
TIMEOUT_SECONDS = 30

# True when a `bwrap` binary is found on PATH at import time.
USE_BUBBLEWRAP = bool(shutil.which("bwrap"))

# Substrings that cause a command to be refused outright.
ALWAYS_DENY: list[str] = [
    "rm -rf /",
    "rm -rf ~",
    "dd if=",
    "mkfs",
    ":(){ :|:& };:",
    "chmod -R 777 /",
    "passwd",
    "> /dev/sda",
]

# Substrings that trigger an interactive confirmation request.
REQUIRE_CONFIRM: list[str] = [
    "rm ",
    "mv ",
    "sudo ",
    "systemctl ",
    "pacman ",
    "yay ",
    "pip install",
    "curl ",
    "wget ",
    "git push",
    "chmod",
    "chown",
    "mktemp",
    "shutdown",
    "reboot",
]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class ActionResult:
    """Outcome of a single command: what was run and what came back."""

    command: str
    stdout: str
    stderr: str
    returncode: int
    timed_out: bool = False

    @property
    def success(self) -> bool:
        """``True`` only for exit status 0 without a timeout."""
        if self.timed_out:
            return False
        return self.returncode == 0

    def __str__(self) -> str:
        """Render as a transcript: prompt line, output, stderr, exit status."""
        timeout_tag = " TIMEOUT" if self.timed_out else ""
        parts = [f"$ {self.command}"]
        # Streams that are empty or whitespace-only are left out entirely.
        if self.stdout.strip():
            parts.append(self.stdout.rstrip())
        if self.stderr.strip():
            parts.append(f"[stderr] {self.stderr.rstrip()}")
        parts.append(f"[exit {self.returncode}{timeout_tag}]")
        return "\n".join(parts)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class ActionExecutor:
    """Run shell commands found in LLM output behind deny/confirm gates.

    Commands are taken from ``<ACTION>…</ACTION>`` spans, checked against
    ``ALWAYS_DENY`` (refused) and ``REQUIRE_CONFIRM`` (needs approval via
    the event bus), then executed with ``bash -c`` — wrapped in ``bwrap``
    when ``USE_BUBBLEWRAP`` is true — under a ``TIMEOUT_SECONDS`` limit.
    """

    def __init__(self, settings: Settings) -> None:
        """Store *settings* and subscribe to confirmation replies.

        Raises:
            RuntimeError: If the effective user id is 0 (root).
        """
        # Check for root first so a refused construction does not leave a
        # handler subscribed on the bus.
        if os.geteuid() == 0:
            raise RuntimeError(
                "NixOrb must NOT run as root. "
                "Drop privileges before starting."
            )

        self._settings = settings
        # Outstanding confirmation requests, keyed by command string.
        self._pending: dict[str, asyncio.Future[bool]] = {}
        bus.subscribe(Event.ACTION_RESULT, self._on_confirmation)

    async def _on_confirmation(self, payload: EventPayload) -> None:
        """Resolve the pending request named by ``payload.data["command"]``."""
        data = payload.data or {}
        cmd = data.get("command", "")
        approved = bool(data.get("approved", False))
        fut = self._pending.pop(cmd, None)
        if fut and not fut.done():
            fut.set_result(approved)

    async def handle_llm_output(self, text: str) -> list[ActionResult]:
        """Run every ``<ACTION>`` command in *text*, one after another.

        Each result is also emitted on the bus as an ``Event.LOG`` entry.

        Returns:
            One ``ActionResult`` per matched command, in order of appearance.
        """
        results: list[ActionResult] = []
        for raw_cmd in ACTION_PATTERN.findall(text):
            result = await self._run_action(raw_cmd.strip())
            results.append(result)
            await bus.emit(
                Event.LOG,
                data={"level": "exec", "msg": str(result)},
                source="ActionExecutor",
            )
        return results

    async def _run_action(self, cmd: str) -> ActionResult:
        """Apply the deny and confirmation gates, then execute *cmd*."""
        # Match against the raw command and a whitespace-collapsed copy so
        # that e.g. "rm  -rf /" cannot slip past the substring checks.
        # This is still a best-effort filter, not a parser.
        collapsed = " ".join(cmd.split())
        candidates = (cmd, collapsed)

        # 1. Hard deny
        for pattern in ALWAYS_DENY:
            if any(pattern in candidate for candidate in candidates):
                msg = f"Command hard-denied (matched '{pattern}'): {cmd}"
                log.warning(msg)
                return ActionResult(command=cmd, stdout="", stderr=msg, returncode=-1)

        # 2. Confirmation for sensitive commands
        needs_confirm = self._settings.require_action_confirmation or any(
            pattern in candidate
            for pattern in REQUIRE_CONFIRM
            for candidate in candidates
        )
        if needs_confirm and not await self._request_confirmation(cmd):
            return ActionResult(
                command=cmd, stdout="", stderr="User denied", returncode=-1
            )

        return await self._execute(cmd)

    async def _request_confirmation(self, cmd: str) -> bool:
        """Ask for approval of *cmd* over the bus; wait up to 30 seconds.

        Returns:
            The ``approved`` flag delivered to ``_on_confirmation``, or
            ``False`` if no reply arrived in time.
        """
        loop = asyncio.get_running_loop()
        fut: asyncio.Future[bool] = loop.create_future()
        self._pending[cmd] = fut

        try:
            await bus.emit(
                Event.ACTION_REQUESTED,
                data={"command": cmd},
                source="ActionExecutor",
                priority=1,
            )
            try:
                return await asyncio.wait_for(asyncio.shield(fut), timeout=30.0)
            except asyncio.TimeoutError:
                log.warning("Confirmation timed out for: %s", cmd)
                return False
        finally:
            # Always drop our entry (timeout, cancellation, emit failure),
            # but never evict a newer request registered for the same command.
            if self._pending.get(cmd) is fut:
                del self._pending[cmd]

    async def _execute(self, cmd: str) -> ActionResult:
        """Run *cmd* through bash (optionally inside bwrap) with a timeout.

        Failures to spawn or communicate are reported in the returned
        ``ActionResult`` (``returncode=-1``) rather than raised.
        """
        log.info("Executing: %s", cmd)
        cmd_args = self._wrap_bwrap(cmd) if USE_BUBBLEWRAP else ["bash", "-c", cmd]
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                env={**os.environ},
            )
            try:
                stdout, stderr = await asyncio.wait_for(
                    proc.communicate(), timeout=TIMEOUT_SECONDS
                )
            except asyncio.TimeoutError:
                await self._kill_and_reap(proc)
                return ActionResult(
                    command=cmd, stdout="", stderr="Execution timed out",
                    returncode=-1, timed_out=True,
                )
            return ActionResult(
                command=cmd,
                stdout=stdout.decode(errors="replace"),
                stderr=stderr.decode(errors="replace"),
                returncode=proc.returncode or 0,
            )
        except Exception as exc:
            return ActionResult(command=cmd, stdout="", stderr=str(exc), returncode=-1)

    @staticmethod
    async def _kill_and_reap(proc: asyncio.subprocess.Process) -> None:
        """Kill *proc* and wait briefly for it so it does not linger as a zombie."""
        try:
            proc.kill()
        except ProcessLookupError:
            return  # already gone
        try:
            # Bounded wait: do not let a stuck child block the caller forever.
            await asyncio.wait_for(proc.wait(), timeout=5.0)
        except asyncio.TimeoutError:
            log.warning("Killed process %s did not exit within 5 s", proc.pid)

    @staticmethod
    def _wrap_bwrap(cmd: str) -> list[str]:
        """Build the ``bwrap`` argv that runs *cmd* under ``bash -c``.

        System directories are bound read-only, the home directory and
        ``/tmp`` read-write, and the network namespace is unshared.
        """
        home = os.path.expanduser("~")
        argv = ["bwrap"]
        # bwrap aborts when a bind source is missing (e.g. no /lib64 on some
        # hosts), so only bind the system directories that actually exist.
        for path in ("/usr", "/lib", "/lib64", "/etc"):
            if os.path.exists(path):
                argv += ["--ro-bind", path, path]
        argv += [
            "--bind", home, home,
            "--bind", "/tmp", "/tmp",
            "--dev", "/dev",
            "--proc", "/proc",
            "--unshare-net",
            "--die-with-parent",
            "bash", "-c", cmd,
        ]
        return argv
|
nixorb/asr/__init__.py
ADDED
|
File without changes
|
nixorb/asr/wake_word.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nixorb/asr/wake_word.py
|
|
3
|
+
|
|
4
|
+
OpenWakeWord always-on detector.
|
|
5
|
+
|
|
6
|
+
BUG FIX PASS 1:
|
|
7
|
+
- Previous version called asyncio.ensure_future() from the sounddevice
|
|
8
|
+
callback thread. ensure_future() requires a running event loop in the
|
|
9
|
+
calling thread, which the audio callback thread does not have.
|
|
10
|
+
Fixed by capturing the running loop at startup and using
|
|
11
|
+
loop.call_soon_threadsafe() with asyncio.run_coroutine_threadsafe().
|
|
12
|
+
|
|
13
|
+
BUG FIX PASS 2:
|
|
14
|
+
- Cooldown guard added. Without it, a single wake word detection fires
|
|
15
|
+
dozens of events during the ~80 ms chunk window while confidence is high.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import logging
|
|
21
|
+
import time
|
|
22
|
+
from typing import TYPE_CHECKING
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
import sounddevice as sd
|
|
26
|
+
|
|
27
|
+
from nixorb.core.event_bus import Event, bus
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from nixorb.settings import Settings
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# Samples per audio block: ~80 ms at 16 kHz (OpenWakeWord requirement).
CHUNK = 1_280
# Minimum prediction score that counts as a detection.
CONFIDENCE = 0.70
# Seconds that must pass before another wake-word event may fire.
COOLDOWN_S = 2.0
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class WakeWordDetector:
    """Always-on wake-word listener built on an OpenWakeWord model.

    Audio is pulled by a sounddevice ``InputStream`` callback; detections
    at or above ``CONFIDENCE`` are forwarded to the event bus as
    ``Event.WAKE_WORD_DETECTED``, at most once per ``COOLDOWN_S`` seconds.
    """

    def __init__(self, settings: Settings) -> None:
        """Load the ONNX wake-word model named by ``settings.wake_word_model``."""
        # Imported here rather than at module top.
        from openwakeword.model import Model

        self._model = Model(
            wakeword_models=[settings.wake_word_model],
            inference_framework="onnx",
        )
        self._settings = settings
        # -inf guarantees the first detection always passes the cooldown
        # check, whatever origin time.monotonic() happens to use.
        self._last_fired = float("-inf")
        self._loop: asyncio.AbstractEventLoop | None = None

    async def run_forever(self) -> None:
        """Open the input stream and keep it alive until cancelled."""
        # Capture the loop here, on the async side, before the stream starts:
        # the sounddevice callback runs on an audio thread with no loop.
        loop = asyncio.get_running_loop()
        self._loop = loop
        log.info(
            "Wake-word detector started (model=%s, threshold=%.2f)",
            self._settings.wake_word_model, CONFIDENCE,
        )

        def _log_emit_failure(fut) -> None:
            # Surface exceptions from bus.emit() that would otherwise be
            # lost with the discarded future.
            if fut.cancelled():
                return
            exc = fut.exception()
            if exc is not None:
                log.error("Failed to emit wake-word event: %s", exc)

        def _callback(
            indata: np.ndarray, frames: int, time_info, status
        ) -> None:
            if status:
                log.debug("sounddevice status: %s", status)
            # float32 samples -> int16 PCM for the model.
            pcm = (indata[:, 0] * 32_767).astype(np.int16)
            preds = self._model.predict(pcm)
            for name, score in preds.items():
                if score < CONFIDENCE:
                    continue
                now = time.monotonic()
                # Cooldown so one utterance does not flood the bus.
                if (now - self._last_fired) < COOLDOWN_S:
                    return
                self._last_fired = now
                log.info("Wake word: %s (score=%.3f)", name, score)
                # Hand the coroutine to the captured loop from this thread.
                pending = asyncio.run_coroutine_threadsafe(
                    bus.emit(
                        Event.WAKE_WORD_DETECTED,
                        data={"word": name, "score": float(score)},
                        source="WakeWordDetector",
                    ),
                    loop,
                )
                pending.add_done_callback(_log_emit_failure)

        with sd.InputStream(
            samplerate=16_000,
            channels=1,
            dtype="float32",
            blocksize=CHUNK,
            callback=_callback,
        ):
            # Keep the coroutine alive; the callback does the real work.
            while True:
                await asyncio.sleep(0.5)
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nixorb/asr/whisper_engine.py
|
|
3
|
+
|
|
4
|
+
faster-whisper Large v3 with INT8 quantisation and VRAM paging.
|
|
5
|
+
|
|
6
|
+
BUG FIX PASS 1:
|
|
7
|
+
- _transcribe_blocking called asyncio.get_event_loop() from inside a
|
|
8
|
+
ThreadPoolExecutor worker. In Python 3.10+ this issues a DeprecationWarning
|
|
9
|
+
and in 3.12 it raises RuntimeError because there is no running loop in the
|
|
10
|
+
thread. Fixed by capturing the loop at construction time and storing it.
|
|
11
|
+
|
|
12
|
+
BUG FIX PASS 2:
|
|
13
|
+
- asyncio.run_coroutine_threadsafe result was awaited inside the executor
|
|
14
|
+
thread with .result(timeout=60), but the coroutine itself acquires
|
|
15
|
+
VRAMManager locks which are asyncio.Lock objects — awaitable only on
|
|
16
|
+
the main loop. Restructured: recording runs in thread pool, transcription
|
|
17
|
+
is fully async on the main loop, no cross-thread coroutine submission.
|
|
18
|
+
|
|
19
|
+
BUG FIX PASS 3:
|
|
20
|
+
- sounddevice InputStream blocksize parameter was passed as CHUNK_FRAMES but
|
|
21
|
+
the callback variant of InputStream was mixed up with the read() API.
|
|
22
|
+
Corrected to use explicit stream.read() in a loop (non-callback mode).
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import asyncio
|
|
27
|
+
import logging
|
|
28
|
+
import time
|
|
29
|
+
from typing import TYPE_CHECKING
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
import sounddevice as sd
|
|
33
|
+
|
|
34
|
+
from nixorb.core.event_bus import Event, bus
|
|
35
|
+
from nixorb.core.vram_manager import ModelPriority, vram
|
|
36
|
+
|
|
37
|
+
if TYPE_CHECKING:
|
|
38
|
+
from nixorb.settings import Settings
|
|
39
|
+
|
|
40
|
+
log = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
# Capture format handed to sounddevice.
SAMPLE_RATE = 16_000
CHANNELS = 1
DTYPE = "float32"
CHUNK_FRAMES = 1_024

# End-of-utterance detection.
SILENCE_DB = -38.0    # dBFS; above this = speech
SILENCE_SECS = 1.2    # seconds of consecutive silence = end of utterance
MAX_RECORD_S = 30.0   # hard cap
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _load_whisper():
    """Construct and return the faster-whisper ``large-v3`` model on CUDA."""
    from faster_whisper import WhisperModel

    options = {
        "device": "cuda",
        # INT8 weights, FP16 compute ≈ 2 GB VRAM
        "compute_type": "int8_float16",
        "cpu_threads": 4,
        "num_workers": 2,
    }
    whisper = WhisperModel("large-v3", **options)
    log.info("Whisper Large v3 (int8_float16) loaded")
    return whisper
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _unload_whisper(model) -> None:
    """Drop this function's own reference to *model*.

    Only the local name is unbound; the object itself is released once
    no other reference to it remains.
    """
    del model
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Import-time side effect: announce the Whisper model to the VRAM manager,
# together with its loader/unloader callbacks and declared footprint.
vram.register(
    name="whisper",
    vram_mb=2_100,
    priority=ModelPriority.LOW,
    load_fn=_load_whisper,
    unload_fn=_unload_whisper,
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class WhisperEngine:
    """Microphone capture followed by faster-whisper transcription."""

    def __init__(self, settings: Settings) -> None:
        """Keep *settings*; ``microphone_index`` is read when recording."""
        self._settings = settings

    # ---------------------------------------------------------------- #
    #  Public entry point                                                #
    # ---------------------------------------------------------------- #
    async def record_and_transcribe(self) -> str | None:
        """Record from the microphone until silence, then transcribe.

        Recording is blocking I/O and runs in the default thread pool;
        transcription is driven from the event loop via ``vram.lease()``.
        ``RECORDING_START`` / ``RECORDING_STOP`` are emitted around the
        capture.

        Returns:
            The transcript, or ``None`` when nothing usable was recorded
            (under 0.3 s of audio) or the transcript came back empty.
        """
        await bus.emit(Event.RECORDING_START, source="whisper")
        loop = asyncio.get_running_loop()

        try:
            audio = await loop.run_in_executor(None, self._record_blocking)
        finally:
            # Always pair START with STOP, even if recording raised or the
            # task was cancelled, so listeners are not left "recording".
            await bus.emit(Event.RECORDING_STOP, source="whisper")

        if audio is None or len(audio) < int(SAMPLE_RATE * 0.3):
            log.debug("Recording too short or empty — skipping transcription")
            return None

        return await self._transcribe_async(audio)

    # ---------------------------------------------------------------- #
    #  Recording (blocking, runs in thread pool)                         #
    # ---------------------------------------------------------------- #
    def _record_blocking(self) -> np.ndarray | None:
        """Capture audio until sustained silence or the hard time cap.

        Returns:
            A flattened array of all captured samples, or ``None`` on a
            PortAudio error or when no chunk was read.
        """
        chunks: list[np.ndarray] = []
        silence_start: float | None = None
        start = time.monotonic()
        device = self._settings.microphone_index

        try:
            # Non-callback mode: blocks are pulled explicitly with read().
            with sd.InputStream(
                samplerate=SAMPLE_RATE,
                channels=CHANNELS,
                dtype=DTYPE,
                blocksize=CHUNK_FRAMES,
                device=device,
            ) as stream:
                log.info("Recording started (device=%s)", device)
                while True:
                    # read() returns (data: ndarray, overflowed: bool)
                    chunk, _overflowed = stream.read(CHUNK_FRAMES)
                    chunks.append(chunk.copy())

                    # Block level in dB; the epsilon keeps log10 away from 0.
                    rms = float(np.sqrt(np.mean(chunk ** 2)) + 1e-10)
                    rms_db = 20.0 * np.log10(rms)
                    elapsed = time.monotonic() - start

                    if rms_db > SILENCE_DB:
                        silence_start = None  # reset silence timer on speech
                    elif silence_start is None:
                        silence_start = time.monotonic()
                    elif (time.monotonic() - silence_start) >= SILENCE_SECS:
                        log.info("End of speech detected (%.1f s)", elapsed)
                        break

                    if elapsed >= MAX_RECORD_S:
                        log.warning("Max recording duration reached")
                        break

        except sd.PortAudioError as exc:
            log.error("PortAudio error during recording: %s", exc)
            return None

        if not chunks:
            return None
        return np.concatenate(chunks, axis=0).flatten()

    # ---------------------------------------------------------------- #
    #  Transcription (driven from the event loop)                        #
    # ---------------------------------------------------------------- #
    async def _transcribe_async(self, audio: np.ndarray) -> str | None:
        """Lease Whisper from the VRAM manager and transcribe *audio*.

        A non-empty transcript is also emitted as ``Event.TRANSCRIPT_READY``.
        """
        async with vram.lease("whisper") as model:
            loop = asyncio.get_running_loop()
            # The faster-whisper call is synchronous: run it in the thread pool.
            text = await loop.run_in_executor(
                None, self._transcribe_sync, model, audio
            )
            # NOTE(review): the event is emitted while the lease is still
            # held — confirm no handler needs to lease VRAM itself.
            if text:
                await bus.emit(
                    Event.TRANSCRIPT_READY,
                    data={"text": text},
                    source="whisper",
                    priority=2,
                )
                log.info("Transcript: %s", text[:120])
            return text or None

    @staticmethod
    def _transcribe_sync(model, audio: np.ndarray) -> str:
        """Run ``model.transcribe`` and join the segment texts with spaces."""
        segments, _info = model.transcribe(
            audio,
            beam_size=5,
            language=None,  # auto-detect
            vad_filter=True,
            vad_parameters={
                "min_silence_duration_ms": 500,
                "speech_pad_ms": 200,
            },
            word_timestamps=False,
            condition_on_previous_text=False,
        )
        return " ".join(seg.text.strip() for seg in segments).strip()
|