easyspeak-linux 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- easyspeak/core/__init__.py +1 -0
- easyspeak/core/__main__.py +12 -0
- easyspeak/core/main.py +312 -0
- easyspeak/plugins/00_eyetrack.py +511 -0
- easyspeak/plugins/00_mousegrid.py +516 -0
- easyspeak/plugins/__init__.py +1 -0
- easyspeak/plugins/apps.py +93 -0
- easyspeak/plugins/browser.py +573 -0
- easyspeak/plugins/dictation.py +243 -0
- easyspeak/plugins/files.py +64 -0
- easyspeak/plugins/media.py +92 -0
- easyspeak/plugins/system.py +122 -0
- easyspeak/plugins/zz_base.py +50 -0
- easyspeak_linux-0.1.0.dist-info/METADATA +527 -0
- easyspeak_linux-0.1.0.dist-info/RECORD +19 -0
- easyspeak_linux-0.1.0.dist-info/WHEEL +5 -0
- easyspeak_linux-0.1.0.dist-info/entry_points.txt +2 -0
- easyspeak_linux-0.1.0.dist-info/licenses/LICENSE +674 -0
- easyspeak_linux-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""EasySpeak core functionality."""
|
easyspeak/core/main.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
EasySpeak Core - Voice Control for Linux
|
|
4
|
+
|
|
5
|
+
Loads plugins from plugins/ folder automatically.
|
|
6
|
+
Uses OpenWakeWord for fast wake detection.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import importlib
|
|
10
|
+
import os
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
import time
|
|
15
|
+
import wave
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pyaudio
|
|
20
|
+
from faster_whisper import WhisperModel
|
|
21
|
+
from openwakeword.model import Model as WakeWordModel
|
|
22
|
+
|
|
23
|
+
# =============================================================================
|
|
24
|
+
# CONFIGURATION
|
|
25
|
+
# =============================================================================
|
|
26
|
+
|
|
27
|
+
# --- Wake-word detection ---
WAKE_WORD = "hey_jarvis"  # OpenWakeWord model name
WAKE_THRESHOLD = 0.5  # Prediction score must exceed this to count as a wake
WAKE_COOLDOWN = 3.0  # Seconds to ignore wake word after trigger

# --- Command recording ---
SILENCE_THRESHOLD = 300  # Mean absolute sample value below this is "silence"
SILENCE_DURATION = 0.3  # Seconds of continuous silence that end a recording

# --- Speech-to-text / text-to-speech models ---
WHISPER_MODEL = "base.en"
PIPER_MODEL = os.path.expanduser("~/.local/share/piper/en_US-amy-medium.onnx")

# Prompt to help Whisper recognize common commands
COMMAND_PROMPT = (
    "numbers, scroll, click, open, close, back, forward, volume, brightness, stop"
)
|
|
39
|
+
|
|
40
|
+
# =============================================================================
|
|
41
|
+
# CORE CLASS
|
|
42
|
+
# =============================================================================
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class EasySpeak:
    """Voice-control core: wake word -> record -> transcribe -> plugin.

    The instance is handed to every plugin (``setup(core)`` and
    ``handle(cmd, core)``), so ``host_run`` and ``speak`` double as the
    utility API available to plugins.

    Attributes:
        plugins: Loaded plugin modules, in load (file-name) order.
        whisper: ``WhisperModel`` instance, created in ``run``.
        wakeword: OpenWakeWord model instance, created in ``run``.
        audio: ``pyaudio.PyAudio`` instance, created in ``run``.
        stream: Open microphone input stream, created in ``run``.
        last_wake_time: ``time.time()`` of the last accepted wake word.
    """

    # Audio geometry shared by the microphone stream and the WAV files
    # written for Whisper (16-bit mono).
    _SAMPLE_RATE = 16000
    _WAKE_CHUNK = 1280  # frames per read while listening for the wake word
    _COMMAND_CHUNK = 1600  # frames per read while recording a command
    _MAX_COMMAND_SECONDS = 5  # hard cap on a single command recording
    _GRACE_CHUNKS = 5  # leading chunks never tested for silence

    _WAKE_SOUND = "/usr/share/sounds/freedesktop/stereo/message.oga"

    # Wake phrases removed from a transcription before routing, tried in
    # this order.
    _WAKE_PHRASES = (
        "hey jarvis",
        "hey jarvis,",
        "hey, jarvis",
        "hey, jarvis,",
        "hey jarvis.",
        "jarvis",
        "jarvis,",
    )

    def __init__(self):
        self.plugins = []
        self.whisper = None
        self.wakeword = None
        self.audio = None
        self.stream = None
        self.last_wake_time = 0

    # --- Utilities for plugins ---

    def host_run(self, cmd, background=False):
        """Run a command on the host.

        ``cmd`` is passed to ``subprocess`` unchanged and without
        ``shell=True``.

        Args:
            cmd: Command to execute.
            background: If true, start the process and return immediately
                with its output discarded.

        Returns:
            The ``subprocess.Popen`` object when ``background`` is true,
            otherwise the ``subprocess.CompletedProcess`` with captured
            text ``stdout``/``stderr``.
        """
        if background:
            return subprocess.Popen(
                cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        return subprocess.run(cmd, capture_output=True, text=True)

    def speak(self, text):
        """Print ``text`` and speak it (piper synthesis, ffplay playback).

        Blocks until playback has finished. The temporary WAV file is
        removed even when one of the external programs cannot be started.

        Raises:
            OSError: If ``piper`` or ``ffplay`` cannot be executed.
        """
        print(f"💬 {text}")
        # Only the unique path is needed; piper writes the file itself.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            output_file = f.name

        try:
            subprocess.run(
                ["piper", "--model", PIPER_MODEL, "--output_file", output_file],
                input=text.encode(),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            subprocess.run(
                ["ffplay", "-nodisp", "-autoexit", output_file],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        finally:
            try:
                os.remove(output_file)
            except FileNotFoundError:
                pass

    # --- Plugin management ---

    def load_plugins(self, plugins_dir=None):
        """Import every plugin module found in the plugins directory.

        A plugin is a ``*.py`` file (not starting with ``_``) that defines
        ``NAME`` and ``handle``; an optional ``setup(core)`` is called once
        before the module is registered. Files are loaded in sorted
        file-name order, which is also the routing order. A plugin that
        fails to import or set up is reported and skipped.

        Args:
            plugins_dir: Directory to load from. When omitted, a
                ``plugins`` directory next to this file is used, falling
                back to a ``plugins`` directory one level up (the layout
                of the installed package, where this file lives in
                ``core/`` and plugins in a sibling ``plugins/``).
        """
        if plugins_dir is None:
            here = Path(__file__).resolve().parent
            candidates = [here / "plugins", here.parent / "plugins"]
        else:
            candidates = [Path(plugins_dir)]

        plugins_dir = next((c for c in candidates if c.is_dir()), None)
        if plugins_dir is None:
            print("No plugins directory found")
            return

        # Plugins are imported as "<dir name>.<file stem>", so the parent
        # of the plugins directory must be importable. Avoid stacking
        # duplicate entries when called more than once.
        package_root = str(plugins_dir.parent)
        if sys.path[:1] != [package_root]:
            sys.path.insert(0, package_root)

        for file in sorted(plugins_dir.glob("*.py")):
            if file.name.startswith("_"):
                continue

            module_name = f"{plugins_dir.name}.{file.stem}"
            try:
                module = importlib.import_module(module_name)

                if not (hasattr(module, "NAME") and hasattr(module, "handle")):
                    print(f" ✗ Invalid plugin: {file.name} (missing NAME or handle)")
                    continue

                if hasattr(module, "setup"):
                    module.setup(self)

                self.plugins.append(module)
                print(f" ✓ Loaded: {module.NAME}")
            except Exception as e:
                # Plugin boundary: one broken plugin must not stop the rest.
                print(f" ✗ Failed to load {file.name}: {e}")

    def get_all_commands(self):
        """Return the concatenated ``COMMANDS`` of all loaded plugins."""
        commands = []
        for plugin in self.plugins:
            commands.extend(getattr(plugin, "COMMANDS", ()))
        return commands

    def route_command(self, cmd):
        """Offer a spoken command to each plugin in load order.

        The command is lower-cased, wake phrases are removed and
        surrounding punctuation is stripped first. A plugin claims the
        command by returning ``True`` (handled) or ``False`` (handled,
        and the application should exit); any other return value passes
        the command on to the next plugin. Exceptions raised by a plugin
        are reported and the next plugin is tried.

        Returns:
            ``False`` if the application should exit, ``True`` otherwise.
        """
        cmd = cmd.lower()
        for wake in self._WAKE_PHRASES:
            cmd = cmd.replace(wake, "").strip()
        cmd = cmd.strip(".,!? ")

        if not cmd:
            # Only the wake phrase was spoken; nothing to route.
            return True

        for plugin in self.plugins:
            try:
                result = plugin.handle(cmd, self)
            except Exception as e:
                print(f"Plugin error ({plugin.NAME}): {e}")
                continue
            if result is True or result is False:
                return result

        self.speak("I didn't understand. Say help for commands.")
        return True

    # --- Audio ---

    def flush_stream(self):
        """Discard whatever audio is currently buffered in the stream.

        Best effort: any error (including a stream that is not open) is
        ignored so that flushing can never break the caller.
        """
        try:
            self.stream.read(
                self.stream.get_read_available(),
                exception_on_overflow=False,
            )
        except Exception:
            # Deliberately ignored, but unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit through.
            pass

    def is_silence(self, audio_chunk, threshold=None):
        """Return True if the chunk's mean absolute amplitude is low.

        Args:
            audio_chunk: Array of audio samples.
            threshold: Amplitude limit; defaults to ``SILENCE_THRESHOLD``.

        Returns:
            ``True`` when the mean absolute sample value is below the
            threshold or the chunk is empty, else ``False``.
        """
        if threshold is None:
            threshold = SILENCE_THRESHOLD
        samples = np.asarray(audio_chunk)
        if samples.size == 0:
            return True
        # Widen before abs(): for int16 input abs(-32768) overflows back
        # to -32768 and would make clipped audio look silent.
        level = np.abs(samples.astype(np.float64)).mean()
        return bool(level < threshold)

    def record_until_silence(self):
        """Record from the stream until the speaker pauses.

        Reads fixed-size chunks for at most ``_MAX_COMMAND_SECONDS``.
        After the first ``_GRACE_CHUNKS`` chunks, recording stops once
        ``SILENCE_DURATION`` seconds of consecutive silent chunks have
        been seen.

        Returns:
            The recorded audio as raw bytes (trailing silence included).
        """
        chunk = self._COMMAND_CHUNK
        rate = self._SAMPLE_RATE
        chunks_needed = int(SILENCE_DURATION * rate / chunk)
        max_chunks = int(self._MAX_COMMAND_SECONDS * rate / chunk)

        frames = []
        silent_chunks = 0
        for i in range(max_chunks):
            pcm = self.stream.read(chunk, exception_on_overflow=False)
            frames.append(pcm)

            if i < self._GRACE_CHUNKS:
                continue

            if self.is_silence(np.frombuffer(pcm, dtype=np.int16)):
                silent_chunks += 1
                if silent_chunks >= chunks_needed:
                    break
            else:
                silent_chunks = 0

        return b"".join(frames)

    def wait_for_speech(self, timeout=5):
        """Wait for the first non-silent chunk.

        Args:
            timeout: Maximum time to wait, in seconds of audio read.

        Returns:
            The raw bytes of the first non-silent chunk, or ``None`` if
            only silence was read before the timeout.
        """
        chunk = self._COMMAND_CHUNK
        for _ in range(int(timeout * self._SAMPLE_RATE / chunk)):
            pcm = self.stream.read(chunk, exception_on_overflow=False)
            if not self.is_silence(np.frombuffer(pcm, dtype=np.int16)):
                return pcm
        return None

    def transcribe(self, audio_data, prompt=None):
        """Transcribe raw 16-bit mono PCM audio with Whisper.

        The audio is written to a temporary WAV file, which is removed
        again even if transcription fails.

        Args:
            audio_data: Raw PCM bytes at ``_SAMPLE_RATE``.
            prompt: Initial prompt for Whisper; ``COMMAND_PROMPT`` is used
                when this is empty or ``None``.

        Returns:
            The segment texts joined with spaces and stripped.
        """
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name

        try:
            with wave.open(wav_path, "wb") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(self._SAMPLE_RATE)
                wf.writeframes(audio_data)

            use_prompt = prompt if prompt else COMMAND_PROMPT
            segments, _ = self.whisper.transcribe(
                wav_path, initial_prompt=use_prompt, beam_size=1, vad_filter=True
            )
            # Consume the segments before the file is removed below.
            return " ".join([s.text for s in segments]).strip()
        finally:
            try:
                os.remove(wav_path)
            except FileNotFoundError:
                pass

    # --- Main loop ---

    def _print_banner(self):
        """Print the startup banner box."""
        width = 42
        bar = "═" * width
        title = "EasySpeak".center(width)
        wake = '  Wake word: "Hey Jarvis"'.ljust(width)
        hint = '  Say "help" for available commands'.ljust(width)
        print(
            f"\n╔{bar}╗\n║{title}║\n╠{bar}╣\n║{wake}║\n║{hint}║\n╚{bar}╝\n"
        )

    def _play_wake_sound(self):
        """Play the acknowledgement sound; a missing player is not fatal."""
        try:
            subprocess.run(["paplay", self._WAKE_SOUND], capture_output=True)
        except OSError:
            pass

    def _handle_wake(self):
        """Record, transcribe and route one command after a wake word.

        Returns:
            ``False`` if a plugin asked the application to exit, ``True``
            otherwise.
        """
        # Drop the wake word itself from the model state and the buffer.
        self.wakeword.reset()
        self.flush_stream()

        # Audio feedback first, then flush any audio captured during it.
        self._play_wake_sound()
        self.flush_stream()

        keep_running = True
        first = self.wait_for_speech(timeout=5)
        if first:
            # User started speaking, record until they stop.
            audio = first + self.record_until_silence()
            cmd = self.transcribe(audio)
            if cmd:
                print(f"👂 {cmd}")
                keep_running = bool(self.route_command(cmd.lower().strip(".,!? ")))
        else:
            self.speak("I didn't hear anything.")

        if keep_running:
            # Discard audio buffered while the command was handled or
            # spoken feedback was playing before listening again.
            self.wakeword.reset()
            self.flush_stream()
        return keep_running

    def run(self):
        """Load models and plugins, then listen for the wake word.

        Runs until a plugin requests exit (``route_command`` returns
        ``False``) or the user presses Ctrl+C. Returns early, before the
        microphone is opened, when no plugin could be loaded. The audio
        stream and PyAudio instance are always released on the way out.
        """
        print("Loading OpenWakeWord...")
        self.wakeword = WakeWordModel()

        print("Loading Whisper...")
        self.whisper = WhisperModel(WHISPER_MODEL, compute_type="int8")

        print("\nLoading plugins...")
        self.load_plugins()

        if not self.plugins:
            print("No plugins loaded. Exiting.")
            return

        self._print_banner()

        self.audio = pyaudio.PyAudio()
        try:
            self.stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self._SAMPLE_RATE,
                input=True,
                frames_per_buffer=self._WAKE_CHUNK,
            )
            print("Listening for wake word...")

            while True:
                pcm = self.stream.read(
                    self._WAKE_CHUNK, exception_on_overflow=False
                )
                audio_data = np.frombuffer(pcm, dtype=np.int16)

                prediction = self.wakeword.predict(audio_data)
                score = prediction.get(WAKE_WORD, 0)
                if score <= WAKE_THRESHOLD:
                    continue

                # Cooldown check - ignore if triggered recently
                now = time.time()
                if now - self.last_wake_time < WAKE_COOLDOWN:
                    continue
                self.last_wake_time = now

                print(f"🎤 Wake! (confidence: {score:.2f})")

                if not self._handle_wake():
                    break

                print("Listening for wake word...")

        except KeyboardInterrupt:
            print("\nBye!")
        finally:
            # The stream may not exist if opening the microphone failed.
            if self.stream is not None:
                self.stream.stop_stream()
                self.stream.close()
            self.audio.terminate()
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def run():
    """Create an EasySpeak instance and run its main loop until it returns."""
    EasySpeak().run()
|