easyspeak-linux 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ """EasySpeak core functionality."""
@@ -0,0 +1,12 @@
1
"""
Module entry point for the EasySpeak core package.

Running the package as a module starts the application:

.. code:: console

    python -m easyspeak.core
"""

from .main import run

# Only start the application when executed as a script/module, not on import.
if __name__ == "__main__":
    run()
easyspeak/core/main.py ADDED
@@ -0,0 +1,312 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ EasySpeak Core - Voice Control for Linux
4
+
5
+ Loads plugins from plugins/ folder automatically.
6
+ Uses OpenWakeWord for fast wake detection.
7
+ """
8
+
9
+ import importlib
10
+ import os
11
+ import subprocess
12
+ import sys
13
+ import tempfile
14
+ import time
15
+ import wave
16
+ from pathlib import Path
17
+
18
+ import numpy as np
19
+ import pyaudio
20
+ from faster_whisper import WhisperModel
21
+ from openwakeword.model import Model as WakeWordModel
22
+
23
+ # =============================================================================
24
+ # CONFIGURATION
25
+ # =============================================================================
26
+
27
# Key looked up in the OpenWakeWord prediction dict to obtain the wake score.
WAKE_WORD = "hey_jarvis"

# Voice model file handed to the `piper` command via `--model`.
PIPER_MODEL = os.path.expanduser("~/.local/share/piper/en_US-amy-medium.onnx")

# Model identifier passed to faster-whisper's WhisperModel.
WHISPER_MODEL = "base.en"

# A chunk whose mean absolute sample value is below this counts as silence.
SILENCE_THRESHOLD = 300

# Seconds of consecutive silence that end a recording.
SILENCE_DURATION = 0.3

# The wake score must exceed this value to trigger listening.
WAKE_THRESHOLD = 0.5

# Seconds to ignore wake word after trigger
WAKE_COOLDOWN = 3.0

# Prompt to help Whisper recognize common commands
COMMAND_PROMPT = ", ".join(
    (
        "numbers",
        "scroll",
        "click",
        "open",
        "close",
        "back",
        "forward",
        "volume",
        "brightness",
        "stop",
    )
)
39
+
40
+ # =============================================================================
41
+ # CORE CLASS
42
+ # =============================================================================
43
+
44
+
45
class EasySpeak:
    """Voice-control core: wake-word detection, recording, transcription, dispatch.

    The instance is also handed to every plugin (``setup(core)`` and
    ``handle(cmd, core)``), so ``host_run`` and ``speak`` double as the
    utility API available to plugins.
    """

    # Audio capture parameters (samples per second / samples per read).
    SAMPLE_RATE = 16000
    RECORD_CHUNK = 1600  # chunk size used while waiting for / recording speech
    WAKE_CHUNK = 1280  # chunk size fed to the wake-word model
    MAX_RECORD_SECONDS = 5  # hard cap on one recording
    MIN_RECORD_CHUNKS = 5  # chunks always recorded before silence can end it

    # Spoken wake phrases that may leak into a transcript.
    _WAKE_PHRASES = ("hey, jarvis", "hey jarvis", "jarvis")
    _COMMAND_PUNCTUATION = ".,!? "

    def __init__(self):
        self.plugins = []  # loaded plugin modules, in load order
        self.whisper = None  # WhisperModel, created in run()
        self.wakeword = None  # OpenWakeWord model, created in run()
        self.audio = None  # PyAudio instance, created in run()
        self.stream = None  # open input stream, created in run()
        self.last_wake_time = 0  # time.time() of the last accepted wake

    # --- Utilities for plugins ---

    def host_run(self, cmd, background=False):
        """Run an external command.

        ``cmd`` is passed straight to ``subprocess`` without ``shell=True``.

        Args:
            cmd: Command to execute.
            background: When true, start the process with its output
                discarded and return immediately.

        Returns:
            The ``subprocess.Popen`` handle when ``background`` is true,
            otherwise the ``subprocess.CompletedProcess`` with captured
            text output.
        """
        if background:
            return subprocess.Popen(
                cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        return subprocess.run(cmd, capture_output=True, text=True)

    def speak(self, text):
        """Print ``text`` and speak it by piping it through piper and ffplay.

        The synthesized audio goes through a temporary WAV file which is
        removed afterwards, even when one of the external commands fails.
        """
        print(f"💬 {text}")
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            output_file = f.name

        try:
            subprocess.run(
                ["piper", "--model", PIPER_MODEL, "--output_file", output_file],
                input=text.encode(),
                capture_output=True,
            )
            subprocess.run(
                ["ffplay", "-nodisp", "-autoexit", output_file],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        finally:
            self._remove_quietly(output_file)

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path``, ignoring the case where it is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    # --- Plugin management ---

    def load_plugins(self):
        """Import every public ``*.py`` file in the sibling ``plugins`` folder.

        A module is accepted when it defines ``NAME`` and ``handle``; an
        optional ``setup(core)`` is called before it is registered. Import
        or setup failures are reported and do not stop the remaining
        plugins from loading.
        """
        plugins_dir = Path(__file__).parent / "plugins"
        if not plugins_dir.exists():
            print("No plugins directory found")
            return

        # Plugins are imported as ``plugins.<name>``, so the folder's parent
        # must be first on sys.path; avoid stacking duplicates on reload.
        search_root = str(plugins_dir.parent)
        if not sys.path or sys.path[0] != search_root:
            sys.path.insert(0, search_root)

        for file in sorted(plugins_dir.glob("*.py")):
            if file.name.startswith("_"):
                continue

            module_name = f"plugins.{file.stem}"
            try:
                module = importlib.import_module(module_name)

                if hasattr(module, "NAME") and hasattr(module, "handle"):
                    if hasattr(module, "setup"):
                        module.setup(self)

                    self.plugins.append(module)
                    print(f"  ✓ Loaded: {module.NAME}")
                else:
                    print(f"  ✗ Invalid plugin: {file.name} (missing NAME or handle)")
            except Exception as e:
                print(f"  ✗ Failed to load {file.name}: {e}")

    def get_all_commands(self):
        """Return the concatenated ``COMMANDS`` of all plugins that define it."""
        commands = []
        for plugin in self.plugins:
            commands.extend(getattr(plugin, "COMMANDS", ()))
        return commands

    @classmethod
    def _strip_wake_phrase(cls, cmd):
        """Strip wake phrases and punctuation from both ends of ``cmd``.

        Only whole-word matches at the start or end are removed, so a
        command that merely mentions the wake word keeps its wording.
        """
        punct = cls._COMMAND_PUNCTUATION
        cmd = cmd.strip(punct)
        stripped = True
        while stripped and cmd:
            stripped = False
            for wake in cls._WAKE_PHRASES:
                size = len(wake)
                if cmd.startswith(wake) and not cmd[size:size + 1].isalnum():
                    cmd = cmd[size:].strip(punct)
                    stripped = True
                if cmd.endswith(wake) and not cmd[-size - 1:-size].isalnum():
                    cmd = cmd[:-size].strip(punct)
                    stripped = True
        return cmd

    def route_command(self, cmd):
        """Offer ``cmd`` to each plugin in turn.

        The command is lower-cased and cleaned of wake phrases first. A
        plugin claims it by returning exactly ``True`` (handled, keep
        running) or ``False`` (handled, exit); any other result passes it
        on to the next plugin. A plugin that raises is reported and skipped.

        Returns:
            ``False`` when a plugin asked the application to exit,
            otherwise ``True``.
        """
        cmd = self._strip_wake_phrase(cmd.lower())
        if not cmd:
            return True

        for plugin in self.plugins:
            try:
                result = plugin.handle(cmd, self)
            except Exception as e:
                print(f"Plugin error ({plugin.NAME}): {e}")
                continue
            if result is True or result is False:
                return result

        self.speak("I didn't understand. Say help for commands.")
        return True

    # --- Audio ---

    def flush_stream(self):
        """Discard whatever audio is currently buffered in the input stream.

        Best effort: stream errors are ignored, but KeyboardInterrupt and
        SystemExit still propagate because only ``Exception`` is caught.
        """
        try:
            available = self.stream.get_read_available()
            if available > 0:
                self.stream.read(available, exception_on_overflow=False)
        except Exception:
            pass  # flushing is optional; never let it break the main loop

    def is_silence(self, audio_chunk):
        """Return True when the mean absolute amplitude is below the threshold.

        An empty chunk counts as silence.
        """
        samples = np.asarray(audio_chunk)
        if samples.size == 0:
            return True
        # Widen first: abs() of the int16 minimum (-32768) wraps around in
        # int16 and would make a clipped, very loud chunk look silent.
        level = np.abs(samples.astype(np.float64)).mean()
        return bool(level < SILENCE_THRESHOLD)

    def record_until_silence(self):
        """Record until SILENCE_DURATION of silence or the maximum length.

        Returns:
            The recorded raw audio bytes.
        """
        frames = []
        silent_run = 0
        chunks_needed = int(SILENCE_DURATION * self.SAMPLE_RATE / self.RECORD_CHUNK)
        max_chunks = int(self.MAX_RECORD_SECONDS * self.SAMPLE_RATE / self.RECORD_CHUNK)

        for i in range(max_chunks):
            pcm = self.stream.read(self.RECORD_CHUNK, exception_on_overflow=False)
            frames.append(pcm)

            # Always keep the first few chunks before silence may end it.
            if i < self.MIN_RECORD_CHUNKS:
                continue

            if self.is_silence(np.frombuffer(pcm, dtype=np.int16)):
                silent_run += 1
                if silent_run >= chunks_needed:
                    break
            else:
                silent_run = 0

        return b"".join(frames)

    def wait_for_speech(self, timeout=5):
        """Read chunks until one is not silent.

        Args:
            timeout: Maximum time to wait, in seconds of audio read.

        Returns:
            The first non-silent chunk as bytes, or ``None`` on timeout.
        """
        for _ in range(int(timeout * self.SAMPLE_RATE / self.RECORD_CHUNK)):
            pcm = self.stream.read(self.RECORD_CHUNK, exception_on_overflow=False)
            if not self.is_silence(np.frombuffer(pcm, dtype=np.int16)):
                return pcm
        return None

    def transcribe(self, audio_data, prompt=None):
        """Transcribe raw 16-bit mono audio bytes with Whisper.

        Args:
            audio_data: Raw sample bytes, written to a temporary WAV file.
            prompt: Optional initial prompt; falls back to COMMAND_PROMPT.

        Returns:
            The stripped transcript text.
        """
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name

        try:
            with wave.open(wav_path, "wb") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(self.SAMPLE_RATE)
                wf.writeframes(audio_data)

            segments, _ = self.whisper.transcribe(
                wav_path,
                initial_prompt=prompt or COMMAND_PROMPT,
                beam_size=1,
                vad_filter=True,
            )
            # Consume the segments before the file is removed in `finally`.
            return " ".join(s.text for s in segments).strip()
        finally:
            self._remove_quietly(wav_path)

    # --- Main loop ---

    @staticmethod
    def _print_banner():
        """Print the startup banner."""
        width = 42
        bar = "═" * width
        rows = [
            "",
            "╔" + bar + "╗",
            "║" + "EasySpeak".center(width) + "║",
            "╠" + bar + "╣",
            "║" + '  Wake word: "Hey Jarvis"'.ljust(width) + "║",
            "║" + '  Say "help" for available commands'.ljust(width) + "║",
            "╚" + bar + "╝",
            "",
        ]
        print("\n".join(rows))

    def _play_wake_chime(self):
        """Play the acknowledgement sound; a missing player is not fatal."""
        try:
            subprocess.run(
                ["paplay", "/usr/share/sounds/freedesktop/stereo/message.oga"],
                capture_output=True,
            )
        except OSError as e:
            print(f"(wake chime unavailable: {e})")

    def _handle_wake(self):
        """Capture and dispatch one command after a wake detection.

        Returns:
            ``False`` when a plugin requested exit, otherwise ``True``.
        """
        # Reset everything
        self.wakeword.reset()
        self.flush_stream()

        # Audio feedback first, then drop whatever was captured meanwhile.
        self._play_wake_chime()
        self.flush_stream()

        # Wait for user to start speaking (up to 5 seconds)
        first = self.wait_for_speech(timeout=5)
        if not first:
            self.speak("I didn't hear anything.")
            return True

        # User started speaking, record until they stop
        audio = first + self.record_until_silence()
        cmd = self.transcribe(audio)
        if cmd:
            print(f"👂 {cmd}")
            if not self.route_command(cmd.lower().strip(".,!? ")):
                return False
        self.wakeword.reset()
        self.flush_stream()
        return True

    def _close_audio(self):
        """Release the input stream and PyAudio, tolerating partial setup."""
        try:
            if self.stream is not None:
                self.stream.stop_stream()
                self.stream.close()
        finally:
            self.stream = None
            if self.audio is not None:
                self.audio.terminate()
                self.audio = None

    def run(self):
        """Load models and plugins, then listen for the wake word until exit.

        Returns early when no plugin could be loaded. The loop ends when a
        plugin's handler returns ``False`` or on Ctrl+C; audio resources
        are released in every case.
        """
        print("Loading OpenWakeWord...")
        self.wakeword = WakeWordModel()

        print("Loading Whisper...")
        self.whisper = WhisperModel(WHISPER_MODEL, compute_type="int8")

        print("\nLoading plugins...")
        self.load_plugins()

        if not self.plugins:
            print("No plugins loaded. Exiting.")
            return

        self._print_banner()

        try:
            # Opened inside the try so a failing open() still terminates PyAudio.
            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.SAMPLE_RATE,
                input=True,
                frames_per_buffer=self.WAKE_CHUNK,
            )

            print("Listening for wake word...")

            while True:
                pcm = self.stream.read(self.WAKE_CHUNK, exception_on_overflow=False)
                audio_data = np.frombuffer(pcm, dtype=np.int16)

                prediction = self.wakeword.predict(audio_data)
                score = prediction.get(WAKE_WORD, 0)
                if score <= WAKE_THRESHOLD:
                    continue

                # Cooldown check - ignore if triggered recently
                now = time.time()
                if now - self.last_wake_time < WAKE_COOLDOWN:
                    continue
                self.last_wake_time = now

                print(f"🎤 Wake! (confidence: {score:.2f})")

                if not self._handle_wake():
                    break

                print("Listening for wake word...")

        except KeyboardInterrupt:
            print("\nBye!")
        finally:
            self._close_audio()
307
+
308
+
309
def run():
    """Create an EasySpeak instance and hand control to its main loop."""
    EasySpeak().run()