scribe-cli 0.12.0__tar.gz → 0.12.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scribe_cli-0.12.0/scribe_cli.egg-info → scribe_cli-0.12.1}/PKG-INFO +1 -1
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/_version.py +2 -2
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/app.py +41 -7
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/models.py +44 -30
- {scribe_cli-0.12.0 → scribe_cli-0.12.1/scribe_cli.egg-info}/PKG-INFO +1 -1
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/.github/workflows/pypi.yml +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/.gitignore +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/LICENSE +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/README.md +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/icon.xcf +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/pyproject.toml +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/__init__.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/audio.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/install_desktop.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/keyboard.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/models.toml +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/saverecording.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/testpynput.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/util.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/SOURCES.txt +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/dependency_links.txt +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/entry_points.txt +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/requires.txt +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/top_level.txt +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/__init__.py +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/share/icon.png +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/share/icon_recording.png +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/share/icon_writing.png +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/templates/scribe.desktop +0 -0
- {scribe_cli-0.12.0 → scribe_cli-0.12.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: scribe-cli
|
|
3
|
-
Version: 0.12.0
|
|
3
|
+
Version: 0.12.1
|
|
4
4
|
Summary: scribe is a local speech recognition tool that provides real-time transcription using vosk and whisper AI, with the goal of serving as a virtual keyboard on a computer
|
|
5
5
|
Author-email: Mahé Perrette <mahe.perrette@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -3,6 +3,7 @@ import tomllib
|
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
5
|
import argparse
|
|
6
|
+
from typing import Iterable
|
|
6
7
|
from scribe.audio import Microphone
|
|
7
8
|
from scribe.util import print_partial, clear_line, prompt_choices, ansi_link, colored
|
|
8
9
|
from scribe.models import VoskTranscriber, WhisperTranscriber, OpenaiAPITranscriber
|
|
@@ -255,7 +256,7 @@ def start_recording(micro, transcriber, clipboard=True, keyboard=False, latency=
|
|
|
255
256
|
callback()
|
|
256
257
|
|
|
257
258
|
|
|
258
|
-
def create_app(micro, transcriber, other_transcribers=None, **kwargs):
|
|
259
|
+
def create_app(micro, transcriber, other_transcribers=None, transcriber_options=[], **kwargs):
|
|
259
260
|
import pystray
|
|
260
261
|
from pystray import Menu as pystrayMenu, MenuItem as Item
|
|
261
262
|
from PIL import Image
|
|
@@ -344,6 +345,9 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
|
|
|
344
345
|
|
|
345
346
|
def callback_set_model(icon, item):
|
|
346
347
|
transcriber = icon._transcriber
|
|
348
|
+
if transcriber.model_name == str(item):
|
|
349
|
+
transcriber.log(f"Already using model {str(item)}")
|
|
350
|
+
return
|
|
347
351
|
callback_stop_recording(icon, item)
|
|
348
352
|
model_name = str(item)
|
|
349
353
|
meta = other_transcribers_dict[model_name]
|
|
@@ -356,7 +360,23 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
|
|
|
356
360
|
|
|
357
361
|
def callback_toggle_option(icon, item):
|
|
358
362
|
callback_stop_recording(icon, item)
|
|
359
|
-
|
|
363
|
+
if str(item) in transcriber_options:
|
|
364
|
+
# toggle the option on the current transcriber
|
|
365
|
+
if str(item) in icon._transcriber._frozen_options or type(getattr(icon._transcriber, str(item), None)) is not bool:
|
|
366
|
+
print("Skipped setting option", item)
|
|
367
|
+
return
|
|
368
|
+
newvalue = not getattr(icon._transcriber, str(item))
|
|
369
|
+
setattr(icon._transcriber, str(item), newvalue)
|
|
370
|
+
# set the option on the other transcribers as well
|
|
371
|
+
if other_transcribers:
|
|
372
|
+
for name in other_transcribers_dict:
|
|
373
|
+
meta = other_transcribers_dict[name]
|
|
374
|
+
if str(item) in meta:
|
|
375
|
+
meta[str(item)] = newvalue
|
|
376
|
+
|
|
377
|
+
else:
|
|
378
|
+
kwargs[str(item)] = not kwargs[str(item)]
|
|
379
|
+
print("Option set [", item, "] to", kwargs[str(item)])
|
|
360
380
|
|
|
361
381
|
def is_model_selection(item):
|
|
362
382
|
return icon._model_selection
|
|
@@ -367,23 +387,34 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
|
|
|
367
387
|
def is_not_recording(item):
|
|
368
388
|
return not is_recording(item) and not is_model_selection(item)
|
|
369
389
|
|
|
370
|
-
def
|
|
390
|
+
def is_checked_model(item):
|
|
371
391
|
return icon._transcriber.model_name == str(item)
|
|
372
392
|
|
|
373
393
|
def is_checked_option(item):
|
|
394
|
+
if not is_option_visible(item):
|
|
395
|
+
return False
|
|
396
|
+
if str(item) in transcriber_options:
|
|
397
|
+
return getattr(icon._transcriber, str(item))
|
|
374
398
|
return kwargs[str(item)]
|
|
375
399
|
|
|
400
|
+
def is_option_visible(item):
|
|
401
|
+
if str(item) in transcriber_options:
|
|
402
|
+
return str(item) not in icon._transcriber._frozen_options
|
|
403
|
+
return True
|
|
404
|
+
|
|
376
405
|
modeltitle = f"{transcriber.backend} :: {transcriber.model_name}"
|
|
377
406
|
title = f"scribe :: {modeltitle}"
|
|
378
407
|
|
|
408
|
+
options = [name for name in kwargs if isinstance(kwargs[name], bool)] + [name for name in transcriber_options if isinstance(getattr(transcriber, name), bool)]
|
|
409
|
+
|
|
379
410
|
menus = []
|
|
380
411
|
menus.append(Item(f"Record", callback_record, visible=is_not_recording, default=True))
|
|
381
412
|
menus.append(Item("Stop", callback_stop_recording, visible=is_recording))
|
|
382
413
|
menus.append(Item("Choose Model", pystrayMenu(
|
|
383
|
-
*(Item(f"{name}", callback_set_model, checked=
|
|
414
|
+
*(Item(f"{name}", callback_set_model, checked=is_checked_model) for name in other_transcribers_dict)))
|
|
384
415
|
)
|
|
385
416
|
menus.append(Item("Toggle Options", pystrayMenu(
|
|
386
|
-
*(Item(f"{name}", callback_toggle_option, checked=is_checked_option) for name in
|
|
417
|
+
*(Item(f"{name}", callback_toggle_option, checked=is_checked_option, visible=is_option_visible) for name in options)))
|
|
387
418
|
)
|
|
388
419
|
menus.append(Item('Quit', callback_quit))
|
|
389
420
|
|
|
@@ -398,6 +429,8 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
|
|
|
398
429
|
|
|
399
430
|
return icon
|
|
400
431
|
|
|
432
|
+
def _filter_options(d: dict, exclude: Iterable) -> dict:
|
|
433
|
+
return {k: v for k, v in d.items() if k not in exclude}
|
|
401
434
|
|
|
402
435
|
def main(args=None):
|
|
403
436
|
|
|
@@ -531,9 +564,10 @@ def main(args=None):
|
|
|
531
564
|
app = create_app(micro, transcriber, other_transcribers=[
|
|
532
565
|
{**vars(o), "backend": "openaiapi", "model": "whisper-1"},
|
|
533
566
|
*[{**vars(o), "backend": "whisper", "model": model} for model in o.whisper_models],
|
|
534
|
-
*[{**vars(o), "backend": "vosk", "model": model} for model in o.vosk_models]],
|
|
567
|
+
*[{**_filter_options(vars(o), exclude=VoskTranscriber._frozen_options), "backend": "vosk", "model": model} for model in o.vosk_models]],
|
|
535
568
|
clipboard=o.clipboard, output_file=o.output_file,
|
|
536
|
-
keyboard=o.keyboard, latency=o.latency, ascii=o.ascii,
|
|
569
|
+
keyboard=o.keyboard, latency=o.latency, ascii=o.ascii,
|
|
570
|
+
transcriber_options=["restart_after_silence"], **greetings)
|
|
537
571
|
print("Starting app...")
|
|
538
572
|
app.run()
|
|
539
573
|
else:
|
|
@@ -16,11 +16,15 @@ HOME = os.environ.get('HOME', os.path.expanduser('~'))
|
|
|
16
16
|
XDG_CACHE_HOME = os.environ.get('XDG_CACHE_HOME', os.path.join(HOME, '.cache'))
|
|
17
17
|
VOSK_MODELS_FOLDER = os.path.join(XDG_CACHE_HOME, "vosk")
|
|
18
18
|
|
|
19
|
+
class SilenceDetected(Exception):
|
|
20
|
+
pass
|
|
21
|
+
|
|
19
22
|
class StopRecording(Exception):
|
|
20
23
|
pass
|
|
21
24
|
|
|
22
25
|
class AbstractTranscriber:
|
|
23
26
|
backend = None
|
|
27
|
+
_frozen_options = frozenset()
|
|
24
28
|
def __init__(self, model, model_name=None, language=None, samplerate=16000, timeout=None, model_kwargs={},
|
|
25
29
|
silence_thresh=-40, silence_duration=2, restart_after_silence=False, logger=None):
|
|
26
30
|
self.model_name = model_name
|
|
@@ -50,7 +54,29 @@ class AbstractTranscriber:
|
|
|
50
54
|
return self.timeout is not None and time.time() - self.start_time > self.timeout
|
|
51
55
|
|
|
52
56
|
def transcribe_realtime_audio(self, audio_bytes=b""):
|
|
53
|
-
|
|
57
|
+
|
|
58
|
+
# Vérifier si le segment est un silence
|
|
59
|
+
if is_silent(audio_bytes, self.silence_thresh):
|
|
60
|
+
self.silence_buffer += audio_bytes
|
|
61
|
+
silence_duration = time.time() - self.last_sound_time
|
|
62
|
+
self.waiting = self.silence_duration is not None and silence_duration >= self.silence_duration
|
|
63
|
+
|
|
64
|
+
if self.waiting and len(self.audio_buffer) > 0:
|
|
65
|
+
if self.restart_after_silence:
|
|
66
|
+
raise SilenceDetected("Silence detected: {:.2f} seconds".format(silence_duration))
|
|
67
|
+
else:
|
|
68
|
+
raise StopRecording("Silence detected: {:.2f} seconds".format(silence_duration))
|
|
69
|
+
|
|
70
|
+
else:
|
|
71
|
+
self.last_sound_time = time.time()
|
|
72
|
+
self.waiting = False
|
|
73
|
+
silence_buffer_data = np.frombuffer(self.silence_buffer, dtype=np.int16)
|
|
74
|
+
# add 0.5 seconds worth of silent data back to the audio buffer
|
|
75
|
+
half_a_second = 0.5
|
|
76
|
+
length_of_half_a_second = int(half_a_second * self.samplerate)
|
|
77
|
+
self.audio_buffer += silence_buffer_data[-length_of_half_a_second:].tobytes() + audio_bytes
|
|
78
|
+
self.silence_buffer = b''
|
|
79
|
+
|
|
54
80
|
return {"partial": f"{len(self.audio_buffer)} bytes received (duration: {self.get_elapsed()} seconds)"}
|
|
55
81
|
|
|
56
82
|
def transcribe_audio(self, audio_data):
|
|
@@ -59,6 +85,7 @@ class AbstractTranscriber:
|
|
|
59
85
|
def reset(self):
|
|
60
86
|
self.audio_buffer = b''
|
|
61
87
|
self.start_time = time.time()
|
|
88
|
+
self.silence_buffer = b''
|
|
62
89
|
|
|
63
90
|
def log(self, text):
|
|
64
91
|
if text.startswith("\n"):
|
|
@@ -82,7 +109,7 @@ class AbstractTranscriber:
|
|
|
82
109
|
self.last_sound_time = time.time() - self.silence_duration
|
|
83
110
|
else:
|
|
84
111
|
self.last_sound_time = time.time()
|
|
85
|
-
|
|
112
|
+
# self.silence_buffer = b'' # already reset in self.reset()
|
|
86
113
|
|
|
87
114
|
try:
|
|
88
115
|
|
|
@@ -93,35 +120,20 @@ class AbstractTranscriber:
|
|
|
93
120
|
while not microphone.q.empty():
|
|
94
121
|
data = microphone.q.get()
|
|
95
122
|
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
silence_duration = time.time() - self.last_sound_time
|
|
99
|
-
|
|
100
|
-
previous_waiting = self.waiting
|
|
101
|
-
self.waiting = self.silence_duration is not None and silence_duration >= self.silence_duration
|
|
102
|
-
|
|
103
|
-
if self.waiting and len(self.audio_buffer) > 0:
|
|
104
|
-
if self.restart_after_silence:
|
|
105
|
-
self.recording = False # for the system tray icon
|
|
106
|
-
result = self.finalize()
|
|
107
|
-
microphone.q.queue.clear()
|
|
108
|
-
self.reset()
|
|
109
|
-
yield result
|
|
110
|
-
self.recording = True # for the system tray icon
|
|
111
|
-
else:
|
|
112
|
-
raise StopRecording("Silence detected: {:.2f} seconds".format(silence_duration))
|
|
113
|
-
|
|
114
|
-
else:
|
|
115
|
-
self.last_sound_time = time.time()
|
|
116
|
-
self.waiting = False
|
|
117
|
-
|
|
118
|
-
# don't accumulate very long silences
|
|
119
|
-
if not self.waiting:
|
|
123
|
+
# leave it to each transcriber to handle the silence in audio data
|
|
124
|
+
try:
|
|
120
125
|
yield self.transcribe_realtime_audio(data)
|
|
121
126
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
127
|
+
# This exception triggers a pause in recording to allow for a transcription of the audio buffer
|
|
128
|
+
except SilenceDetected as e:
|
|
129
|
+
self.log(str(e))
|
|
130
|
+
self.recording = False # for the system tray icon
|
|
131
|
+
result = self.finalize()
|
|
132
|
+
microphone.q.queue.clear()
|
|
133
|
+
self.reset()
|
|
134
|
+
yield result
|
|
135
|
+
self.recording = True # for the system tray icon
|
|
136
|
+
self.start_time = time.time() # reset the start time to avoid timeout
|
|
125
137
|
|
|
126
138
|
if self.is_overtime():
|
|
127
139
|
raise StopRecording("Overtime: {:.2f} seconds".format(self.get_elapsed()))
|
|
@@ -165,8 +177,10 @@ def get_vosk_recognizer(model, samplerate=16000):
|
|
|
165
177
|
|
|
166
178
|
class VoskTranscriber(AbstractTranscriber):
|
|
167
179
|
backend = "vosk"
|
|
180
|
+
_frozen_options = frozenset(["restart_after_silence", "silence_duration", "silence_thresh"])
|
|
168
181
|
|
|
169
182
|
def __init__(self, model_name, model=None, model_kwargs={}, **kwargs):
|
|
183
|
+
kwargs["silence_thresh"] = -np.inf # disable silence detection (this is handled by Vosk)
|
|
170
184
|
if model is None:
|
|
171
185
|
model = get_vosk_model(model_name, **model_kwargs)
|
|
172
186
|
super().__init__(model, model_name, model_kwargs=model_kwargs, **kwargs)
|
|
@@ -222,7 +236,7 @@ class WhisperTranscriber(AbstractTranscriber):
|
|
|
222
236
|
if len(self.audio_buffer) == 0:
|
|
223
237
|
return {"text": ""}
|
|
224
238
|
result = self.transcribe_audio(self.audio_buffer)
|
|
225
|
-
self.
|
|
239
|
+
self.reset()
|
|
226
240
|
return result
|
|
227
241
|
|
|
228
242
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: scribe-cli
|
|
3
|
-
Version: 0.12.0
|
|
3
|
+
Version: 0.12.1
|
|
4
4
|
Summary: scribe is a local speech recognition tool that provides real-time transcription using vosk and whisper AI, with the goal of serving as a virtual keyboard on a computer
|
|
5
5
|
Author-email: Mahé Perrette <mahe.perrette@gmail.com>
|
|
6
6
|
License: MIT License
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|