scribe-cli 0.12.0__tar.gz → 0.12.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {scribe_cli-0.12.0/scribe_cli.egg-info → scribe_cli-0.12.1}/PKG-INFO +1 -1
  2. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/_version.py +2 -2
  3. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/app.py +41 -7
  4. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/models.py +44 -30
  5. {scribe_cli-0.12.0 → scribe_cli-0.12.1/scribe_cli.egg-info}/PKG-INFO +1 -1
  6. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/.github/workflows/pypi.yml +0 -0
  7. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/.gitignore +0 -0
  8. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/LICENSE +0 -0
  9. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/README.md +0 -0
  10. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/icon.xcf +0 -0
  11. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/pyproject.toml +0 -0
  12. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/__init__.py +0 -0
  13. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/audio.py +0 -0
  14. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/install_desktop.py +0 -0
  15. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/keyboard.py +0 -0
  16. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/models.toml +0 -0
  17. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/saverecording.py +0 -0
  18. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/testpynput.py +0 -0
  19. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe/util.py +0 -0
  20. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/SOURCES.txt +0 -0
  21. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/dependency_links.txt +0 -0
  22. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/entry_points.txt +0 -0
  23. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/requires.txt +0 -0
  24. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_cli.egg-info/top_level.txt +0 -0
  25. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/__init__.py +0 -0
  26. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/share/icon.png +0 -0
  27. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/share/icon_recording.png +0 -0
  28. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/share/icon_writing.png +0 -0
  29. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/scribe_data/templates/scribe.desktop +0 -0
  30. {scribe_cli-0.12.0 → scribe_cli-0.12.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: scribe-cli
3
- Version: 0.12.0
3
+ Version: 0.12.1
4
4
  Summary: scribe is a local speech recognition tool that provides real-time transcription using vosk and whisper AI, with the goal of serving as a virtual keyboard on a computer
5
5
  Author-email: Mahé Perrette <mahe.perrette@gmail.com>
6
6
  License: MIT License
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.12.0'
16
- __version_tuple__ = version_tuple = (0, 12, 0)
15
+ __version__ = version = '0.12.1'
16
+ __version_tuple__ = version_tuple = (0, 12, 1)
@@ -3,6 +3,7 @@ import tomllib
3
3
  import re
4
4
  import time
5
5
  import argparse
6
+ from typing import Iterable
6
7
  from scribe.audio import Microphone
7
8
  from scribe.util import print_partial, clear_line, prompt_choices, ansi_link, colored
8
9
  from scribe.models import VoskTranscriber, WhisperTranscriber, OpenaiAPITranscriber
@@ -255,7 +256,7 @@ def start_recording(micro, transcriber, clipboard=True, keyboard=False, latency=
255
256
  callback()
256
257
 
257
258
 
258
- def create_app(micro, transcriber, other_transcribers=None, **kwargs):
259
+ def create_app(micro, transcriber, other_transcribers=None, transcriber_options=[], **kwargs):
259
260
  import pystray
260
261
  from pystray import Menu as pystrayMenu, MenuItem as Item
261
262
  from PIL import Image
@@ -344,6 +345,9 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
344
345
 
345
346
  def callback_set_model(icon, item):
346
347
  transcriber = icon._transcriber
348
+ if transcriber.model_name == str(item):
349
+ transcriber.log(f"Already using model {str(item)}")
350
+ return
347
351
  callback_stop_recording(icon, item)
348
352
  model_name = str(item)
349
353
  meta = other_transcribers_dict[model_name]
@@ -356,7 +360,23 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
356
360
 
357
361
  def callback_toggle_option(icon, item):
358
362
  callback_stop_recording(icon, item)
359
- kwargs[str(item)] = not kwargs[str(item)]
363
+ if str(item) in transcriber_options:
364
+ # toggle the option on the current transcriber
365
+ if str(item) in icon._transcriber._frozen_options or type(getattr(icon._transcriber, str(item), None)) is not bool:
366
+ print("Skipped setting option", item)
367
+ return
368
+ newvalue = not getattr(icon._transcriber, str(item))
369
+ setattr(icon._transcriber, str(item), newvalue)
370
+ # set the option on the other transcribers as well
371
+ if other_transcribers:
372
+ for name in other_transcribers_dict:
373
+ meta = other_transcribers_dict[name]
374
+ if str(item) in meta:
375
+ meta[str(item)] = newvalue
376
+
377
+ else:
378
+ kwargs[str(item)] = not kwargs[str(item)]
379
+ print("Option set [", item, "] to", kwargs[str(item)])
360
380
 
361
381
  def is_model_selection(item):
362
382
  return icon._model_selection
@@ -367,23 +387,34 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
367
387
  def is_not_recording(item):
368
388
  return not is_recording(item) and not is_model_selection(item)
369
389
 
370
- def is_checked(item):
390
+ def is_checked_model(item):
371
391
  return icon._transcriber.model_name == str(item)
372
392
 
373
393
  def is_checked_option(item):
394
+ if not is_option_visible(item):
395
+ return False
396
+ if str(item) in transcriber_options:
397
+ return getattr(icon._transcriber, str(item))
374
398
  return kwargs[str(item)]
375
399
 
400
+ def is_option_visible(item):
401
+ if str(item) in transcriber_options:
402
+ return str(item) not in icon._transcriber._frozen_options
403
+ return True
404
+
376
405
  modeltitle = f"{transcriber.backend} :: {transcriber.model_name}"
377
406
  title = f"scribe :: {modeltitle}"
378
407
 
408
+ options = [name for name in kwargs if isinstance(kwargs[name], bool)] + [name for name in transcriber_options if isinstance(getattr(transcriber, name), bool)]
409
+
379
410
  menus = []
380
411
  menus.append(Item(f"Record", callback_record, visible=is_not_recording, default=True))
381
412
  menus.append(Item("Stop", callback_stop_recording, visible=is_recording))
382
413
  menus.append(Item("Choose Model", pystrayMenu(
383
- *(Item(f"{name}", callback_set_model, checked=is_checked) for name in other_transcribers_dict)))
414
+ *(Item(f"{name}", callback_set_model, checked=is_checked_model) for name in other_transcribers_dict)))
384
415
  )
385
416
  menus.append(Item("Toggle Options", pystrayMenu(
386
- *(Item(f"{name}", callback_toggle_option, checked=is_checked_option) for name in kwargs if isinstance(kwargs[name], bool))))
417
+ *(Item(f"{name}", callback_toggle_option, checked=is_checked_option, visible=is_option_visible) for name in options)))
387
418
  )
388
419
  menus.append(Item('Quit', callback_quit))
389
420
 
@@ -398,6 +429,8 @@ def create_app(micro, transcriber, other_transcribers=None, **kwargs):
398
429
 
399
430
  return icon
400
431
 
432
+ def _filter_options(d: dict, exclude: Iterable) -> dict:
433
+ return {k: v for k, v in d.items() if k not in exclude}
401
434
 
402
435
  def main(args=None):
403
436
 
@@ -531,9 +564,10 @@ def main(args=None):
531
564
  app = create_app(micro, transcriber, other_transcribers=[
532
565
  {**vars(o), "backend": "openaiapi", "model": "whisper-1"},
533
566
  *[{**vars(o), "backend": "whisper", "model": model} for model in o.whisper_models],
534
- *[{**vars(o), "backend": "vosk", "model": model} for model in o.vosk_models]],
567
+ *[{**_filter_options(vars(o), exclude=VoskTranscriber._frozen_options), "backend": "vosk", "model": model} for model in o.vosk_models]],
535
568
  clipboard=o.clipboard, output_file=o.output_file,
536
- keyboard=o.keyboard, latency=o.latency, ascii=o.ascii, **greetings)
569
+ keyboard=o.keyboard, latency=o.latency, ascii=o.ascii,
570
+ transcriber_options=["restart_after_silence"], **greetings)
537
571
  print("Starting app...")
538
572
  app.run()
539
573
  else:
@@ -16,11 +16,15 @@ HOME = os.environ.get('HOME', os.path.expanduser('~'))
16
16
  XDG_CACHE_HOME = os.environ.get('XDG_CACHE_HOME', os.path.join(HOME, '.cache'))
17
17
  VOSK_MODELS_FOLDER = os.path.join(XDG_CACHE_HOME, "vosk")
18
18
 
19
+ class SilenceDetected(Exception):
20
+ pass
21
+
19
22
  class StopRecording(Exception):
20
23
  pass
21
24
 
22
25
  class AbstractTranscriber:
23
26
  backend = None
27
+ _frozen_options = frozenset()
24
28
  def __init__(self, model, model_name=None, language=None, samplerate=16000, timeout=None, model_kwargs={},
25
29
  silence_thresh=-40, silence_duration=2, restart_after_silence=False, logger=None):
26
30
  self.model_name = model_name
@@ -50,7 +54,29 @@ class AbstractTranscriber:
50
54
  return self.timeout is not None and time.time() - self.start_time > self.timeout
51
55
 
52
56
  def transcribe_realtime_audio(self, audio_bytes=b""):
53
- self.audio_buffer += audio_bytes
57
+
58
+ # Vérifier si le segment est un silence
59
+ if is_silent(audio_bytes, self.silence_thresh):
60
+ self.silence_buffer += audio_bytes
61
+ silence_duration = time.time() - self.last_sound_time
62
+ self.waiting = self.silence_duration is not None and silence_duration >= self.silence_duration
63
+
64
+ if self.waiting and len(self.audio_buffer) > 0:
65
+ if self.restart_after_silence:
66
+ raise SilenceDetected("Silence detected: {:.2f} seconds".format(silence_duration))
67
+ else:
68
+ raise StopRecording("Silence detected: {:.2f} seconds".format(silence_duration))
69
+
70
+ else:
71
+ self.last_sound_time = time.time()
72
+ self.waiting = False
73
+ silence_buffer_data = np.frombuffer(self.silence_buffer, dtype=np.int16)
74
+ # add 0.5 seconds worth of silent data back to the audio buffer
75
+ half_a_second = 0.5
76
+ length_of_half_a_second = int(half_a_second * self.samplerate)
77
+ self.audio_buffer += silence_buffer_data[-length_of_half_a_second:].tobytes() + audio_bytes
78
+ self.silence_buffer = b''
79
+
54
80
  return {"partial": f"{len(self.audio_buffer)} bytes received (duration: {self.get_elapsed()} seconds)"}
55
81
 
56
82
  def transcribe_audio(self, audio_data):
@@ -59,6 +85,7 @@ class AbstractTranscriber:
59
85
  def reset(self):
60
86
  self.audio_buffer = b''
61
87
  self.start_time = time.time()
88
+ self.silence_buffer = b''
62
89
 
63
90
  def log(self, text):
64
91
  if text.startswith("\n"):
@@ -82,7 +109,7 @@ class AbstractTranscriber:
82
109
  self.last_sound_time = time.time() - self.silence_duration
83
110
  else:
84
111
  self.last_sound_time = time.time()
85
- previous_waiting = self.waiting
112
+ # self.silence_buffer = b'' # already reset in self.reset()
86
113
 
87
114
  try:
88
115
 
@@ -93,35 +120,20 @@ class AbstractTranscriber:
93
120
  while not microphone.q.empty():
94
121
  data = microphone.q.get()
95
122
 
96
- # Vérifier si le segment est un silence
97
- if is_silent(data, self.silence_thresh):
98
- silence_duration = time.time() - self.last_sound_time
99
-
100
- previous_waiting = self.waiting
101
- self.waiting = self.silence_duration is not None and silence_duration >= self.silence_duration
102
-
103
- if self.waiting and len(self.audio_buffer) > 0:
104
- if self.restart_after_silence:
105
- self.recording = False # for the system tray icon
106
- result = self.finalize()
107
- microphone.q.queue.clear()
108
- self.reset()
109
- yield result
110
- self.recording = True # for the system tray icon
111
- else:
112
- raise StopRecording("Silence detected: {:.2f} seconds".format(silence_duration))
113
-
114
- else:
115
- self.last_sound_time = time.time()
116
- self.waiting = False
117
-
118
- # don't accumulate very long silences
119
- if not self.waiting:
123
+ # leave it to each transcriber to handle the silence in audio data
124
+ try:
120
125
  yield self.transcribe_realtime_audio(data)
121
126
 
122
- else:
123
- if not previous_waiting:
124
- self.log("Silence detected...waiting for more audio")
127
+ # This exception triggers a pause in recording to allow for a transcription of the audio buffer
128
+ except SilenceDetected as e:
129
+ self.log(str(e))
130
+ self.recording = False # for the system tray icon
131
+ result = self.finalize()
132
+ microphone.q.queue.clear()
133
+ self.reset()
134
+ yield result
135
+ self.recording = True # for the system tray icon
136
+ self.start_time = time.time() # reset the start time to avoid timeout
125
137
 
126
138
  if self.is_overtime():
127
139
  raise StopRecording("Overtime: {:.2f} seconds".format(self.get_elapsed()))
@@ -165,8 +177,10 @@ def get_vosk_recognizer(model, samplerate=16000):
165
177
 
166
178
  class VoskTranscriber(AbstractTranscriber):
167
179
  backend = "vosk"
180
+ _frozen_options = frozenset(["restart_after_silence", "silence_duration", "silence_thresh"])
168
181
 
169
182
  def __init__(self, model_name, model=None, model_kwargs={}, **kwargs):
183
+ kwargs["silence_thresh"] = -np.inf # disable silence detection (this is handled by Vosk)
170
184
  if model is None:
171
185
  model = get_vosk_model(model_name, **model_kwargs)
172
186
  super().__init__(model, model_name, model_kwargs=model_kwargs, **kwargs)
@@ -222,7 +236,7 @@ class WhisperTranscriber(AbstractTranscriber):
222
236
  if len(self.audio_buffer) == 0:
223
237
  return {"text": ""}
224
238
  result = self.transcribe_audio(self.audio_buffer)
225
- self.audio_buffer = b''
239
+ self.reset()
226
240
  return result
227
241
 
228
242
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: scribe-cli
3
- Version: 0.12.0
3
+ Version: 0.12.1
4
4
  Summary: scribe is a local speech recognition tool that provides real-time transcription using vosk and whisper AI, with the goal of serving as a virtual keyboard on a computer
5
5
  Author-email: Mahé Perrette <mahe.perrette@gmail.com>
6
6
  License: MIT License
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes