PyPI - scribe-cli - Versions diffs - 0.7.7__tar.gz → 0.7.9__tar.gz - Mend

scribe-cli 0.7.7 (tar.gz) → 0.7.9 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{scribe_cli-0.7.7/scribe_cli.egg-info → scribe_cli-0.7.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: scribe-cli
-Version: 0.7.7
+Version: 0.7.9
 Summary: scribe is a local speech recognition tool that provides real-time transcription using vosk and whisper AI, with the goal of serving as a virtual keyboard on a computer
 Author-email: Mahé Perrette <mahe.perrette@gmail.com>
 License: MIT License
@@ -45,6 +45,7 @@ Requires-Dist: tqdm
 Requires-Dist: requests
 Requires-Dist: pyperclip
 Requires-Dist: unidecode
+Requires-Dist: termcolor
 Provides-Extra: keyboard
 Requires-Dist: pynput; extra == "keyboard"
 Provides-Extra: whisper
@@ -61,7 +62,7 @@ Requires-Dist: vosk; extra == "all"
 Requires-Dist: pystray; extra == "all"
 [![python](https://img.shields.io/badge/python-3.12-blue.svg)]()
-[![pypi](https://github.com/perrette/scribe/actions/workflows/pypi.yml/badge.svg)](https://pypi.org/project/scribe-cli)
+[![pypi](https://img.shields.io/pypi/v/scribe-cli)](https://pypi.org/project/scribe-cli)
 # Scribe

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/README.md RENAMED Viewed

@@ -1,5 +1,5 @@
 [![python](https://img.shields.io/badge/python-3.12-blue.svg)]()
-[![pypi](https://github.com/perrette/scribe/actions/workflows/pypi.yml/badge.svg)](https://pypi.org/project/scribe-cli)
+[![pypi](https://img.shields.io/pypi/v/scribe-cli)](https://pypi.org/project/scribe-cli)
 # Scribe

scribe_cli-0.7.9/icon.xcf ADDED Viewed

Binary file

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/pyproject.toml RENAMED Viewed

@@ -19,6 +19,7 @@ dependencies = [
     "requests",
     "pyperclip",
     "unidecode",
+    "termcolor",
 ]
 classifiers = [

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/scribe/_version.py RENAMED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.7.7'
-__version_tuple__ = version_tuple = (0, 7, 7)
+__version__ = version = '0.7.9'
+__version_tuple__ = version_tuple = (0, 7, 9)

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/scribe/app.py RENAMED Viewed

@@ -1,9 +1,10 @@
 from pathlib import Path
 import tomllib
+import time
 import argparse
 from scribe.audio import Microphone
 from scribe.util import print_partial, clear_line, prompt_choices, check_dependencies, ansi_link, colored
-from scribe.models import VoskTranscriber, WhisperTranscriber, StopRecording
+from scribe.models import VoskTranscriber, WhisperTranscriber
 with open(Path(__file__).parent / "models.toml", "rb") as f:
     language_config_default = tomllib.load(f)
@@ -37,8 +38,36 @@ def pick_specialist_model(model, language, backend):
     return model
+class DummyTranscriber:
+    def __init__(self, backend, model_name):
+        self.backend = backend
+        self.model_name = model_name
+    def start_recording(self, micro, **kwargs):
+        while True:
+            try:
+                yield {"text": input()}
+            except KeyboardInterrupt:
+                break
+    def __getattr__(self, item):
+        return None
 def get_transcriber(o, prompt=True):
+    whisper_models = ["tiny", "base", "small", "medium", "large", "turbo"]
+    whisper_english_models = ["tiny.en", "base.en", "small.en", "medium.en"]
+    if o.dummy:
+        return DummyTranscriber("whisper", "dummy")
+    if o.model and not o.backend:
+        if o.model.startswith("vosk-"):
+            o.backend = "vosk"
+        elif o.model in whisper_models + whisper_english_models:
+            o.backend = "whisper"
     if o.backend:
         checked_backend = check_dependencies(o.backend)
         if not checked_backend:
@@ -76,29 +105,25 @@ def get_transcriber(o, prompt=True):
                     print(f"Or pick one of the pre-defined languages: ", " ".join(available_languages))
                     exit(1)
                 choices = [language_config[backend][o.language]["model"]]
-                default_model = choices[0]
+                default_model = choices[0] # this is a string
             else:
                 available_models = [language_config[backend][lang]["model"] for lang in available_languages]
                 choices = list(zip(available_models, available_languages)) + [f" * [Any model from {ansi_link('https://alphacephei.com/vosk/models')}]"]
-                default_model = choices[0]
+                default_model = choices[0]  # this is a tuple !!
             print(f"For information about vosk models see: {ansi_link('https://alphacephei.com/vosk/models')}")
             if prompt:
-                model = prompt_choices(choices, default=default_model, label="model")
+                model = prompt_choices(choices, default=default_model, label="model")  # this always returns a string
             else:
-                model = default_model
+                model = default_model[0] if isinstance(default_model, tuple) else default_model  # tuple -> string
         elif backend == "whisper":
-            models = ["tiny", "base", "small", "medium", "large", "turbo"]
-            english_models = ["tiny.en", "base.en", "small.en", "medium.en"]
             default_model = "small"
             print("Some models have a specialized English version (.en) which will be selected as default is `-l en` was requested, but can also be requested explicitly below (option not listed). See [documentation](https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages).")
             if prompt:
-                model = prompt_choices(models, default=default_model, label="model",
-                                        hidden_models=english_models)
+                model = prompt_choices(whisper_models, default=default_model, label="model",
+                                        hidden_models=whisper_english_models)
             else:
                 model = default_model
@@ -143,6 +168,8 @@ def get_parser():
     parser.add_argument("-l", "--language", choices=list(language_config["vosk"]),
                         help="An alias for preselected models when using the vosk backend, or 'en' for the English version of whisper models.")
+    parser.add_argument("--dummy", action="store_true", help=argparse.SUPPRESS)
     parser.add_argument("--no-prompt", action="store_false", dest="prompt", help="Disable prompts for backend and model selection and jump to recording")
     parser.add_argument("--app", action="store_true", help="Start in app mode (relies on pystray)")
@@ -165,7 +192,7 @@ def get_parser():
 # Commencer l'enregistrement
-def start_recording(micro, transcriber, clipboard=True, keyboard=False, latency=0, ascii=False, **greetings):
+def start_recording(micro, transcriber, clipboard=True, keyboard=False, latency=0, ascii=False, callback=None, **greetings):
     if keyboard:
         from scribe.keyboard import type_text
@@ -189,7 +216,7 @@ def start_recording(micro, transcriber, clipboard=True, keyboard=False, latency=
             if clipboard:
                 fulltext += result['text'] + " "
-                pyperclip.copy(fulltext)
+                pyperclip.copy(fulltext.strip())
         else:
             print_partial(result.get('partial', ''))
@@ -197,22 +224,8 @@ def start_recording(micro, transcriber, clipboard=True, keyboard=False, latency=
     if clipboard:
         print("Copied to clipboard.")
-def interrupt_app_thread(icon):
-    """Thanks Le Chat for this solution: https://stackoverflow.com/a/325528/2192272
-    """
-    import ctypes
-    thread = icon._recording_thread
-    # Raise an exception in the thread using ctypes
-    thread_id = thread.ident
-    if thread_id is not None:
-        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
-            ctypes.c_long(thread_id),
-            ctypes.py_object(StopRecording)
-        )
-        if res > 1:
-            ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, 0)
-            print("Failure to raise exception in thread")
+    if callback:
+        callback()
 def create_app(micro, transcriber, **kwargs):
@@ -225,7 +238,42 @@ def create_app(micro, transcriber, **kwargs):
     import threading
     # Load an image from a file
-    image = Image.open(Path(scribe_data.__file__).parent / "share" / "icon.jpg")
+    image = Image.open(Path(scribe_data.__file__).parent / "share" / "icon.png")
+    image_recording = Image.open(Path(scribe_data.__file__).parent / "share" / "icon_recording.png")
+    image_writing = Image.open(Path(scribe_data.__file__).parent / "share" / "icon_writing.png")
+    if transcriber.backend == "vosk":
+        # Recording and writing happen at the same time in this backend
+        # Overlay the writing image on top of the base image
+        image_recording = Image.alpha_composite(image_recording.convert("RGBA"), image_writing.convert("RGBA"))
+    def update_icon(icon, force=False):
+        if transcriber.recording:
+            if force or getattr(icon, "_icon_label", None) != "recording":
+                icon.icon = image_recording
+                icon._icon_label = "recording"
+                icon.update_menu()
+        elif transcriber.busy:
+            if force or getattr(icon, "_icon_label", None) != "busy":
+                icon.icon = image_writing
+                icon._icon_label = "busy"
+                icon.update_menu()
+        else:
+            if force or getattr(icon, "_icon_label", None) != None:
+                icon.icon = image
+                icon._icon_label = None
+                icon.update_menu()
+    def start_monitoring(icon):
+        try:
+            while transcriber.busy:
+                update_icon(icon)
+                time.sleep(0.1)
+        finally:
+            update_icon(icon)
     def callback_quit(icon, item):
         icon.visible = False
@@ -234,16 +282,34 @@ def create_app(micro, transcriber, **kwargs):
         icon.stop()
     def callback_stop_recording(icon, item):
-        ## Here we need to stop the recording thread
-        interrupt_app_thread(icon)
-        icon._recording_thread.join()
+        # Here we need to stop the recording thread
+        transcriber.recording = False
+        if hasattr(icon, "_recording_thread"):
+            icon._recording_thread.join()
+        if hasattr(icon, "_monitoring_thread"):
+            icon._monitoring_thread.join()
     def callback_record(icon, item):
+        # kwargs["callback"] = icon.update_menu   # NOTE: the thread will finish AFTER the callback is complete
+        if transcriber.busy:
+            print("Still busy recording or transcribing.")
+            return
+        if hasattr(icon, "_recording_thread") and icon._recording_thread.is_alive():
+            icon._recording_thread.join()
+        if hasattr(icon, "_monitoring_thread") and icon._monitoring_thread.is_alive():
+            icon._monitoring_thread.join()
+        transcriber.busy = True  # this is a hack to prevent race conditions between the below threads
         icon._recording_thread = threading.Thread(target=start_recording, args=(micro, transcriber), kwargs=kwargs)
         icon._recording_thread.start()
+        icon._monitoring_thread = threading.Thread(target=start_monitoring, args=(icon,))
+        icon._monitoring_thread.start()
     def is_recording(item):
-        return hasattr(icon, "_recording_thread") and icon._recording_thread.is_alive()
+        return transcriber.busy
     def is_not_recording(item):
         return not is_recording(item)
@@ -251,7 +317,6 @@ def create_app(micro, transcriber, **kwargs):
     # Create a menu
     menu = pystrayMenu(
-        # Item('Record', callback_record),
         Item("Record", callback_record, visible=is_not_recording),
         Item("Stop", callback_stop_recording, visible=is_recording),
         Item('Quit', callback_quit),
@@ -279,25 +344,37 @@ def main(args=None):
     while True:
         if transcriber is None:
             transcriber = get_transcriber(o, prompt=o.prompt)
-        print(f">>> Model {transcriber.model_name} from {transcriber.backend} selected. Keyboard [{'on' if o.keyboard else 'off'}]. Clipboard [{'on' if o.clipboard else 'off'}] <<<")
+        print(f"Model [{colored(transcriber.model_name, 'light_blue', attrs=['bold'])}] from [{colored(transcriber.backend, 'light_blue', attrs=['bold'])}] selected.")
         if o.prompt:
-            print(f"Choose any of the following actions (or any command-line toggle flag by name)")
-            print(f"[q] quit")
-            print(f"[e] change model")
-            print(f"[x] toggle app [{toggle[o.app]}] -> [{toggle[not o.app]}]")
-            print(f"[k] toggle keyboard [{toggle[o.keyboard]}] -> [{toggle[not o.keyboard]}]")
-            print(f"[c] toggle clipboard [{toggle[o.clipboard]}] -> [{toggle[not o.clipboard]}]")
+            print(f"Choose any of the following actions")
+            print(f"{colored('[q]', 'light_yellow')} quit")
+            print(f"{colored('[e]', 'light_yellow')} change model")
+            print(f"{colored('[x]', 'light_yellow')} app is {colored(o.app, 'light_blue')} toggle?")
+            print(f"{colored('[c]', 'light_yellow')} clipboard is {colored(o.clipboard, 'light_blue')} toggle?")
+            print(f"{colored('[k]', 'light_yellow')} keyboard is {colored(o.keyboard, 'light_blue')} toggle?")
+            if o.keyboard:
+                print(f"{colored('[latency]', 'light_yellow')} between keystrokes is {colored(o.latency, 'light_blue')} s")
             if transcriber.backend == "whisper":
-                print(f"[t] change duration (currently {transcriber.timeout}s)")
-                print(f"[b] change silence duration (currently {transcriber.silence_duration}s)")
-                print(f"[a] toggle auto-restart after silence [{toggle[transcriber.restart_after_silence]}] -> [{toggle[not transcriber.restart_after_silence]}]")
-            print(colored(f"Press [Enter] to start recording.", "BOLD"))
+                print(f"{colored('[t]', 'light_yellow')} change duration (currently {colored(transcriber.timeout, 'light_blue')} s)")
+                print(f"{colored('[b]', 'light_yellow')} change silence duration (currently {colored(transcriber.silence_duration, 'light_blue')} s)")
+                print(f"{colored('[a]', 'light_yellow')} auto-restart after silence is {colored(transcriber.restart_after_silence, 'light_blue')} toggle?")
+            exclude_flags = ["keyboard", "clipboard", "app", "prompt", "restart_after_silence"]
+            display_flags = [a.dest for a in parser._actions if a.help != argparse.SUPPRESS]
+            for key, value in vars(o).items():
+                if key not in display_flags or key in exclude_flags or not isinstance(value, bool):
+                    continue
+                print(f"{colored(f'[{key}]', 'light_yellow')} is {colored(value, 'light_blue')} toggle?")
+            print(colored(f"Press [Enter] to start recording.", attrs=["bold"]))
             key = input()
             if key == "q":
                 exit(0)
             if key == "e":
                 transcriber = None
+                o.model = None
+                o.backend = None
+                o.language = None
                 continue
             if key == "k":
                 o.keyboard = not o.keyboard
@@ -318,6 +395,13 @@ def main(args=None):
                 except:
                     print("Invalid duration. Must be an integer.")
                 continue
+            if key == "latency":
+                ans = input(f"Enter new keyboard latency in seconds (current: {o.latency}): ")
+                try:
+                    o.latency = float(ans)
+                except:
+                    print("Invalid latency. Must be a float.")
+                continue
             if key == "b":
                 ans = input(f"Enter new silence break duration in seconds (current: {transcriber.silence_duration}): ")
                 try:

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/scribe/models.py RENAMED Viewed

@@ -32,6 +32,8 @@ class AbstractTranscriber:
         self.silence_thresh = silence_thresh
         self.silence_duration = silence_duration
         self.restart_after_silence = restart_after_silence
+        self.recording = False
+        self.busy = False
         self.reset()
     def get_elapsed(self):
@@ -54,16 +56,18 @@ class AbstractTranscriber:
     def start_recording(self, microphone,
                         start_message="Recording... Press Ctrl+C to stop.",
-                        stop_message="Stopped recording."):
+                        stop_message="Done transcribing."):
         self.reset()
+        self.recording = True
+        self.busy = True
         try:
             with microphone.open_stream():
                 print(start_message)
-                while True:
+                while self.recording:
                     while not microphone.q.empty():
                         data = microphone.q.get()
@@ -78,7 +82,7 @@ class AbstractTranscriber:
                                     self.reset()
                                     yield result
                                 else:
-                                    raise KeyboardInterrupt("Silence detected: {:.2f} seconds".format(silence_duration))
+                                    raise StopRecording("Silence detected: {:.2f} seconds".format(silence_duration))
                         else:
                             self.last_sound_time = time.time()
@@ -86,14 +90,18 @@ class AbstractTranscriber:
                         yield self.transcribe_realtime_audio(data)
                         if self.is_overtime():
-                            raise KeyboardInterrupt("Overtime: {:.2f} seconds".format(self.get_elapsed()))
+                            raise StopRecording("Overtime: {:.2f} seconds".format(self.get_elapsed()))
+                    time.sleep(0.1) # avoid overheating
         except (KeyboardInterrupt, StopRecording):
             pass
         finally:
+            self.recording = False
             result = self.finalize()
             microphone.q.queue.clear()
+            self.busy = False
             yield result
         print(stop_message)
@@ -102,6 +110,7 @@ class AbstractTranscriber:
 def get_vosk_model(model, download_root=None, url=None):
     """Load the Vosk recognizer"""
     import vosk
+    vosk.SetLogLevel(-1)
     if download_root is None:
         download_root = VOSK_MODELS_FOLDER
     model_path = os.path.join(download_root, model)

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/scribe/util.py RENAMED Viewed

@@ -3,26 +3,7 @@ import re
 import tqdm
 import shutil
 from functools import partial
-class bcolors:
-    # https://stackoverflow.com/a/287944/2192272
-    HEADER = '\033[95m'
-    OKBLUE = '\033[94m'
-    OKGREEN = '\033[92m'
-    WARNING = '\033[93m'
-    FAIL = '\033[91m'
-    ENDC = '\033[0m'
-    BOLD = '\033[1m'
-    UNDERLINE = '\033[4m'
-def strip_colors(s):
-    for name, c in vars(bcolors).items():
-        if name.startswith("_"):
-            continue
-        s = s.replace(c, '')
-    return s
+from termcolor import colored
 def ansi_link(uri, label=None):
     """https://stackoverflow.com/a/71309268/2192272
@@ -36,25 +17,6 @@ def ansi_link(uri, label=None):
     return escape_mask.format(parameters, uri, label)
-def colored(text, color):
-    if hasattr(bcolors, color):
-        color = getattr(bcolors, color)
-    return f"{color}{text}{bcolors.ENDC}"
-ANSI_LINK_RE = re.compile(r'(?P<ansi_sequence>\033]8;(?P<parameter>.*?);(?P<uri>.*?)\033\\(?P<label>.*?)\033]8;;\033\\)')
-def strip_ansi_link(s):
-    for m in ANSI_LINK_RE.findall(s):
-        s = s.replace(m[0], m[3])
-    return s
-def strip_all(s):
-    s = strip_colors(s)
-    s = strip_ansi_link(s)
-    return s
 # Function to clear the terminal line
 def clear_line():
@@ -119,9 +81,9 @@ def format_choice(enum, default=None, unavailable=None):
         value_str = value
     if (default is not None and value == default) or (default is None and i == 0):
-        return f'  ' + colored(f'({i+1}) {value_str} [Press Enter]', 'BOLD')
+        return f'  ' + colored(f'({i+1}) {value_str} [Press Enter]', attrs=['bold'])
     elif unavailable and value in unavailable:
-        return f'  ' + colored(f'{" "} {value_str} -> unavailable !!', 'FAIL')
+        return f'  ' + colored(f'{" "} {value_str} -> unavailable !!', attrs=["strike"])
     else:
         return f'  ({i+1}) {value_str}'

{scribe_cli-0.7.7 → scribe_cli-0.7.9/scribe_cli.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: scribe-cli
-Version: 0.7.7
+Version: 0.7.9
 Summary: scribe is a local speech recognition tool that provides real-time transcription using vosk and whisper AI, with the goal of serving as a virtual keyboard on a computer
 Author-email: Mahé Perrette <mahe.perrette@gmail.com>
 License: MIT License
@@ -45,6 +45,7 @@ Requires-Dist: tqdm
 Requires-Dist: requests
 Requires-Dist: pyperclip
 Requires-Dist: unidecode
+Requires-Dist: termcolor
 Provides-Extra: keyboard
 Requires-Dist: pynput; extra == "keyboard"
 Provides-Extra: whisper
@@ -61,7 +62,7 @@ Requires-Dist: vosk; extra == "all"
 Requires-Dist: pystray; extra == "all"
 [![python](https://img.shields.io/badge/python-3.12-blue.svg)]()
-[![pypi](https://github.com/perrette/scribe/actions/workflows/pypi.yml/badge.svg)](https://pypi.org/project/scribe-cli)
+[![pypi](https://img.shields.io/pypi/v/scribe-cli)](https://pypi.org/project/scribe-cli)
 # Scribe

{scribe_cli-0.7.7 → scribe_cli-0.7.9}/scribe_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,6 +1,7 @@
 .gitignore
 LICENSE
 README.md
+icon.xcf
 pyproject.toml
 .github/workflows/pypi.yml
 scribe/__init__.py
@@ -21,5 +22,7 @@ scribe_cli.egg-info/entry_points.txt
 scribe_cli.egg-info/requires.txt
 scribe_cli.egg-info/top_level.txt
 scribe_data/__init__.py
-scribe_data/share/icon.jpg
+scribe_data/share/icon.png
+scribe_data/share/icon_recording.png
+scribe_data/share/icon_writing.png
 scribe_data/templates/scribe.desktop