PyPI - s2t - Versions diffs - 0.1.2__tar.gz → 0.1.3.post1.dev1__tar.gz - Mend

s2t 0.1.2__tar.gz → 0.1.3.post1.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{s2t-0.1.2 → s2t-0.1.3.post1.dev1}/.gitignore RENAMED Viewed

@@ -8,6 +8,7 @@ __pycache__/
 *.pyd
 .Python
 .venv/
+.venv312/
 venv/
 env/
 build/

{s2t-0.1.2 → s2t-0.1.3.post1.dev1}/Makefile RENAMED Viewed

@@ -117,8 +117,8 @@ precommit-install: guard-venv ensure-dev
 	pre-commit install --install-hooks
 guard-venv:
-	@if [ -n "$$VIRTUAL_ENV" ] && [ "$$VIRTUAL_ENV" != "$$PWD/.venv" ]; then \
-		echo "Error: active venv ($$VIRTUAL_ENV) differs from project .venv ($$PWD/.venv)."; \
-		echo "Please 'deactivate' or use the project venv (.venv)."; \
+	@if [ -n "$$VIRTUAL_ENV" ] && [ "$$VIRTUAL_ENV" != "$$PWD/.venv312" ]; then \
+		echo "Error: active venv ($$VIRTUAL_ENV) differs from project .venv ($$PWD/.venv312)."; \
+		echo "Please 'deactivate' or use the project venv (.venv312)."; \
 		exit 1; \
 	fi

{s2t-0.1.2 → s2t-0.1.3.post1.dev1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: s2t
-Version: 0.1.2
+Version: 0.1.3.post1.dev1
 Summary: Speech to Text (s2t): Record audio, run Whisper, export formats, and copy transcript to clipboard.
 Author: Maintainers
 License-Expression: LicenseRef-Proprietary
@@ -23,6 +23,8 @@ Requires-Dist: mypy>=1.7; extra == "dev"
 Requires-Dist: build>=1; extra == "dev"
 Requires-Dist: setuptools-scm>=8; extra == "dev"
 Requires-Dist: twine>=4; extra == "dev"
+Provides-Extra: translate
+Requires-Dist: argostranslate>=1.9.0; extra == "translate"
 # s2t

{s2t-0.1.2 → s2t-0.1.3.post1.dev1}/pyproject.toml RENAMED Viewed

@@ -41,6 +41,10 @@ dev = [
   "setuptools-scm>=8",
   "twine>=4",
 ]
+translate = [
+  # Argos translation support (offline after one-time package download)
+  "argostranslate>=1.9.0",
+]
 [project.scripts]

{s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/cli.py RENAMED Viewed

@@ -40,6 +40,11 @@ from . import __version__
 from .config import SessionOptions
 from .outputs import concat_audio, write_final_outputs
 from .recorder import Recorder
+from .translator.argos_backend import (
+    ArgosTranslator,
+    ensure_packages_background,
+    translate_result_segments,
+)
 from .types import TranscriptionResult
 from .utils import (
     convert_wav_to_mp3,
@@ -62,7 +67,7 @@ def run_session(opts: SessionOptions) -> int:
     engine = WhisperEngine(
         model_name=opts.model,
-        translate=opts.translate,
+        translate=False,  # translation handled as post-processing
         language=opts.lang,
         native_segmentation=opts.native_segmentation,
         session_dir=session_dir,
@@ -73,6 +78,27 @@ def run_session(opts: SessionOptions) -> int:
     )
     ex, fut = engine.preload()
+    # Determine translation target languages from options
+    target_langs: list[str] = []
+    if opts.translate_to:
+        target_langs = list(dict.fromkeys([s.strip().lower() for s in opts.translate_to if s]))
+    elif opts.translate:
+        target_langs = ["en"]
+    # Background auto-install/update Argos packages as early as possible
+    detected_lang: dict[str, str | None] = {"code": None}
+    detected_lang_event = threading.Event()
+    translator: ArgosTranslator | None = None
+    if target_langs:
+        translator = ArgosTranslator(verbose=opts.verbose)
+        ensure_packages_background(
+            translator,
+            src_lang_hint=(opts.lang.lower() if opts.lang else None),
+            target_langs=target_langs,
+            detected_lang_event=detected_lang_event,
+            detected_lang_holder=detected_lang,
+        )
     tx_q: queue.Queue[tuple[int, Path, int, float]] = queue.Queue()
     cumulative_text = ""
     next_to_emit = 1
@@ -134,6 +160,12 @@ def run_session(opts: SessionOptions) -> int:
             # Build latest-ready prompt based on already finished chunks
             prompt = _build_latest_ready_prompt(idx, finished_texts)
             res = engine.transcribe_chunk(model, path, frames, initial_prompt=prompt)
+            # Record detected language once (for translator preload if needed)
+            if target_langs and detected_lang["code"] is None:
+                lang_code = str(res.get("language") or "").strip().lower()
+                if lang_code:
+                    detected_lang["code"] = lang_code
+                    detected_lang_event.set()
             engine.write_chunk_outputs(res, path)
             text_i = (res.get("text", "") or "").strip()
             with agg_lock:
@@ -260,6 +292,55 @@ def run_session(opts: SessionOptions) -> int:
         print("=" * 60)
         print(text_final.rstrip("\n"))
+    # Post-processing: translate outputs for requested target languages
+    if target_langs and translator is not None:
+        # Decide source language: CLI hint takes precedence; else detected; else skip with warning
+        src_lang = (opts.lang.lower() if opts.lang else (detected_lang["code"] or "")).strip()
+        if not src_lang:
+            if opts.verbose:
+                print(
+                    "Warning: Could not determine source language for translation; skipping post-translation.",
+                    file=sys.stderr,
+                )
+        else:
+            # Skip identical language targets
+            effective_targets = [t for t in target_langs if t.lower() != src_lang.lower()]
+            # Ensure required packages if missing; perform synchronous install as needed
+            for tgt in effective_targets:
+                if not translator.has_package(src_lang, tgt):
+                    print(
+                        f"Ensuring Argos translation package for '{src_lang}->{tgt}' (may download 50–250 MB)…",
+                        file=sys.stderr,
+                    )
+                    ok = False
+                    try:
+                        ok = translator.ensure_package(src_lang, tgt)
+                    except Exception as e:
+                        print(
+                            f"Warning: could not install '{src_lang}->{tgt}' package: {e}",
+                            file=sys.stderr,
+                        )
+                    if not ok and not translator.has_package(src_lang, tgt):
+                        print(
+                            f"Warning: translation package unavailable or failed for '{src_lang}->{tgt}'. Skipping.",
+                            file=sys.stderr,
+                        )
+                        continue
+            try:
+                translated = translate_result_segments(translator, merged, src_lang, tgt)
+                # Write translated outputs with language suffix by passing a suffixed base path
+                suffixed = base_audio_path.with_name(
+                    f"{base_audio_path.stem}.{tgt}{base_audio_path.suffix}"
+                )
+                write_final_outputs(translated, session_dir, suffixed)
+                if opts.verbose:
+                    print(f"Created translated outputs for '{tgt}'.", file=sys.stderr)
+            except Exception as e:
+                print(
+                    f"Warning: failed to translate to '{tgt}': {e}",
+                    file=sys.stderr,
+                )
     if opts.profile:
         try:
             prof_path = session_dir / "profile.json"
@@ -329,7 +410,13 @@ def main(argv: list[str] | None = None) -> int:
         "-t",
         "--translate",
         action="store_true",
-        help="Translate to English instead of transcribing in source language",
+        help="After transcription, translate all outputs to English (post-processing)",
+    )
+    parser.add_argument(
+        "--translate-to",
+        action="append",
+        default=None,
+        help="After transcription, translate all outputs to the given language (can be repeated)",
     )
     parser.add_argument(
         "-v",
@@ -404,6 +491,7 @@ def main(argv: list[str] | None = None) -> int:
             model=args.model,
             lang=args.lang,
             translate=args.translate,
+            translate_to=(args.translate_to or []),
             native_segmentation=getattr(args, "native_segmentation", False),
             verbose=args.verbose,
             edit=args.edit,

{s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/config.py RENAMED Viewed

@@ -13,6 +13,7 @@ class SessionOptions:
     model: str
     lang: str | None
     translate: bool
+    translate_to: list[str]
     native_segmentation: bool
     verbose: bool
     edit: bool

s2t-0.1.3.post1.dev1/src/s2t/recorder.py ADDED Viewed

@@ -0,0 +1,336 @@
+from __future__ import annotations
+import os
+import queue
+import select
+import sys
+import threading
+import time
+from pathlib import Path
+from typing import Any, Protocol, cast, runtime_checkable
+class Recorder:
+    def __init__(
+        self,
+        session_dir: Path,
+        samplerate: int,
+        channels: int,
+        ext: str,
+        debounce_ms: int = 0,
+        verbose: bool = False,
+        pause_after_first_chunk: bool = False,
+        resume_event: threading.Event | None = None,
+    ) -> None:
+        self.session_dir = session_dir
+        self.samplerate = samplerate
+        self.channels = channels
+        self.ext = ext
+        self.debounce_ms = max(0, int(debounce_ms))
+        self.verbose = verbose
+        self.pause_after_first_chunk = pause_after_first_chunk
+        self.resume_event = resume_event
+        self._paused = False
+    def run(
+        self,
+        tx_queue: queue.Queue[tuple[int, Path, int, float]],
+    ) -> tuple[list[Path], list[int], list[float]]:
+        import platform
+        import termios
+        import tty
+        try:
+            import sounddevice as sd
+            import soundfile as sf
+        except Exception as e:
+            raise RuntimeError("sounddevice/soundfile required for recording.") from e
+        evt_q: queue.Queue[str] = queue.Queue()
+        # Control queue is separate from audio frames to avoid control backpressure.
+        ctrl_q: queue.Queue[str] = queue.Queue()
+        stop_evt = threading.Event()
+        def key_reader() -> None:
+            try:
+                if platform.system() == "Windows":
+                    import msvcrt
+                    @runtime_checkable
+                    class _MSVCRT(Protocol):
+                        def kbhit(self) -> int: ...
+                        def getwch(self) -> str: ...
+                    ms = cast(_MSVCRT, msvcrt)
+                    last_space = 0.0
+                    if self.verbose:
+                        print("[key] using msvcrt (Windows)", file=sys.stderr)
+                    while not stop_evt.is_set():
+                        if ms.kbhit():
+                            ch = ms.getwch()
+                            if ch in ("\r", "\n"):
+                                if self.verbose:
+                                    print("[key] ENTER", file=sys.stderr)
+                                evt_q.put("ENTER")
+                                break
+                            if ch == " ":
+                                now = time.perf_counter()
+                                if self.debounce_ms and (now - last_space) < (
+                                    self.debounce_ms / 1000.0
+                                ):
+                                    continue
+                                last_space = now
+                                if self.verbose:
+                                    print("[key] SPACE", file=sys.stderr)
+                                evt_q.put("SPACE")
+                        time.sleep(0.01)
+                else:
+                    # Prefer sys.stdin when it's a TTY (original, proven path). If not a TTY, try /dev/tty, else fallback to stdin line reads.
+                    try:
+                        if sys.stdin.isatty():
+                            fd = sys.stdin.fileno()
+                            if self.verbose:
+                                print("[key] using sys.stdin (isatty, fd read)", file=sys.stderr)
+                            old = termios.tcgetattr(fd)
+                            tty.setcbreak(fd)
+                            last_space = 0.0
+                            try:
+                                while not stop_evt.is_set():
+                                    r, _, _ = select.select([fd], [], [], 0.05)
+                                    if r:
+                                        try:
+                                            ch_b = os.read(fd, 1)
+                                        except BlockingIOError:
+                                            continue
+                                        if not ch_b:
+                                            continue
+                                        ch = ch_b.decode(errors="ignore")
+                                        if ch in ("\n", "\r"):
+                                            if self.verbose:
+                                                print("[key] ENTER", file=sys.stderr)
+                                            evt_q.put("ENTER")
+                                            break
+                                        if ch == " ":
+                                            now = time.perf_counter()
+                                            if self.debounce_ms and (now - last_space) < (
+                                                self.debounce_ms / 1000.0
+                                            ):
+                                                continue
+                                            last_space = now
+                                            if self.verbose:
+                                                print("[key] SPACE", file=sys.stderr)
+                                            evt_q.put("SPACE")
+                            finally:
+                                termios.tcsetattr(fd, termios.TCSADRAIN, old)
+                        else:
+                            # Try /dev/tty when stdin is not a TTY
+                            using_devtty = False
+                            fd = None
+                            try:
+                                fd = os.open("/dev/tty", os.O_RDONLY)
+                                using_devtty = True
+                                if self.verbose:
+                                    print("[key] using /dev/tty (stdin not TTY)", file=sys.stderr)
+                                old = termios.tcgetattr(fd)
+                                tty.setcbreak(fd)
+                                last_space = 0.0
+                                try:
+                                    while not stop_evt.is_set():
+                                        r, _, _ = select.select([fd], [], [], 0.05)
+                                        if r:
+                                            ch_b = os.read(fd, 1)
+                                            if not ch_b:
+                                                continue
+                                            ch = ch_b.decode(errors="ignore")
+                                            if ch in ("\n", "\r"):
+                                                if self.verbose:
+                                                    print("[key] ENTER", file=sys.stderr)
+                                                evt_q.put("ENTER")
+                                                break
+                                            if ch == " ":
+                                                now = time.perf_counter()
+                                                if self.debounce_ms and (now - last_space) < (
+                                                    self.debounce_ms / 1000.0
+                                                ):
+                                                    continue
+                                                last_space = now
+                                                if self.verbose:
+                                                    print("[key] SPACE", file=sys.stderr)
+                                                evt_q.put("SPACE")
+                                finally:
+                                    termios.tcsetattr(fd, termios.TCSADRAIN, old)
+                            except Exception:
+                                if using_devtty and fd is not None:
+                                    try:
+                                        os.close(fd)
+                                    except Exception:
+                                        pass
+                                print(
+                                    "Warning: no TTY for key input; falling back to stdin line mode.",
+                                    file=sys.stderr,
+                                )
+                                # Last resort: line-buffered stdin; Enter will still end.
+                                while not stop_evt.is_set():
+                                    line = sys.stdin.readline()
+                                    if not line:
+                                        time.sleep(0.05)
+                                        continue
+                                    # If user hits Enter on empty line, treat as ENTER
+                                    if line == "\n" or line == "\r\n":
+                                        if self.verbose:
+                                            print("[key] ENTER (line mode)", file=sys.stderr)
+                                        evt_q.put("ENTER")
+                                        break
+                                    # If first non-empty char is space, treat as SPACE
+                                    if line and line[0] == " ":
+                                        if self.verbose:
+                                            print("[key] SPACE (line mode)", file=sys.stderr)
+                                        evt_q.put("SPACE")
+                    except Exception as e:
+                        print(f"Warning: key reader failed: {e}", file=sys.stderr)
+            except Exception as e:
+                # Log unexpected key reader errors to aid debugging, but keep recording running.
+                print(f"Warning: key reader stopped unexpectedly: {e}", file=sys.stderr)
+        audio_q: queue.Queue[tuple[str, Any]] = queue.Queue(maxsize=128)
+        chunk_index = 1
+        chunk_paths: list[Path] = []
+        chunk_frames: list[int] = []
+        chunk_offsets: list[float] = []
+        offset_seconds_total = 0.0
+        def writer_fn() -> None:
+            nonlocal chunk_index, offset_seconds_total
+            frames_written = 0
+            cur_path = self.session_dir / f"chunk_{chunk_index:04d}{self.ext}"
+            fh = sf.SoundFile(
+                str(cur_path), mode="w", samplerate=self.samplerate, channels=self.channels
+            )
+            while True:
+                # First, handle any pending control commands so SPACE/ENTER are never blocked by frames backlog.
+                try:
+                    while True:
+                        cmd = ctrl_q.get_nowait()
+                        if cmd == "split":
+                            fh.flush()
+                            fh.close()
+                            if frames_written > 0:
+                                dur = frames_written / float(self.samplerate)
+                                chunk_paths.append(cur_path)
+                                chunk_frames.append(frames_written)
+                                chunk_offsets.append(offset_seconds_total)
+                                offset_seconds_total += dur
+                                if self.verbose:
+                                    print(
+                                        f"Saved chunk: {cur_path.name} ({dur:.2f}s)",
+                                        file=sys.stderr,
+                                    )
+                                tx_queue.put(
+                                    (chunk_index, cur_path, frames_written, chunk_offsets[-1])
+                                )
+                            else:
+                                try:
+                                    cur_path.unlink(missing_ok=True)
+                                except Exception:
+                                    pass
+                            frames_written = 0
+                            chunk_index += 1
+                            if (
+                                self.pause_after_first_chunk
+                                and chunk_index == 2
+                                and self.resume_event is not None
+                            ):
+                                self._paused = True
+                                self.resume_event.wait()
+                                self._paused = False
+                            cur_path = self.session_dir / f"chunk_{chunk_index:04d}{self.ext}"
+                            fh = sf.SoundFile(
+                                str(cur_path),
+                                mode="w",
+                                samplerate=self.samplerate,
+                                channels=self.channels,
+                            )
+                        elif cmd == "finish":
+                            fh.flush()
+                            fh.close()
+                            if frames_written > 0:
+                                dur = frames_written / float(self.samplerate)
+                                chunk_paths.append(cur_path)
+                                chunk_frames.append(frames_written)
+                                chunk_offsets.append(offset_seconds_total)
+                                offset_seconds_total += dur
+                                if self.verbose:
+                                    print(
+                                        f"Saved chunk: {cur_path.name} ({dur:.2f}s)",
+                                        file=sys.stderr,
+                                    )
+                                tx_queue.put(
+                                    (chunk_index, cur_path, frames_written, chunk_offsets[-1])
+                                )
+                            else:
+                                try:
+                                    cur_path.unlink(missing_ok=True)
+                                except Exception:
+                                    pass
+                            tx_queue.put((-1, Path(), 0, 0.0))
+                            return
+                except queue.Empty:
+                    pass
+                # Then, write frames if available; short timeout to re-check control queue regularly.
+                try:
+                    kind, payload = audio_q.get(timeout=0.05)
+                except queue.Empty:
+                    continue
+                if kind == "frames":
+                    data = payload
+                    fh.write(data)
+                    frames_written += len(data)
+            tx_queue.put((-1, Path(), 0, 0.0))
+        # Timestamp of last dropped-frame warning (throttling for verbose mode)
+        last_drop_log = 0.0
+        def cb(indata: Any, frames: int, time_info: Any, status: Any) -> None:
+            nonlocal last_drop_log
+            if status:
+                print(status, file=sys.stderr)
+            if not self._paused:
+                try:
+                    audio_q.put_nowait(("frames", indata.copy()))
+                except queue.Full:
+                    # Drop frame if the queue is saturated; throttle warnings.
+                    now = time.perf_counter()
+                    if self.verbose and (now - last_drop_log) > 1.0:
+                        print(
+                            "Warning: audio queue full; dropping input frames.",
+                            file=sys.stderr,
+                        )
+                        last_drop_log = now
+        key_t = threading.Thread(target=key_reader, daemon=True)
+        writer_t = threading.Thread(target=writer_fn, daemon=True)
+        key_t.start()
+        writer_t.start()
+        print("Recording… Press SPACE to split, Enter to finish.")
+        print("—" * 60)
+        print("")
+        import sounddevice as sd
+        with sd.InputStream(samplerate=self.samplerate, channels=self.channels, callback=cb):
+            while True:
+                try:
+                    evt = evt_q.get(timeout=0.05)
+                except queue.Empty:
+                    continue
+                if evt == "SPACE":
+                    ctrl_q.put("split")
+                elif evt == "ENTER":
+                    ctrl_q.put("finish")
+                    break
+        writer_t.join()
+        return chunk_paths, chunk_frames, chunk_offsets

s2t-0.1.3.post1.dev1/src/s2t/translator/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from __future__ import annotations
+from .argos_backend import ArgosTranslator, ensure_packages_background, translate_result_segments
+__all__ = [
+    "ArgosTranslator",
+    "ensure_packages_background",
+    "translate_result_segments",
+]

s2t 0.1.2__tar.gz → 0.1.3.post1.dev1__tar.gz

s2t 0.1.2__tar.gz → 0.1.3.post1.dev1__tar.gz