s2t 0.1.2__tar.gz → 0.1.3.post1.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/.gitignore +1 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/Makefile +3 -3
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/PKG-INFO +3 -1
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/pyproject.toml +4 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/cli.py +90 -2
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/config.py +1 -0
- s2t-0.1.3.post1.dev1/src/s2t/recorder.py +336 -0
- s2t-0.1.3.post1.dev1/src/s2t/translator/__init__.py +9 -0
- s2t-0.1.3.post1.dev1/src/s2t/translator/argos_backend.py +472 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/types.py +3 -1
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/whisper_engine.py +9 -2
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t.egg-info/PKG-INFO +3 -1
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t.egg-info/SOURCES.txt +3 -1
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t.egg-info/requires.txt +3 -0
- s2t-0.1.2/src/s2t/recorder.py +0 -205
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/.pre-commit-config.yaml +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/AGENTS.md +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/CONTRIBUTING.md +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/MANIFEST.in +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/README.md +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/docs/RELEASING.md +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/docs/SESSION_STATE.md +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/scripts/bench_transcribe.py +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/setup.cfg +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/__init__.py +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/outputs.py +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/py.typed +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t/utils.py +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t.egg-info/dependency_links.txt +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t.egg-info/entry_points.txt +0 -0
- {s2t-0.1.2 → s2t-0.1.3.post1.dev1}/src/s2t.egg-info/top_level.txt +0 -0
@@ -117,8 +117,8 @@ precommit-install: guard-venv ensure-dev
|
|
117
117
|
pre-commit install --install-hooks
|
118
118
|
|
119
119
|
guard-venv:
|
120
|
-
@if [ -n "$$VIRTUAL_ENV" ] && [ "$$VIRTUAL_ENV" != "$$PWD/.venv" ]; then \
|
121
|
-
echo "Error: active venv ($$VIRTUAL_ENV) differs from project .venv ($$PWD/.venv)."; \
|
122
|
-
echo "Please 'deactivate' or use the project venv (.venv)."; \
|
120
|
+
@if [ -n "$$VIRTUAL_ENV" ] && [ "$$VIRTUAL_ENV" != "$$PWD/.venv312" ]; then \
|
121
|
+
echo "Error: active venv ($$VIRTUAL_ENV) differs from project .venv ($$PWD/.venv312)."; \
|
122
|
+
echo "Please 'deactivate' or use the project venv (.venv312)."; \
|
123
123
|
exit 1; \
|
124
124
|
fi
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: s2t
|
3
|
-
Version: 0.1.2
|
3
|
+
Version: 0.1.3.post1.dev1
|
4
4
|
Summary: Speech to Text (s2t): Record audio, run Whisper, export formats, and copy transcript to clipboard.
|
5
5
|
Author: Maintainers
|
6
6
|
License-Expression: LicenseRef-Proprietary
|
@@ -23,6 +23,8 @@ Requires-Dist: mypy>=1.7; extra == "dev"
|
|
23
23
|
Requires-Dist: build>=1; extra == "dev"
|
24
24
|
Requires-Dist: setuptools-scm>=8; extra == "dev"
|
25
25
|
Requires-Dist: twine>=4; extra == "dev"
|
26
|
+
Provides-Extra: translate
|
27
|
+
Requires-Dist: argostranslate>=1.9.0; extra == "translate"
|
26
28
|
|
27
29
|
# s2t
|
28
30
|
|
@@ -40,6 +40,11 @@ from . import __version__
|
|
40
40
|
from .config import SessionOptions
|
41
41
|
from .outputs import concat_audio, write_final_outputs
|
42
42
|
from .recorder import Recorder
|
43
|
+
from .translator.argos_backend import (
|
44
|
+
ArgosTranslator,
|
45
|
+
ensure_packages_background,
|
46
|
+
translate_result_segments,
|
47
|
+
)
|
43
48
|
from .types import TranscriptionResult
|
44
49
|
from .utils import (
|
45
50
|
convert_wav_to_mp3,
|
@@ -62,7 +67,7 @@ def run_session(opts: SessionOptions) -> int:
|
|
62
67
|
|
63
68
|
engine = WhisperEngine(
|
64
69
|
model_name=opts.model,
|
65
|
-
translate=opts.translate,
|
70
|
+
translate=False, # translation handled as post-processing
|
66
71
|
language=opts.lang,
|
67
72
|
native_segmentation=opts.native_segmentation,
|
68
73
|
session_dir=session_dir,
|
@@ -73,6 +78,27 @@ def run_session(opts: SessionOptions) -> int:
|
|
73
78
|
)
|
74
79
|
ex, fut = engine.preload()
|
75
80
|
|
81
|
+
# Determine translation target languages from options
|
82
|
+
target_langs: list[str] = []
|
83
|
+
if opts.translate_to:
|
84
|
+
target_langs = list(dict.fromkeys([s.strip().lower() for s in opts.translate_to if s]))
|
85
|
+
elif opts.translate:
|
86
|
+
target_langs = ["en"]
|
87
|
+
|
88
|
+
# Background auto-install/update Argos packages as early as possible
|
89
|
+
detected_lang: dict[str, str | None] = {"code": None}
|
90
|
+
detected_lang_event = threading.Event()
|
91
|
+
translator: ArgosTranslator | None = None
|
92
|
+
if target_langs:
|
93
|
+
translator = ArgosTranslator(verbose=opts.verbose)
|
94
|
+
ensure_packages_background(
|
95
|
+
translator,
|
96
|
+
src_lang_hint=(opts.lang.lower() if opts.lang else None),
|
97
|
+
target_langs=target_langs,
|
98
|
+
detected_lang_event=detected_lang_event,
|
99
|
+
detected_lang_holder=detected_lang,
|
100
|
+
)
|
101
|
+
|
76
102
|
tx_q: queue.Queue[tuple[int, Path, int, float]] = queue.Queue()
|
77
103
|
cumulative_text = ""
|
78
104
|
next_to_emit = 1
|
@@ -134,6 +160,12 @@ def run_session(opts: SessionOptions) -> int:
|
|
134
160
|
# Build latest-ready prompt based on already finished chunks
|
135
161
|
prompt = _build_latest_ready_prompt(idx, finished_texts)
|
136
162
|
res = engine.transcribe_chunk(model, path, frames, initial_prompt=prompt)
|
163
|
+
# Record detected language once (for translator preload if needed)
|
164
|
+
if target_langs and detected_lang["code"] is None:
|
165
|
+
lang_code = str(res.get("language") or "").strip().lower()
|
166
|
+
if lang_code:
|
167
|
+
detected_lang["code"] = lang_code
|
168
|
+
detected_lang_event.set()
|
137
169
|
engine.write_chunk_outputs(res, path)
|
138
170
|
text_i = (res.get("text", "") or "").strip()
|
139
171
|
with agg_lock:
|
@@ -260,6 +292,55 @@ def run_session(opts: SessionOptions) -> int:
|
|
260
292
|
print("=" * 60)
|
261
293
|
print(text_final.rstrip("\n"))
|
262
294
|
|
295
|
+
# Post-processing: translate outputs for requested target languages
|
296
|
+
if target_langs and translator is not None:
|
297
|
+
# Decide source language: CLI hint takes precedence; else detected; else skip with warning
|
298
|
+
src_lang = (opts.lang.lower() if opts.lang else (detected_lang["code"] or "")).strip()
|
299
|
+
if not src_lang:
|
300
|
+
if opts.verbose:
|
301
|
+
print(
|
302
|
+
"Warning: Could not determine source language for translation; skipping post-translation.",
|
303
|
+
file=sys.stderr,
|
304
|
+
)
|
305
|
+
else:
|
306
|
+
# Skip identical language targets
|
307
|
+
effective_targets = [t for t in target_langs if t.lower() != src_lang.lower()]
|
308
|
+
# Ensure required packages if missing; perform synchronous install as needed
|
309
|
+
for tgt in effective_targets:
|
310
|
+
if not translator.has_package(src_lang, tgt):
|
311
|
+
print(
|
312
|
+
f"Ensuring Argos translation package for '{src_lang}->{tgt}' (may download 50–250 MB)…",
|
313
|
+
file=sys.stderr,
|
314
|
+
)
|
315
|
+
ok = False
|
316
|
+
try:
|
317
|
+
ok = translator.ensure_package(src_lang, tgt)
|
318
|
+
except Exception as e:
|
319
|
+
print(
|
320
|
+
f"Warning: could not install '{src_lang}->{tgt}' package: {e}",
|
321
|
+
file=sys.stderr,
|
322
|
+
)
|
323
|
+
if not ok and not translator.has_package(src_lang, tgt):
|
324
|
+
print(
|
325
|
+
f"Warning: translation package unavailable or failed for '{src_lang}->{tgt}'. Skipping.",
|
326
|
+
file=sys.stderr,
|
327
|
+
)
|
328
|
+
continue
|
329
|
+
try:
|
330
|
+
translated = translate_result_segments(translator, merged, src_lang, tgt)
|
331
|
+
# Write translated outputs with language suffix by passing a suffixed base path
|
332
|
+
suffixed = base_audio_path.with_name(
|
333
|
+
f"{base_audio_path.stem}.{tgt}{base_audio_path.suffix}"
|
334
|
+
)
|
335
|
+
write_final_outputs(translated, session_dir, suffixed)
|
336
|
+
if opts.verbose:
|
337
|
+
print(f"Created translated outputs for '{tgt}'.", file=sys.stderr)
|
338
|
+
except Exception as e:
|
339
|
+
print(
|
340
|
+
f"Warning: failed to translate to '{tgt}': {e}",
|
341
|
+
file=sys.stderr,
|
342
|
+
)
|
343
|
+
|
263
344
|
if opts.profile:
|
264
345
|
try:
|
265
346
|
prof_path = session_dir / "profile.json"
|
@@ -329,7 +410,13 @@ def main(argv: list[str] | None = None) -> int:
|
|
329
410
|
"-t",
|
330
411
|
"--translate",
|
331
412
|
action="store_true",
|
332
|
-
help="
|
413
|
+
help="After transcription, translate all outputs to English (post-processing)",
|
414
|
+
)
|
415
|
+
parser.add_argument(
|
416
|
+
"--translate-to",
|
417
|
+
action="append",
|
418
|
+
default=None,
|
419
|
+
help="After transcription, translate all outputs to the given language (can be repeated)",
|
333
420
|
)
|
334
421
|
parser.add_argument(
|
335
422
|
"-v",
|
@@ -404,6 +491,7 @@ def main(argv: list[str] | None = None) -> int:
|
|
404
491
|
model=args.model,
|
405
492
|
lang=args.lang,
|
406
493
|
translate=args.translate,
|
494
|
+
translate_to=(args.translate_to or []),
|
407
495
|
native_segmentation=getattr(args, "native_segmentation", False),
|
408
496
|
verbose=args.verbose,
|
409
497
|
edit=args.edit,
|
@@ -0,0 +1,336 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
import queue
|
5
|
+
import select
|
6
|
+
import sys
|
7
|
+
import threading
|
8
|
+
import time
|
9
|
+
from pathlib import Path
|
10
|
+
from typing import Any, Protocol, cast, runtime_checkable
|
11
|
+
|
12
|
+
|
13
|
+
class Recorder:
|
14
|
+
def __init__(
|
15
|
+
self,
|
16
|
+
session_dir: Path,
|
17
|
+
samplerate: int,
|
18
|
+
channels: int,
|
19
|
+
ext: str,
|
20
|
+
debounce_ms: int = 0,
|
21
|
+
verbose: bool = False,
|
22
|
+
pause_after_first_chunk: bool = False,
|
23
|
+
resume_event: threading.Event | None = None,
|
24
|
+
) -> None:
|
25
|
+
self.session_dir = session_dir
|
26
|
+
self.samplerate = samplerate
|
27
|
+
self.channels = channels
|
28
|
+
self.ext = ext
|
29
|
+
self.debounce_ms = max(0, int(debounce_ms))
|
30
|
+
self.verbose = verbose
|
31
|
+
self.pause_after_first_chunk = pause_after_first_chunk
|
32
|
+
self.resume_event = resume_event
|
33
|
+
self._paused = False
|
34
|
+
|
35
|
+
def run(
|
36
|
+
self,
|
37
|
+
tx_queue: queue.Queue[tuple[int, Path, int, float]],
|
38
|
+
) -> tuple[list[Path], list[int], list[float]]:
|
39
|
+
import platform
|
40
|
+
import termios
|
41
|
+
import tty
|
42
|
+
|
43
|
+
try:
|
44
|
+
import sounddevice as sd
|
45
|
+
import soundfile as sf
|
46
|
+
except Exception as e:
|
47
|
+
raise RuntimeError("sounddevice/soundfile required for recording.") from e
|
48
|
+
|
49
|
+
evt_q: queue.Queue[str] = queue.Queue()
|
50
|
+
# Control queue is separate from audio frames to avoid control backpressure.
|
51
|
+
ctrl_q: queue.Queue[str] = queue.Queue()
|
52
|
+
stop_evt = threading.Event()
|
53
|
+
|
54
|
+
def key_reader() -> None:
|
55
|
+
try:
|
56
|
+
if platform.system() == "Windows":
|
57
|
+
import msvcrt
|
58
|
+
|
59
|
+
@runtime_checkable
|
60
|
+
class _MSVCRT(Protocol):
|
61
|
+
def kbhit(self) -> int: ...
|
62
|
+
def getwch(self) -> str: ...
|
63
|
+
|
64
|
+
ms = cast(_MSVCRT, msvcrt)
|
65
|
+
|
66
|
+
last_space = 0.0
|
67
|
+
if self.verbose:
|
68
|
+
print("[key] using msvcrt (Windows)", file=sys.stderr)
|
69
|
+
while not stop_evt.is_set():
|
70
|
+
if ms.kbhit():
|
71
|
+
ch = ms.getwch()
|
72
|
+
if ch in ("\r", "\n"):
|
73
|
+
if self.verbose:
|
74
|
+
print("[key] ENTER", file=sys.stderr)
|
75
|
+
evt_q.put("ENTER")
|
76
|
+
break
|
77
|
+
if ch == " ":
|
78
|
+
now = time.perf_counter()
|
79
|
+
if self.debounce_ms and (now - last_space) < (
|
80
|
+
self.debounce_ms / 1000.0
|
81
|
+
):
|
82
|
+
continue
|
83
|
+
last_space = now
|
84
|
+
if self.verbose:
|
85
|
+
print("[key] SPACE", file=sys.stderr)
|
86
|
+
evt_q.put("SPACE")
|
87
|
+
time.sleep(0.01)
|
88
|
+
else:
|
89
|
+
# Prefer sys.stdin when it's a TTY (original, proven path). If not a TTY, try /dev/tty, else fallback to stdin line reads.
|
90
|
+
try:
|
91
|
+
if sys.stdin.isatty():
|
92
|
+
fd = sys.stdin.fileno()
|
93
|
+
if self.verbose:
|
94
|
+
print("[key] using sys.stdin (isatty, fd read)", file=sys.stderr)
|
95
|
+
old = termios.tcgetattr(fd)
|
96
|
+
tty.setcbreak(fd)
|
97
|
+
last_space = 0.0
|
98
|
+
try:
|
99
|
+
while not stop_evt.is_set():
|
100
|
+
r, _, _ = select.select([fd], [], [], 0.05)
|
101
|
+
if r:
|
102
|
+
try:
|
103
|
+
ch_b = os.read(fd, 1)
|
104
|
+
except BlockingIOError:
|
105
|
+
continue
|
106
|
+
if not ch_b:
|
107
|
+
continue
|
108
|
+
ch = ch_b.decode(errors="ignore")
|
109
|
+
if ch in ("\n", "\r"):
|
110
|
+
if self.verbose:
|
111
|
+
print("[key] ENTER", file=sys.stderr)
|
112
|
+
evt_q.put("ENTER")
|
113
|
+
break
|
114
|
+
if ch == " ":
|
115
|
+
now = time.perf_counter()
|
116
|
+
if self.debounce_ms and (now - last_space) < (
|
117
|
+
self.debounce_ms / 1000.0
|
118
|
+
):
|
119
|
+
continue
|
120
|
+
last_space = now
|
121
|
+
if self.verbose:
|
122
|
+
print("[key] SPACE", file=sys.stderr)
|
123
|
+
evt_q.put("SPACE")
|
124
|
+
finally:
|
125
|
+
termios.tcsetattr(fd, termios.TCSADRAIN, old)
|
126
|
+
else:
|
127
|
+
# Try /dev/tty when stdin is not a TTY
|
128
|
+
using_devtty = False
|
129
|
+
fd = None
|
130
|
+
try:
|
131
|
+
fd = os.open("/dev/tty", os.O_RDONLY)
|
132
|
+
using_devtty = True
|
133
|
+
if self.verbose:
|
134
|
+
print("[key] using /dev/tty (stdin not TTY)", file=sys.stderr)
|
135
|
+
old = termios.tcgetattr(fd)
|
136
|
+
tty.setcbreak(fd)
|
137
|
+
last_space = 0.0
|
138
|
+
try:
|
139
|
+
while not stop_evt.is_set():
|
140
|
+
r, _, _ = select.select([fd], [], [], 0.05)
|
141
|
+
if r:
|
142
|
+
ch_b = os.read(fd, 1)
|
143
|
+
if not ch_b:
|
144
|
+
continue
|
145
|
+
ch = ch_b.decode(errors="ignore")
|
146
|
+
if ch in ("\n", "\r"):
|
147
|
+
if self.verbose:
|
148
|
+
print("[key] ENTER", file=sys.stderr)
|
149
|
+
evt_q.put("ENTER")
|
150
|
+
break
|
151
|
+
if ch == " ":
|
152
|
+
now = time.perf_counter()
|
153
|
+
if self.debounce_ms and (now - last_space) < (
|
154
|
+
self.debounce_ms / 1000.0
|
155
|
+
):
|
156
|
+
continue
|
157
|
+
last_space = now
|
158
|
+
if self.verbose:
|
159
|
+
print("[key] SPACE", file=sys.stderr)
|
160
|
+
evt_q.put("SPACE")
|
161
|
+
finally:
|
162
|
+
termios.tcsetattr(fd, termios.TCSADRAIN, old)
|
163
|
+
except Exception:
|
164
|
+
if using_devtty and fd is not None:
|
165
|
+
try:
|
166
|
+
os.close(fd)
|
167
|
+
except Exception:
|
168
|
+
pass
|
169
|
+
print(
|
170
|
+
"Warning: no TTY for key input; falling back to stdin line mode.",
|
171
|
+
file=sys.stderr,
|
172
|
+
)
|
173
|
+
# Last resort: line-buffered stdin; Enter will still end.
|
174
|
+
while not stop_evt.is_set():
|
175
|
+
line = sys.stdin.readline()
|
176
|
+
if not line:
|
177
|
+
time.sleep(0.05)
|
178
|
+
continue
|
179
|
+
# If user hits Enter on empty line, treat as ENTER
|
180
|
+
if line == "\n" or line == "\r\n":
|
181
|
+
if self.verbose:
|
182
|
+
print("[key] ENTER (line mode)", file=sys.stderr)
|
183
|
+
evt_q.put("ENTER")
|
184
|
+
break
|
185
|
+
# If first non-empty char is space, treat as SPACE
|
186
|
+
if line and line[0] == " ":
|
187
|
+
if self.verbose:
|
188
|
+
print("[key] SPACE (line mode)", file=sys.stderr)
|
189
|
+
evt_q.put("SPACE")
|
190
|
+
except Exception as e:
|
191
|
+
print(f"Warning: key reader failed: {e}", file=sys.stderr)
|
192
|
+
|
193
|
+
except Exception as e:
|
194
|
+
# Log unexpected key reader errors to aid debugging, but keep recording running.
|
195
|
+
print(f"Warning: key reader stopped unexpectedly: {e}", file=sys.stderr)
|
196
|
+
|
197
|
+
audio_q: queue.Queue[tuple[str, Any]] = queue.Queue(maxsize=128)
|
198
|
+
chunk_index = 1
|
199
|
+
chunk_paths: list[Path] = []
|
200
|
+
chunk_frames: list[int] = []
|
201
|
+
chunk_offsets: list[float] = []
|
202
|
+
offset_seconds_total = 0.0
|
203
|
+
|
204
|
+
def writer_fn() -> None:
|
205
|
+
nonlocal chunk_index, offset_seconds_total
|
206
|
+
frames_written = 0
|
207
|
+
cur_path = self.session_dir / f"chunk_{chunk_index:04d}{self.ext}"
|
208
|
+
fh = sf.SoundFile(
|
209
|
+
str(cur_path), mode="w", samplerate=self.samplerate, channels=self.channels
|
210
|
+
)
|
211
|
+
while True:
|
212
|
+
# First, handle any pending control commands so SPACE/ENTER are never blocked by frames backlog.
|
213
|
+
try:
|
214
|
+
while True:
|
215
|
+
cmd = ctrl_q.get_nowait()
|
216
|
+
if cmd == "split":
|
217
|
+
fh.flush()
|
218
|
+
fh.close()
|
219
|
+
if frames_written > 0:
|
220
|
+
dur = frames_written / float(self.samplerate)
|
221
|
+
chunk_paths.append(cur_path)
|
222
|
+
chunk_frames.append(frames_written)
|
223
|
+
chunk_offsets.append(offset_seconds_total)
|
224
|
+
offset_seconds_total += dur
|
225
|
+
if self.verbose:
|
226
|
+
print(
|
227
|
+
f"Saved chunk: {cur_path.name} ({dur:.2f}s)",
|
228
|
+
file=sys.stderr,
|
229
|
+
)
|
230
|
+
tx_queue.put(
|
231
|
+
(chunk_index, cur_path, frames_written, chunk_offsets[-1])
|
232
|
+
)
|
233
|
+
else:
|
234
|
+
try:
|
235
|
+
cur_path.unlink(missing_ok=True)
|
236
|
+
except Exception:
|
237
|
+
pass
|
238
|
+
frames_written = 0
|
239
|
+
chunk_index += 1
|
240
|
+
if (
|
241
|
+
self.pause_after_first_chunk
|
242
|
+
and chunk_index == 2
|
243
|
+
and self.resume_event is not None
|
244
|
+
):
|
245
|
+
self._paused = True
|
246
|
+
self.resume_event.wait()
|
247
|
+
self._paused = False
|
248
|
+
cur_path = self.session_dir / f"chunk_{chunk_index:04d}{self.ext}"
|
249
|
+
fh = sf.SoundFile(
|
250
|
+
str(cur_path),
|
251
|
+
mode="w",
|
252
|
+
samplerate=self.samplerate,
|
253
|
+
channels=self.channels,
|
254
|
+
)
|
255
|
+
elif cmd == "finish":
|
256
|
+
fh.flush()
|
257
|
+
fh.close()
|
258
|
+
if frames_written > 0:
|
259
|
+
dur = frames_written / float(self.samplerate)
|
260
|
+
chunk_paths.append(cur_path)
|
261
|
+
chunk_frames.append(frames_written)
|
262
|
+
chunk_offsets.append(offset_seconds_total)
|
263
|
+
offset_seconds_total += dur
|
264
|
+
if self.verbose:
|
265
|
+
print(
|
266
|
+
f"Saved chunk: {cur_path.name} ({dur:.2f}s)",
|
267
|
+
file=sys.stderr,
|
268
|
+
)
|
269
|
+
tx_queue.put(
|
270
|
+
(chunk_index, cur_path, frames_written, chunk_offsets[-1])
|
271
|
+
)
|
272
|
+
else:
|
273
|
+
try:
|
274
|
+
cur_path.unlink(missing_ok=True)
|
275
|
+
except Exception:
|
276
|
+
pass
|
277
|
+
tx_queue.put((-1, Path(), 0, 0.0))
|
278
|
+
return
|
279
|
+
except queue.Empty:
|
280
|
+
pass
|
281
|
+
|
282
|
+
# Then, write frames if available; short timeout to re-check control queue regularly.
|
283
|
+
try:
|
284
|
+
kind, payload = audio_q.get(timeout=0.05)
|
285
|
+
except queue.Empty:
|
286
|
+
continue
|
287
|
+
if kind == "frames":
|
288
|
+
data = payload
|
289
|
+
fh.write(data)
|
290
|
+
frames_written += len(data)
|
291
|
+
tx_queue.put((-1, Path(), 0, 0.0))
|
292
|
+
|
293
|
+
# Timestamp of last dropped-frame warning (throttling for verbose mode)
|
294
|
+
last_drop_log = 0.0
|
295
|
+
|
296
|
+
def cb(indata: Any, frames: int, time_info: Any, status: Any) -> None:
|
297
|
+
nonlocal last_drop_log
|
298
|
+
if status:
|
299
|
+
print(status, file=sys.stderr)
|
300
|
+
if not self._paused:
|
301
|
+
try:
|
302
|
+
audio_q.put_nowait(("frames", indata.copy()))
|
303
|
+
except queue.Full:
|
304
|
+
# Drop frame if the queue is saturated; throttle warnings.
|
305
|
+
now = time.perf_counter()
|
306
|
+
if self.verbose and (now - last_drop_log) > 1.0:
|
307
|
+
print(
|
308
|
+
"Warning: audio queue full; dropping input frames.",
|
309
|
+
file=sys.stderr,
|
310
|
+
)
|
311
|
+
last_drop_log = now
|
312
|
+
|
313
|
+
key_t = threading.Thread(target=key_reader, daemon=True)
|
314
|
+
writer_t = threading.Thread(target=writer_fn, daemon=True)
|
315
|
+
key_t.start()
|
316
|
+
writer_t.start()
|
317
|
+
|
318
|
+
print("Recording… Press SPACE to split, Enter to finish.")
|
319
|
+
print("—" * 60)
|
320
|
+
print("")
|
321
|
+
|
322
|
+
import sounddevice as sd
|
323
|
+
|
324
|
+
with sd.InputStream(samplerate=self.samplerate, channels=self.channels, callback=cb):
|
325
|
+
while True:
|
326
|
+
try:
|
327
|
+
evt = evt_q.get(timeout=0.05)
|
328
|
+
except queue.Empty:
|
329
|
+
continue
|
330
|
+
if evt == "SPACE":
|
331
|
+
ctrl_q.put("split")
|
332
|
+
elif evt == "ENTER":
|
333
|
+
ctrl_q.put("finish")
|
334
|
+
break
|
335
|
+
writer_t.join()
|
336
|
+
return chunk_paths, chunk_frames, chunk_offsets
|