s2t 0.1.4__py3-none-any.whl → 0.1.6.post1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
s2t/cli.py CHANGED
@@ -213,6 +213,8 @@ def run_session(opts: SessionOptions) -> int:
213
213
  verbose=opts.verbose,
214
214
  pause_after_first_chunk=opts.prompt,
215
215
  resume_event=prompt_resume_event,
216
+ silence_sec=opts.silence_sec,
217
+ min_chunk_sec=opts.min_chunk_sec,
216
218
  )
217
219
  t0 = time.perf_counter()
218
220
  chunk_paths, chunk_frames, chunk_offsets = rec.run(tx_q)
@@ -441,6 +443,18 @@ def main(argv: list[str] | None = None) -> int:
441
443
  default=0,
442
444
  help="Debounce window for SPACE (ms). If >0, ignores rapid successive space presses",
443
445
  )
446
+ parser.add_argument(
447
+ "--silence-sec",
448
+ type=float,
449
+ default=1.0,
450
+ help="Auto-split when continuous silence >= this many seconds (0 disables)",
451
+ )
452
+ parser.add_argument(
453
+ "--min-chunk-sec",
454
+ type=float,
455
+ default=5.0,
456
+ help="Minimum duration a chunk must reach before auto-split can trigger",
457
+ )
444
458
  parser.add_argument(
445
459
  "--native-segmentation",
446
460
  action="store_true",
@@ -496,6 +510,8 @@ def main(argv: list[str] | None = None) -> int:
496
510
  verbose=args.verbose,
497
511
  edit=args.edit,
498
512
  debounce_ms=getattr(args, "debounce_ms", 0),
513
+ silence_sec=getattr(args, "silence_sec", 1.0),
514
+ min_chunk_sec=getattr(args, "min_chunk_sec", 5.0),
499
515
  profile=args.profile,
500
516
  keep_chunks=getattr(args, "keep_chunks", False),
501
517
  prompt=getattr(args, "prompt", False),
s2t/config.py CHANGED
@@ -18,6 +18,8 @@ class SessionOptions:
18
18
  verbose: bool
19
19
  edit: bool
20
20
  debounce_ms: int
21
+ silence_sec: float
22
+ min_chunk_sec: float
21
23
  profile: bool
22
24
  keep_chunks: bool
23
25
  prompt: bool
s2t/recorder.py CHANGED
@@ -9,6 +9,8 @@ import time
9
9
  from pathlib import Path
10
10
  from typing import Any, Protocol, cast, runtime_checkable
11
11
 
12
+ import numpy as np
13
+
12
14
 
13
15
  class Recorder:
14
16
  def __init__(
@@ -21,6 +23,8 @@ class Recorder:
21
23
  verbose: bool = False,
22
24
  pause_after_first_chunk: bool = False,
23
25
  resume_event: threading.Event | None = None,
26
+ silence_sec: float = 1.0,
27
+ min_chunk_sec: float = 5.0,
24
28
  ) -> None:
25
29
  self.session_dir = session_dir
26
30
  self.samplerate = samplerate
@@ -31,6 +35,9 @@ class Recorder:
31
35
  self.pause_after_first_chunk = pause_after_first_chunk
32
36
  self.resume_event = resume_event
33
37
  self._paused = False
38
+ # Auto-split config
39
+ self.silence_sec = max(0.0, float(silence_sec))
40
+ self.min_chunk_sec = max(0.0, float(min_chunk_sec))
34
41
 
35
42
  def run(
36
43
  self,
@@ -194,7 +201,8 @@ class Recorder:
194
201
  # Log unexpected key reader errors to aid debugging, but keep recording running.
195
202
  print(f"Warning: key reader stopped unexpectedly: {e}", file=sys.stderr)
196
203
 
197
- audio_q: queue.Queue[tuple[str, Any]] = queue.Queue(maxsize=128)
204
+ # Unbounded audio queue to avoid drops on slower machines; control signals are separate.
205
+ audio_q: queue.Queue[tuple[str, Any]] = queue.Queue()
198
206
  chunk_index = 1
199
207
  chunk_paths: list[Path] = []
200
208
  chunk_frames: list[int] = []
@@ -208,50 +216,65 @@ class Recorder:
208
216
  fh = sf.SoundFile(
209
217
  str(cur_path), mode="w", samplerate=self.samplerate, channels=self.channels
210
218
  )
219
+ # State for auto-split based on silence
220
+ silent_frames_run = 0
221
+ seen_non_silent = False
222
+ last_split_time = 0.0
223
+ # Internal thresholds
224
+ threshold_rms = 0.015 # conservative RMS threshold for float32 [-1,1]
225
+ split_cooldown_sec = 0.2
226
+
227
+ def _do_split() -> None:
228
+ nonlocal fh, frames_written, cur_path, chunk_index, offset_seconds_total
229
+ fh.flush()
230
+ fh.close()
231
+ if frames_written > 0:
232
+ dur = frames_written / float(self.samplerate)
233
+ chunk_paths.append(cur_path)
234
+ chunk_frames.append(frames_written)
235
+ chunk_offsets.append(offset_seconds_total)
236
+ offset_seconds_total += dur
237
+ if self.verbose:
238
+ print(
239
+ f"Saved chunk: {cur_path.name} ({dur:.2f}s)",
240
+ file=sys.stderr,
241
+ )
242
+ tx_queue.put((chunk_index, cur_path, frames_written, chunk_offsets[-1]))
243
+ else:
244
+ try:
245
+ cur_path.unlink(missing_ok=True)
246
+ except Exception:
247
+ pass
248
+ frames_written = 0
249
+ chunk_index += 1
250
+ if (
251
+ self.pause_after_first_chunk
252
+ and chunk_index == 2
253
+ and self.resume_event is not None
254
+ ):
255
+ self._paused = True
256
+ self.resume_event.wait()
257
+ self._paused = False
258
+ cur_path = self.session_dir / f"chunk_{chunk_index:04d}{self.ext}"
259
+ fh = sf.SoundFile(
260
+ str(cur_path),
261
+ mode="w",
262
+ samplerate=self.samplerate,
263
+ channels=self.channels,
264
+ )
265
+ # Reset silence tracking after a split
266
+ return
267
+
211
268
  while True:
212
269
  # First, handle any pending control commands so SPACE/ENTER are never blocked by frames backlog.
213
270
  try:
214
271
  while True:
215
272
  cmd = ctrl_q.get_nowait()
216
273
  if cmd == "split":
217
- fh.flush()
218
- fh.close()
219
- if frames_written > 0:
220
- dur = frames_written / float(self.samplerate)
221
- chunk_paths.append(cur_path)
222
- chunk_frames.append(frames_written)
223
- chunk_offsets.append(offset_seconds_total)
224
- offset_seconds_total += dur
225
- if self.verbose:
226
- print(
227
- f"Saved chunk: {cur_path.name} ({dur:.2f}s)",
228
- file=sys.stderr,
229
- )
230
- tx_queue.put(
231
- (chunk_index, cur_path, frames_written, chunk_offsets[-1])
232
- )
233
- else:
234
- try:
235
- cur_path.unlink(missing_ok=True)
236
- except Exception:
237
- pass
238
- frames_written = 0
239
- chunk_index += 1
240
- if (
241
- self.pause_after_first_chunk
242
- and chunk_index == 2
243
- and self.resume_event is not None
244
- ):
245
- self._paused = True
246
- self.resume_event.wait()
247
- self._paused = False
248
- cur_path = self.session_dir / f"chunk_{chunk_index:04d}{self.ext}"
249
- fh = sf.SoundFile(
250
- str(cur_path),
251
- mode="w",
252
- samplerate=self.samplerate,
253
- channels=self.channels,
254
- )
274
+ _do_split()
275
+ # Reset silence tracking on manual split
276
+ silent_frames_run = 0
277
+ seen_non_silent = False
255
278
  elif cmd == "finish":
256
279
  fh.flush()
257
280
  fh.close()
@@ -288,34 +311,67 @@ class Recorder:
288
311
  data = payload
289
312
  fh.write(data)
290
313
  frames_written += len(data)
291
- tx_queue.put((-1, Path(), 0, 0.0))
314
+ # Auto-split based on silence if enabled
315
+ if self.silence_sec > 0.0:
316
+ try:
317
+ arr = np.asarray(data, dtype=np.float32)
318
+ if arr.ndim == 2 and arr.shape[1] > 1:
319
+ # average channels
320
+ arr_mono = arr.mean(axis=1)
321
+ else:
322
+ arr_mono = arr.reshape(-1)
323
+ # compute RMS
324
+ rms = (
325
+ float(np.sqrt(np.mean(np.square(arr_mono))))
326
+ if arr_mono.size
327
+ else 0.0
328
+ )
329
+ except Exception:
330
+ rms = 0.0
292
331
 
293
- # Timestamp of last dropped-frame warning (throttling for verbose mode)
294
- last_drop_log = 0.0
332
+ if rms < threshold_rms:
333
+ silent_frames_run += len(arr_mono)
334
+ else:
335
+ silent_frames_run = 0
336
+ seen_non_silent = True
337
+
338
+ # Conditions to auto-split
339
+ enough_silence = silent_frames_run >= int(
340
+ self.samplerate * self.silence_sec
341
+ )
342
+ enough_length = frames_written >= int(self.samplerate * self.min_chunk_sec)
343
+ cooldown_ok = (time.perf_counter() - last_split_time) >= split_cooldown_sec
344
+ if enough_silence and enough_length and seen_non_silent and cooldown_ok:
345
+ if self.verbose:
346
+ print(
347
+ f"[auto] split (≥{self.silence_sec:.2f}s silence)",
348
+ file=sys.stderr,
349
+ )
350
+ last_split_time = time.perf_counter()
351
+ # Queue a split for the next control phase
352
+ ctrl_q.put("split")
353
+ # Reset silence tracking now to avoid cascaded triggers
354
+ silent_frames_run = 0
355
+ seen_non_silent = False
356
+ tx_queue.put((-1, Path(), 0, 0.0))
295
357
 
296
358
  def cb(indata: Any, frames: int, time_info: Any, status: Any) -> None:
297
- nonlocal last_drop_log
298
359
  if status:
299
360
  print(status, file=sys.stderr)
300
361
  if not self._paused:
301
- try:
302
- audio_q.put_nowait(("frames", indata.copy()))
303
- except queue.Full:
304
- # Drop frame if the queue is saturated; throttle warnings.
305
- now = time.perf_counter()
306
- if self.verbose and (now - last_drop_log) > 1.0:
307
- print(
308
- "Warning: audio queue full; dropping input frames.",
309
- file=sys.stderr,
310
- )
311
- last_drop_log = now
362
+ audio_q.put(("frames", indata.copy()))
312
363
 
313
364
  key_t = threading.Thread(target=key_reader, daemon=True)
314
365
  writer_t = threading.Thread(target=writer_fn, daemon=True)
315
366
  key_t.start()
316
367
  writer_t.start()
317
368
 
318
- print("Recording… Press SPACE to split, Enter to finish.")
369
+ msg = "Recording… Press SPACE to split, Enter to finish."
370
+ if self.silence_sec > 0.0:
371
+ msg += (
372
+ f" Auto-split on ≥{self.silence_sec:.2f}s silence (min {self.min_chunk_sec:.2f}s)."
373
+ )
374
+ print(msg)
319
375
  print("—" * 60)
320
376
  print("")
321
377
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: s2t
3
- Version: 0.1.4
3
+ Version: 0.1.6.post1.dev0
4
4
  Summary: Speech to Text (s2t): Record audio, run Whisper, export formats, and copy transcript to clipboard.
5
5
  Author: Maintainers
6
6
  License-Expression: LicenseRef-Proprietary
@@ -55,6 +55,8 @@ System requirements (Linux)
55
55
  - Translate to English: `-t` (long: `--translate`). You may still provide `--lang` as an input-language hint if you want.
56
56
  - List available models and exit: `-L` (long: `--list-models`)
57
57
  - Recording format: `-f flac|wav|mp3` (long: `--recording-format`), default `flac`. MP3 requires ffmpeg; if absent, it falls back to FLAC with a warning.
58
+ - Auto-split on silence: `--silence-sec 1.0` (default `1.0`; `0` disables). When at least this many seconds of continuous silence are detected, the current chunk is ended automatically.
59
+ - Minimum chunk length for auto-split: `--min-chunk-sec 5.0` (default `5.0`). Prevents very short chunks and avoids splitting early in a sentence.
58
60
  - Prompt mode (spoken prompt): `-p` (long: `--prompt`). Speak your prompt first, then press SPACE to use it as prompt and continue with your main content. If you press ENTER instead of SPACE, no prompt is used; the spoken audio is transcribed as normal payload and the session ends.
59
61
  - Keep chunk files: `--keep-chunks` — by default, per‑chunk audio and per‑chunk Whisper outputs are deleted after the final merge.
60
62
  - Open transcript for editing: `-e` (long: `--edit`) — opens the generated `.txt` in your shell editor (`$VISUAL`/`$EDITOR`).
@@ -69,6 +71,11 @@ Outputs are written into a timestamped folder under the chosen output directory
69
71
  - Final outputs: `recording.flac/.wav` (and `recording.mp3` if requested and ffmpeg available), plus `recording.txt/.srt/.vtt/.tsv/.json`
70
72
  - Clipboard mirrors the combined `.txt` with blank lines between chunks.
71
73
 
74
+ Auto-splitting details
75
+ - SPACE always splits immediately; ENTER finishes the recording.
76
+ - With `--silence-sec` set to a value greater than 0, a chunk ends automatically once that many seconds of continuous silence have been detected.
77
+ - Auto-split only triggers once the current chunk is at least `--min-chunk-sec` seconds long and speech has been detected in it (so leading silence is ignored). A short internal cooldown avoids duplicate splits.
78
+
72
79
  ## Makefile (optional)
73
80
  - Setup venv + dev deps: `make setup`
74
81
  - Lint/format/test: `make lint`, `make format`, `make test`; combined gate: `make check`
@@ -0,0 +1,16 @@
1
+ s2t/__init__.py,sha256=wV4E9i-7KrUn1dOtLUQB3ZGEKx9gRWH3hPHlpw-ZdWc,332
2
+ s2t/cli.py,sha256=LWrG2SxoXY46wgiegw9ePNVvjpJj9EyuF1ggNk4sm-4,20583
3
+ s2t/config.py,sha256=uw4CZSSXmUvnlOrqBGR1Rcq-WdXucHj3KICRcCb_pkU,485
4
+ s2t/outputs.py,sha256=Lo8VcARZ7QPuuQQNu8myD5J4c4NO1Rs0L1DLnzLe9tM,1546
5
+ s2t/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
+ s2t/recorder.py,sha256=tVUGwnHnlwQM9UtdHUlQIZ3GubCrop9suZYxc9yHHmE,18370
7
+ s2t/types.py,sha256=jBiRN-tr0qVw-lhaXvnsyKrVGDyLkqEbxs9qkQ6qGqI,339
8
+ s2t/utils.py,sha256=YU6YhiuONmqhrKte4DY5tiC5PP-yFExJMMBzFUiA8qA,3416
9
+ s2t/whisper_engine.py,sha256=x-V7ST9e3JnwMWdbMh4C7dHjA420jaOtXH2-igeh7vc,6492
10
+ s2t/translator/__init__.py,sha256=K-MKves7kZ4-62POfrmWeOcBaTjsTzeFSu8QNHqYuus,239
11
+ s2t/translator/argos_backend.py,sha256=VW_OYFFBuNZgcWM-fbvR6XGokuxS2fptkCMFIO9MD1I,19068
12
+ s2t-0.1.6.post1.dev0.dist-info/METADATA,sha256=YLjWRv0JW_FPbBE05ycupKtzkcWexosIuFZ63NGeVes,5383
13
+ s2t-0.1.6.post1.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ s2t-0.1.6.post1.dev0.dist-info/entry_points.txt,sha256=JISIUlZAJ3DX1dB6zT3X_E3vcXI-eWEQKwHiT35fPKs,37
15
+ s2t-0.1.6.post1.dev0.dist-info/top_level.txt,sha256=o8N0JcuHdIrfX3iGHvntHiDC2XgN7__joyNu08ZOh0s,4
16
+ s2t-0.1.6.post1.dev0.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- s2t/__init__.py,sha256=wV4E9i-7KrUn1dOtLUQB3ZGEKx9gRWH3hPHlpw-ZdWc,332
2
- s2t/cli.py,sha256=Qf6Hz0Ew9ncLbQQoCPDG7ZiYWeGbwBcZMZi_WbEu54w,20018
3
- s2t/config.py,sha256=lFc_x5fIx_q0JpTcI4Lm4aubxhIXVH34foBvLMUNFGs,437
4
- s2t/outputs.py,sha256=Lo8VcARZ7QPuuQQNu8myD5J4c4NO1Rs0L1DLnzLe9tM,1546
5
- s2t/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
- s2t/recorder.py,sha256=0sw1UJqQIRdiJO5dugUxRjTN5kFU0CBETVjoQz99a8E,16055
7
- s2t/types.py,sha256=jBiRN-tr0qVw-lhaXvnsyKrVGDyLkqEbxs9qkQ6qGqI,339
8
- s2t/utils.py,sha256=YU6YhiuONmqhrKte4DY5tiC5PP-yFExJMMBzFUiA8qA,3416
9
- s2t/whisper_engine.py,sha256=x-V7ST9e3JnwMWdbMh4C7dHjA420jaOtXH2-igeh7vc,6492
10
- s2t/translator/__init__.py,sha256=K-MKves7kZ4-62POfrmWeOcBaTjsTzeFSu8QNHqYuus,239
11
- s2t/translator/argos_backend.py,sha256=VW_OYFFBuNZgcWM-fbvR6XGokuxS2fptkCMFIO9MD1I,19068
12
- s2t-0.1.4.dist-info/METADATA,sha256=oQYIN7eNFsSBvLQZTaORC_TvJtp0AUuhkuVmMIsfI28,4642
13
- s2t-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- s2t-0.1.4.dist-info/entry_points.txt,sha256=JISIUlZAJ3DX1dB6zT3X_E3vcXI-eWEQKwHiT35fPKs,37
15
- s2t-0.1.4.dist-info/top_level.txt,sha256=o8N0JcuHdIrfX3iGHvntHiDC2XgN7__joyNu08ZOh0s,4
16
- s2t-0.1.4.dist-info/RECORD,,