s2t 0.1.0.post1.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- s2t-0.1.0.post1.dev2/.gitignore +35 -0
- s2t-0.1.0.post1.dev2/.pre-commit-config.yaml +19 -0
- s2t-0.1.0.post1.dev2/AGENTS.md +44 -0
- s2t-0.1.0.post1.dev2/CONTRIBUTING.md +37 -0
- s2t-0.1.0.post1.dev2/MANIFEST.in +1 -0
- s2t-0.1.0.post1.dev2/Makefile +116 -0
- s2t-0.1.0.post1.dev2/PKG-INFO +85 -0
- s2t-0.1.0.post1.dev2/README.md +59 -0
- s2t-0.1.0.post1.dev2/docs/RELEASING.md +19 -0
- s2t-0.1.0.post1.dev2/pyproject.toml +87 -0
- s2t-0.1.0.post1.dev2/scripts/bench_transcribe.py +124 -0
- s2t-0.1.0.post1.dev2/setup.cfg +4 -0
- s2t-0.1.0.post1.dev2/src/s2t/__init__.py +13 -0
- s2t-0.1.0.post1.dev2/src/s2t/cli.py +420 -0
- s2t-0.1.0.post1.dev2/src/s2t/config.py +22 -0
- s2t-0.1.0.post1.dev2/src/s2t/outputs.py +49 -0
- s2t-0.1.0.post1.dev2/src/s2t/py.typed +1 -0
- s2t-0.1.0.post1.dev2/src/s2t/recorder.py +205 -0
- s2t-0.1.0.post1.dev2/src/s2t/types.py +14 -0
- s2t-0.1.0.post1.dev2/src/s2t/utils.py +109 -0
- s2t-0.1.0.post1.dev2/src/s2t/whisper_engine.py +139 -0
- s2t-0.1.0.post1.dev2/src/s2t.egg-info/PKG-INFO +85 -0
- s2t-0.1.0.post1.dev2/src/s2t.egg-info/SOURCES.txt +25 -0
- s2t-0.1.0.post1.dev2/src/s2t.egg-info/dependency_links.txt +1 -0
- s2t-0.1.0.post1.dev2/src/s2t.egg-info/entry_points.txt +2 -0
- s2t-0.1.0.post1.dev2/src/s2t.egg-info/requires.txt +13 -0
- s2t-0.1.0.post1.dev2/src/s2t.egg-info/top_level.txt +1 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
# OS
|
2
|
+
.DS_Store
|
3
|
+
|
4
|
+
# Python
|
5
|
+
__pycache__/
|
6
|
+
*.py[cod]
|
7
|
+
*.pyo
|
8
|
+
*.pyd
|
9
|
+
.Python
|
10
|
+
.venv/
|
11
|
+
venv/
|
12
|
+
env/
|
13
|
+
build/
|
14
|
+
dist/
|
15
|
+
*.egg-info/
|
16
|
+
.pytest_cache/
|
17
|
+
.mypy_cache/
|
18
|
+
.ruff_cache/
|
19
|
+
|
20
|
+
# IDE
|
21
|
+
.idea/
|
22
|
+
.vscode/
|
23
|
+
|
24
|
+
# Env and local config
|
25
|
+
.env
|
26
|
+
.env.local
|
27
|
+
.env.*.local
|
28
|
+
.env.twine
|
29
|
+
|
30
|
+
# Project outputs (set via --outdir, e.g., 'transcripts/')
|
31
|
+
transcripts/
|
32
|
+
runs/
|
33
|
+
output/
|
34
|
+
out/
|
35
|
+
data/
|
@@ -0,0 +1,19 @@
|
|
1
|
+
repos:
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
3
|
+
rev: v0.6.8
|
4
|
+
hooks:
|
5
|
+
- id: ruff
|
6
|
+
args: ["--fix"]
|
7
|
+
- id: ruff-format
|
8
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
9
|
+
rev: v1.10.0
|
10
|
+
hooks:
|
11
|
+
- id: mypy
|
12
|
+
additional_dependencies: []
|
13
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
14
|
+
rev: v4.6.0
|
15
|
+
hooks:
|
16
|
+
- id: end-of-file-fixer
|
17
|
+
- id: trailing-whitespace
|
18
|
+
- id: check-ast
|
19
|
+
- id: check-yaml
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Repository Guidelines
|
2
|
+
|
3
|
+
## Project Structure & Module Organization
|
4
|
+
- `src/s2t/` — application code (CLI, core modules, utils).
|
5
|
+
- `tests/` — pytest test suite; fixtures under `tests/fixtures/`.
|
6
|
+
- `scripts/` — maintenance/dev scripts (one task per file).
|
7
|
+
- `assets/` — sample media and static assets used in docs/tests.
|
8
|
+
- `docs/` — user and developer docs.
|
9
|
+
- `data/` — local runtime artifacts (ignored via `.gitignore`).
|
10
|
+
|
11
|
+
## Build, Test, and Development Commands
|
12
|
+
Prefer Make targets when available:
|
13
|
+
- `make setup` — create venv and install app + dev deps.
|
14
|
+
- `make record ARGS="..."` — run the CLI locally with arguments.
|
15
|
+
- `make test` — run tests; show failures concisely.
|
16
|
+
- `make lint` — static checks: ruff (with auto-fix), ruff format, and mypy.
|
17
|
+
- `make format` — auto-fix lint issues and format code in place (ruff).
|
18
|
+
If no Makefile: `python -m venv .venv && source .venv/bin/activate && pip install -e '.[dev]'`, then `pytest -q`, `ruff check .`, `ruff format --check .`.
|
19
|
+
|
20
|
+
## Coding Style & Naming Conventions
|
21
|
+
- Python 3.11+, 4-space indentation, type hints required.
|
22
|
+
- Naming: `snake_case` for modules/functions/vars, `PascalCase` for classes.
|
23
|
+
- Docstrings: concise, Google-style; include argument/return types when helpful.
|
24
|
+
- Tools: `ruff` (lint and format), `mypy` (types). Keep imports sorted.
|
25
|
+
|
26
|
+
## Testing Guidelines
|
27
|
+
- Framework: `pytest` with `pytest-cov`.
|
28
|
+
- Place tests mirroring package paths; name files `test_*.py`.
|
29
|
+
- Aim for ≥90% coverage on core modules; run `pytest --cov=src/s2t`.
|
30
|
+
- Include edge cases (empty/long inputs, I/O errors). Use fixtures for media samples.
|
31
|
+
|
32
|
+
## Commit & Pull Request Guidelines
|
33
|
+
- Use Conventional Commits: `feat:`, `fix:`, `docs:`, `refactor:`, `test:`, `chore:`.
|
34
|
+
- Keep commits focused; include rationale in body when non-trivial.
|
35
|
+
- PRs: clear description, linked issues, screenshots or sample CLI output when applicable, and updated tests/docs.
|
36
|
+
|
37
|
+
## Security & Configuration Tips
|
38
|
+
- Never commit API keys or raw user data. Use `.env` and provide `.env.example`.
|
39
|
+
- Sanitize example assets; large files belong outside the repo or via Git LFS.
|
40
|
+
|
41
|
+
## Agent-Specific Instructions
|
42
|
+
- Scope: applies to the entire repository.
|
43
|
+
- Keep patches minimal and targeted; update tests/docs alongside code.
|
44
|
+
- Follow the structure and commands above; avoid introducing new tools without discussion.
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Contributing
|
2
|
+
|
3
|
+
Danke für deinen Beitrag! Hier findest du die wichtigsten Hinweise für Entwicklung und Checks.
|
4
|
+
|
5
|
+
## Voraussetzungen
|
6
|
+
- Python 3.11+
|
7
|
+
- Empfohlen: Projekt-venv via Makefile
|
8
|
+
|
9
|
+
## Setup
|
10
|
+
```
|
11
|
+
make setup
|
12
|
+
```
|
13
|
+
|
14
|
+
## Wichtige Befehle
|
15
|
+
- Formatieren (auto-fix): `make format` (Ruff Lint-Fixes + Ruff Formatter)
|
16
|
+
- Lint + Typprüfung (auto-fix + mypy): `make lint`
|
17
|
+
- Tests: `make test`
|
18
|
+
- Kombiniert (Gate vor Build/Release): `make check`
|
19
|
+
|
20
|
+
## Pre-commit Hooks
|
21
|
+
Installiere optionale Git-Hooks lokal:
|
22
|
+
```
|
23
|
+
make precommit-install
|
24
|
+
```
|
25
|
+
Aktive Hooks: Ruff (`--fix`), Ruff Formatter, mypy und Basis-Hooks.
|
26
|
+
|
27
|
+
## Stil & Typen
|
28
|
+
- Formatter: Ruff Formatter (Black wurde entfernt)
|
29
|
+
- Linting: Ruff (Importsortierung, Qualitätsregeln)
|
30
|
+
- Typen: mypy; bitte durchgängig Typannotationen nutzen
|
31
|
+
|
32
|
+
## Struktur
|
33
|
+
- App-Code: `src/s2t/` (CLI, Module)
|
34
|
+
- Tests: `tests/`
|
35
|
+
- Scripts: `scripts/`
|
36
|
+
- Docs: `docs/`
|
37
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
recursive-exclude stubs *
|
@@ -0,0 +1,116 @@
|
|
1
|
+
SHELL := /bin/bash
|
2
|
+
# Ensure project venv binaries are preferred
|
3
|
+
PATH := .venv/bin:$(PATH)
|
4
|
+
# Prefer the project venv if present
|
5
|
+
PYTHON := $(shell if [ -x .venv/bin/python ]; then echo .venv/bin/python; else command -v python; fi)
|
6
|
+
PIP := $(PYTHON) -m pip
|
7
|
+
# Every target below is a command, not a file; `profile` is included so a stray file named "profile" cannot shadow it.
.PHONY: help setup dev install ensure-dev lint format test check build publish publish-test record profile list-models clean version precommit-install guard-venv
|
8
|
+
|
9
|
+
help:
|
10
|
+
@echo "Common targets:"
|
11
|
+
@echo " setup Create/refresh .venv and install dev extras"
|
12
|
+
@echo " dev Install dev extras into current env"
|
13
|
+
@echo " install Install package (editable)"
|
14
|
+
@echo " lint Run ruff (auto-fix) and mypy"
|
15
|
+
@echo " format Run ruff check --fix and ruff format"
|
16
|
+
@echo " test Run pytest"
|
17
|
+
@echo " check Lint + tests (pre-publish gate)"
|
18
|
+
@echo " build Build sdist/wheel (installs 'build' if missing)"
|
19
|
+
@echo " publish Upload to PyPI via twine (installs 'twine' if missing)"
|
20
|
+
@echo " publish-test Upload to TestPyPI via twine"
|
21
|
+
@echo " record Run s2t (passes ARGS='...' to forward options)"
|
22
|
+
@echo " profile Run s2t with --profile (use ARGS to pass flags)"
|
23
|
+
@echo " list-models Print whisper.available_models()"
|
24
|
+
@echo " version Print package version"
|
25
|
+
@echo " precommit-install Install pre-commit git hooks"
|
26
|
+
|
27
|
+
setup: guard-venv
|
28
|
+
@if [ ! -x .venv/bin/python ]; then $(PYTHON) -m venv .venv; fi; \
|
29
|
+
.venv/bin/python -m pip install -U pip setuptools wheel; \
|
30
|
+
.venv/bin/python -m pip install -e '.[dev]'
|
31
|
+
|
32
|
+
dev: guard-venv
|
33
|
+
$(PIP) install -e '.[dev]'
|
34
|
+
|
35
|
+
install: guard-venv
|
36
|
+
$(PIP) install -e .
|
37
|
+
|
38
|
+
lint:
|
39
|
+
lint: guard-venv ensure-dev
|
40
|
+
# Auto-fix what Ruff can, then format, then type-check
|
41
|
+
$(PYTHON) -m ruff check . --fix --unsafe-fixes
|
42
|
+
$(PYTHON) -m ruff format .
|
43
|
+
$(PYTHON) -m mypy src
|
44
|
+
|
45
|
+
format:
|
46
|
+
format: guard-venv ensure-dev
|
47
|
+
$(PYTHON) -m ruff check . --fix --unsafe-fixes
|
48
|
+
$(PYTHON) -m ruff format .
|
49
|
+
|
50
|
+
test:
|
51
|
+
test: guard-venv ensure-dev
|
52
|
+
$(PYTHON) -m pytest -q
|
53
|
+
|
54
|
+
check: guard-venv ensure-dev lint test
|
55
|
+
|
56
|
+
build: guard-venv
|
57
|
+
$(MAKE) format
|
58
|
+
$(MAKE) lint
|
59
|
+
$(PYTHON) -m mypy src
|
60
|
+
# Build artifacts (clean dist to avoid duplicate files)
|
61
|
+
rm -rf dist
|
62
|
+
$(PYTHON) -c 'import importlib.util,sys; sys.exit(0) if importlib.util.find_spec("build") else sys.exit(1)' || $(PIP) install build ; \
|
63
|
+
$(PYTHON) -m build $(if $(NO_ISOLATION),--no-isolation,)
|
64
|
+
|
65
|
+
publish: guard-venv build
|
66
|
+
$(PYTHON) -c 'import importlib.util,sys; sys.exit(0) if importlib.util.find_spec("twine") else sys.exit(1)' || $(PIP) install twine ; \
|
67
|
+
[ -f .env.twine ] && set -a && . ./.env.twine && set +a || true; \
|
68
|
+
if [ "$$ALLOW_CUSTOM_TWINE_USERNAME" = "1" ]; then TWINE_USERNAME="$${TWINE_USERNAME:-__token__}"; else TWINE_USERNAME="__token__"; fi; \
|
69
|
+
if [ -n "$$TWINE_PASSWORD_CMD" ] && [ -z "$$TWINE_PASSWORD" ]; then TWINE_PASSWORD="$$(sh -c "$$TWINE_PASSWORD_CMD" | head -n1 | tr -d '\r\n')"; fi; \
|
70
|
+
if [ -n "$$PASS_TWINE_ENTRY" ] && [ -z "$$TWINE_PASSWORD" ]; then TWINE_PASSWORD="$$(pass show "$$PASS_TWINE_ENTRY" | head -n1 | tr -d '\r\n')"; fi; \
|
71
|
+
if [ -z "$$TWINE_PASSWORD" ]; then echo "Error: TWINE_PASSWORD is empty. Provide via .env.twine, TWINE_PASSWORD_CMD, or PASS_TWINE_ENTRY." >&2; exit 2; fi; \
|
72
|
+
env TWINE_USERNAME="$$TWINE_USERNAME" TWINE_PASSWORD="$$TWINE_PASSWORD" twine upload --non-interactive $$TWINE_EXTRA_FLAGS dist/*
|
73
|
+
|
74
|
+
publish-test: guard-venv build
|
75
|
+
$(PYTHON) -c 'import importlib.util,sys; sys.exit(0) if importlib.util.find_spec("twine") else sys.exit(1)' || $(PIP) install twine ; \
|
76
|
+
[ -f .env.twine ] && set -a && . ./.env.twine && set +a || true; \
|
77
|
+
if [ "$$ALLOW_CUSTOM_TWINE_USERNAME" = "1" ]; then TWINE_USERNAME="$${TWINE_USERNAME:-__token__}"; else TWINE_USERNAME="__token__"; fi; \
|
78
|
+
if [ -n "$$TWINE_PASSWORD_CMD" ] && [ -z "$$TWINE_PASSWORD" ]; then TWINE_PASSWORD="$$(sh -c "$$TWINE_PASSWORD_CMD" | head -n1 | tr -d '\r\n')"; fi; \
|
79
|
+
if [ -n "$$PASS_TWINE_ENTRY" ] && [ -z "$$TWINE_PASSWORD" ]; then TWINE_PASSWORD="$$(pass show "$$PASS_TWINE_ENTRY" | head -n1 | tr -d '\r\n')"; fi; \
|
80
|
+
if [ -z "$$TWINE_PASSWORD" ]; then echo "Error: TWINE_PASSWORD is empty. Provide via .env.twine, TWINE_PASSWORD_CMD, or PASS_TWINE_ENTRY." >&2; exit 2; fi; \
|
81
|
+
env TWINE_USERNAME="$$TWINE_USERNAME" TWINE_PASSWORD="$$TWINE_PASSWORD" twine upload --non-interactive --repository testpypi $$TWINE_EXTRA_FLAGS dist/*
|
82
|
+
|
83
|
+
record: guard-venv
|
84
|
+
@if [ -x .venv/bin/s2t ]; then .venv/bin/s2t $(ARGS); else s2t $(ARGS); fi
|
85
|
+
|
86
|
+
profile: guard-venv
|
87
|
+
@if [ -x .venv/bin/s2t ]; then .venv/bin/s2t --profile $(ARGS); else s2t --profile $(ARGS); fi
|
88
|
+
|
89
|
+
list-models: guard-venv
|
90
|
+
$(PYTHON) -c "import whisper; print('\n'.join(sorted(whisper.available_models())))"
|
91
|
+
|
92
|
+
clean:
|
93
|
+
rm -rf build dist .pytest_cache .mypy_cache .ruff_cache *.egg-info
|
94
|
+
|
95
|
+
version: guard-venv
|
96
|
+
$(PYTHON) -c "import s2t; print(s2t.__version__)"
|
97
|
+
|
98
|
+
|
99
|
+
ensure-dev:
|
100
|
+
@which ruff >/dev/null 2>&1 || $(PIP) install -e '.[dev]'
|
101
|
+
@which mypy >/dev/null 2>&1 || true
|
102
|
+
@which pytest >/dev/null 2>&1 || true
|
103
|
+
# Do not auto-install pre-commit during lint/format/test to avoid network
|
104
|
+
@which pre-commit >/dev/null 2>&1 || true
|
105
|
+
|
106
|
+
precommit-install: guard-venv ensure-dev
|
107
|
+
# Install pre-commit only when explicitly requested
|
108
|
+
@which pre-commit >/dev/null 2>&1 || $(PIP) install pre-commit
|
109
|
+
pre-commit install --install-hooks
|
110
|
+
|
111
|
+
guard-venv:
|
112
|
+
@if [ -n "$$VIRTUAL_ENV" ] && [ "$$VIRTUAL_ENV" != "$$PWD/.venv" ]; then \
|
113
|
+
echo "Error: active venv ($$VIRTUAL_ENV) differs from project .venv ($$PWD/.venv)."; \
|
114
|
+
echo "Please 'deactivate' or use the project venv (.venv)."; \
|
115
|
+
exit 1; \
|
116
|
+
fi
|
@@ -0,0 +1,85 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: s2t
|
3
|
+
Version: 0.1.0.post1.dev2
|
4
|
+
Summary: Speech to Text (s2t): Record audio, run Whisper, export formats, and copy transcript to clipboard.
|
5
|
+
Author: Maintainers
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
8
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
10
|
+
Classifier: Environment :: Console
|
11
|
+
Classifier: Operating System :: OS Independent
|
12
|
+
Requires-Python: >=3.11
|
13
|
+
Description-Content-Type: text/markdown
|
14
|
+
Requires-Dist: sounddevice>=0.4.6
|
15
|
+
Requires-Dist: soundfile>=0.12.1
|
16
|
+
Requires-Dist: numpy>=1.23
|
17
|
+
Requires-Dist: openai-whisper>=20231117
|
18
|
+
Provides-Extra: dev
|
19
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
20
|
+
Requires-Dist: pytest-cov>=4; extra == "dev"
|
21
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
22
|
+
Requires-Dist: mypy>=1.7; extra == "dev"
|
23
|
+
Requires-Dist: build>=1; extra == "dev"
|
24
|
+
Requires-Dist: setuptools-scm>=8; extra == "dev"
|
25
|
+
Requires-Dist: twine>=4; extra == "dev"
|
26
|
+
|
27
|
+
# s2t
|
28
|
+
|
29
|
+
Record audio from your microphone, run Whisper to transcribe it, export common formats, and copy the .txt transcript to your clipboard.
|
30
|
+
|
31
|
+
## Install
|
32
|
+
- From local checkout:
|
33
|
+
- Editable: `pip install -e .`
|
34
|
+
- Standard: `pip install .`
|
35
|
+
|
36
|
+
Requirements: Python 3.11+. No mandatory external binaries. ffmpeg is optional (only for MP3 encoding/decoding).
|
37
|
+
|
38
|
+
System requirements (Linux)
|
39
|
+
- Some environments need system libraries for audio I/O:
|
40
|
+
- Debian/Ubuntu: `sudo apt-get install libportaudio2 libsndfile1`
|
41
|
+
- Fedora/RHEL: `sudo dnf install portaudio libsndfile`
|
42
|
+
- Optional for MP3: ffmpeg (`sudo apt-get install ffmpeg` or `brew install ffmpeg`).
|
43
|
+
|
44
|
+
## Usage
|
45
|
+
- Start interactive recording and transcribe:
|
46
|
+
- `s2t`
|
47
|
+
- Short options:
|
48
|
+
- Language: `-l de` (long: `--lang de`)
|
49
|
+
- Model: `-m large-v3` (long: `--model large-v3`)
|
50
|
+
- Sample rate: `-r 48000` (long: `--rate 48000`)
|
51
|
+
- Channels: `-c 2` (long: `--channels 2`)
|
52
|
+
- Output dir: `-o transcripts` (long: `--outdir transcripts`) — default is `transcripts/` if omitted
|
53
|
+
- Translate to English: `-t` (long: `--translate`). You may still provide `--lang` as an input-language hint if you want.
|
54
|
+
- List available models and exit: `-L` (long: `--list-models`)
|
55
|
+
- Recording format: `-f flac|wav|mp3` (long: `--recording-format`), default `flac`. MP3 requires ffmpeg; if absent, it falls back to FLAC with a warning.
|
56
|
+
- Prompt mode (spoken prompt): `-p` (long: `--prompt`). Speak your prompt first, then press SPACE to use it as prompt and continue with your main content. If you press ENTER instead of SPACE, no prompt is used; the spoken audio is transcribed as normal payload and the session ends.
|
57
|
+
- Keep chunk files: `--keep-chunks` — by default, per‑chunk audio and per‑chunk Whisper outputs are deleted after the final merge.
|
58
|
+
- Open transcript for editing: `-e` (long: `--edit`) — opens the generated `.txt` in your shell editor (`$VISUAL`/`$EDITOR`).
|
59
|
+
- Examples:
|
60
|
+
- Transcribe in German using large-v3: `s2t -l de -m large-v3`
|
61
|
+
- Translate any input to English: `s2t -t`
|
62
|
+
- Write outputs under transcripts/: `s2t -o transcripts`
|
63
|
+
- List local model names: `s2t -L`
|
64
|
+
|
65
|
+
Outputs are written into a timestamped folder under the chosen output directory (default is `transcripts/`), e.g. `transcripts/2025-01-31T14-22-05+0200/`, containing:
|
66
|
+
- Per‑chunk outputs: `chunk_####.flac/.wav` plus `chunk_####.txt/.srt/.vtt/.tsv/.json` (deleted by default unless `--keep-chunks`)
|
67
|
+
- Final outputs: `recording.flac/.wav` (and `recording.mp3` if requested and ffmpeg available), plus `recording.txt/.srt/.vtt/.tsv/.json`
|
68
|
+
- Clipboard mirrors the combined `.txt` with blank lines between chunks.
|
69
|
+
|
70
|
+
## Makefile (optional)
|
71
|
+
- Setup venv + dev deps: `make setup`
|
72
|
+
- Lint/format/test: `make lint`, `make format`, `make test`; combined gate: `make check`
|
73
|
+
- Build sdist/wheel: `make build` (runs `check` first)
|
74
|
+
- Publish to PyPI/TestPyPI: `make publish`, `make publish-test` (run after `build`)
|
75
|
+
- Run CLI: `make record ARGS='-l de -t -o transcripts'`
|
76
|
+
- List models: `make list-models`
|
77
|
+
- Show package version: `make version`
|
78
|
+
|
79
|
+
Notes on models
|
80
|
+
- The local openai-whisper CLI supports models like: `tiny`, `base`, `small`, `medium`, `large-v1`, `large-v2`, `large-v3` and their `.en` variants.
|
81
|
+
- The name `turbo` refers to OpenAI’s hosted model family and is not provided by the local `whisper` CLI. If you pass `-m turbo`, the command may fail; choose a supported local model instead.
|
82
|
+
|
83
|
+
## Development & Release
|
84
|
+
- Für Entwickler-Setup und Beitragshinweise siehe `CONTRIBUTING.md`.
|
85
|
+
- Für den Release-Prozess siehe `docs/RELEASING.md`.
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# s2t
|
2
|
+
|
3
|
+
Record audio from your microphone, run Whisper to transcribe it, export common formats, and copy the .txt transcript to your clipboard.
|
4
|
+
|
5
|
+
## Install
|
6
|
+
- From local checkout:
|
7
|
+
- Editable: `pip install -e .`
|
8
|
+
- Standard: `pip install .`
|
9
|
+
|
10
|
+
Requirements: Python 3.11+. No mandatory external binaries. ffmpeg is optional (only for MP3 encoding/decoding).
|
11
|
+
|
12
|
+
System requirements (Linux)
|
13
|
+
- Some environments need system libraries for audio I/O:
|
14
|
+
- Debian/Ubuntu: `sudo apt-get install libportaudio2 libsndfile1`
|
15
|
+
- Fedora/RHEL: `sudo dnf install portaudio libsndfile`
|
16
|
+
- Optional for MP3: ffmpeg (`sudo apt-get install ffmpeg` or `brew install ffmpeg`).
|
17
|
+
|
18
|
+
## Usage
|
19
|
+
- Start interactive recording and transcribe:
|
20
|
+
- `s2t`
|
21
|
+
- Short options:
|
22
|
+
- Language: `-l de` (long: `--lang de`)
|
23
|
+
- Model: `-m large-v3` (long: `--model large-v3`)
|
24
|
+
- Sample rate: `-r 48000` (long: `--rate 48000`)
|
25
|
+
- Channels: `-c 2` (long: `--channels 2`)
|
26
|
+
- Output dir: `-o transcripts` (long: `--outdir transcripts`) — default is `transcripts/` if omitted
|
27
|
+
- Translate to English: `-t` (long: `--translate`). You may still provide `--lang` as an input-language hint if you want.
|
28
|
+
- List available models and exit: `-L` (long: `--list-models`)
|
29
|
+
- Recording format: `-f flac|wav|mp3` (long: `--recording-format`), default `flac`. MP3 requires ffmpeg; if absent, it falls back to FLAC with a warning.
|
30
|
+
- Prompt mode (spoken prompt): `-p` (long: `--prompt`). Speak your prompt first, then press SPACE to use it as prompt and continue with your main content. If you press ENTER instead of SPACE, no prompt is used; the spoken audio is transcribed as normal payload and the session ends.
|
31
|
+
- Keep chunk files: `--keep-chunks` — by default, per‑chunk audio and per‑chunk Whisper outputs are deleted after the final merge.
|
32
|
+
- Open transcript for editing: `-e` (long: `--edit`) — opens the generated `.txt` in your shell editor (`$VISUAL`/`$EDITOR`).
|
33
|
+
- Examples:
|
34
|
+
- Transcribe in German using large-v3: `s2t -l de -m large-v3`
|
35
|
+
- Translate any input to English: `s2t -t`
|
36
|
+
- Write outputs under transcripts/: `s2t -o transcripts`
|
37
|
+
- List local model names: `s2t -L`
|
38
|
+
|
39
|
+
Outputs are written into a timestamped folder under the chosen output directory (default is `transcripts/`), e.g. `transcripts/2025-01-31T14-22-05+0200/`, containing:
|
40
|
+
- Per‑chunk outputs: `chunk_####.flac/.wav` plus `chunk_####.txt/.srt/.vtt/.tsv/.json` (deleted by default unless `--keep-chunks`)
|
41
|
+
- Final outputs: `recording.flac/.wav` (and `recording.mp3` if requested and ffmpeg available), plus `recording.txt/.srt/.vtt/.tsv/.json`
|
42
|
+
- Clipboard mirrors the combined `.txt` with blank lines between chunks.
|
43
|
+
|
44
|
+
## Makefile (optional)
|
45
|
+
- Setup venv + dev deps: `make setup`
|
46
|
+
- Lint/format/test: `make lint`, `make format`, `make test`; combined gate: `make check`
|
47
|
+
- Build sdist/wheel: `make build` (runs `format`, `lint`, and mypy first; run `make check` separately to include the tests)
|
48
|
+
- Publish to PyPI/TestPyPI: `make publish`, `make publish-test` (each runs `make build` first)
|
49
|
+
- Run CLI: `make record ARGS='-l de -t -o transcripts'`
|
50
|
+
- List models: `make list-models`
|
51
|
+
- Show package version: `make version`
|
52
|
+
|
53
|
+
Notes on models
|
54
|
+
- The local openai-whisper CLI supports models like: `tiny`, `base`, `small`, `medium`, `large-v1`, `large-v2`, `large-v3` and their `.en` variants.
|
55
|
+
- The name `turbo` refers to OpenAI’s hosted model family and is not provided by the local `whisper` CLI. If you pass `-m turbo`, the command may fail; choose a supported local model instead.
|
56
|
+
|
57
|
+
## Development & Release
|
58
|
+
- For developer setup and contribution guidelines, see `CONTRIBUTING.md`.
|
59
|
+
- For the release process, see `docs/RELEASING.md`.
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Releasing
|
2
|
+
|
3
|
+
Versionierung und Releases basieren auf `setuptools-scm` (PEP 621, dynamische Version):
|
4
|
+
|
5
|
+
- Versionen werden über Git-Tags abgeleitet (z. B. `v0.1.0`).
|
6
|
+
- Während der Entwicklung (ohne Tag) wird eine Fallback-Version genutzt; mit Tag erhält das Paket die exakte Version.
|
7
|
+
|
8
|
+
## Schritte
|
9
|
+
1. Tag erstellen und pushen, z. B.:
|
10
|
+
- `git tag v0.1.1`
|
11
|
+
- `git push --tags`
|
12
|
+
2. Optional TestPyPI, sonst direkt PyPI:
|
13
|
+
- `make publish-test` oder `make publish`
|
14
|
+
- Diese Targets bauen zuvor (`make build`); der Build führt Format, Lint und Typprüfung aus, aber keine Tests – `make check` daher vorher separat ausführen.
|
15
|
+
|
16
|
+
## Build-Details
|
17
|
+
- `make check` führt Format/Lint/Typprüfung und Tests aus.
|
18
|
+
- `make build` erzeugt sdist und Wheel.
|
19
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = [
|
3
|
+
"setuptools>=68",
|
4
|
+
"wheel",
|
5
|
+
"setuptools-scm>=8",
|
6
|
+
]
|
7
|
+
build-backend = "setuptools.build_meta"
|
8
|
+
|
9
|
+
[project]
|
10
|
+
name = "s2t"
|
11
|
+
dynamic = ["version"]
|
12
|
+
description = "Speech to Text (s2t): Record audio, run Whisper, export formats, and copy transcript to clipboard."
|
13
|
+
readme = "README.md"
|
14
|
+
requires-python = ">=3.11"
|
15
|
+
license = "LicenseRef-Proprietary"
|
16
|
+
authors = [
|
17
|
+
{ name = "Maintainers" }
|
18
|
+
]
|
19
|
+
dependencies = [
|
20
|
+
"sounddevice>=0.4.6",
|
21
|
+
"soundfile>=0.12.1",
|
22
|
+
"numpy>=1.23",
|
23
|
+
"openai-whisper>=20231117",
|
24
|
+
]
|
25
|
+
classifiers = [
|
26
|
+
"Programming Language :: Python :: 3",
|
27
|
+
"Programming Language :: Python :: 3 :: Only",
|
28
|
+
"Programming Language :: Python :: 3.11",
|
29
|
+
"Environment :: Console",
|
30
|
+
"Operating System :: OS Independent",
|
31
|
+
]
|
32
|
+
|
33
|
+
[project.optional-dependencies]
|
34
|
+
dev = [
|
35
|
+
"pytest>=7",
|
36
|
+
"pytest-cov>=4",
|
37
|
+
"ruff>=0.4",
|
38
|
+
"mypy>=1.7",
|
39
|
+
"build>=1",
|
40
|
+
# Required for local no-isolation builds because version is derived via setuptools-scm
|
41
|
+
"setuptools-scm>=8",
|
42
|
+
"twine>=4",
|
43
|
+
]
|
44
|
+
|
45
|
+
|
46
|
+
[project.scripts]
|
47
|
+
s2t = "s2t.cli:main"
|
48
|
+
|
49
|
+
[tool.setuptools]
|
50
|
+
package-dir = {"" = "src"}
|
51
|
+
|
52
|
+
[tool.setuptools.packages.find]
|
53
|
+
where = ["src"]
|
54
|
+
include = ["s2t*"]
|
55
|
+
|
56
|
+
[tool.setuptools_scm]
|
57
|
+
fallback_version = "0.1.0"
|
58
|
+
version_scheme = "no-guess-dev"
|
59
|
+
local_scheme = "no-local-version"
|
60
|
+
|
61
|
+
[tool.ruff]
|
62
|
+
line-length = 100
|
63
|
+
target-version = "py311"
|
64
|
+
|
65
|
+
[tool.ruff.lint]
|
66
|
+
select = ["E", "F", "I", "B", "UP"]
|
67
|
+
ignore = ["E501"]
|
68
|
+
|
69
|
+
[tool.ruff.format]
|
70
|
+
# Ruff-Formatter übernimmt die Formatierung (Black ersetzt)
|
71
|
+
quote-style = "preserve"
|
72
|
+
|
73
|
+
[tool.pytest.ini_options]
|
74
|
+
addopts = "-q"
|
75
|
+
testpaths = ["tests"]
|
76
|
+
|
77
|
+
[tool.mypy]
|
78
|
+
python_version = "3.11"
|
79
|
+
mypy_path = ["stubs"]
|
80
|
+
files = ["src"]
|
81
|
+
strict = false
|
82
|
+
warn_unused_ignores = true
|
83
|
+
warn_redundant_casts = true
|
84
|
+
no_implicit_optional = true
|
85
|
+
|
86
|
+
[tool.setuptools.package-data]
|
87
|
+
"s2t" = ["py.typed"]
|
@@ -0,0 +1,124 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Simple benchmarking for Whisper transcription to compare cold vs. warm runs.
|
4
|
+
|
5
|
+
Examples:
|
6
|
+
python scripts/bench_transcribe.py --file transcripts/sample.flac --runs 3 --model large-v3
|
7
|
+
python scripts/bench_transcribe.py --file transcripts/sample.flac --runs 3 --model large-v3 --reuse-model
|
8
|
+
python scripts/bench_transcribe.py --file transcripts/sample.flac --runs 3 --translate
|
9
|
+
"""
|
10
|
+
|
11
|
+
from __future__ import annotations
|
12
|
+
|
13
|
+
import argparse
|
14
|
+
import json
|
15
|
+
import time
|
16
|
+
from pathlib import Path
|
17
|
+
|
18
|
+
import numpy as np
|
19
|
+
|
20
|
+
|
21
|
+
def _resample_linear(x: np.ndarray, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample ``x`` from ``src_sr`` to ``dst_sr`` using linear interpolation.

    Args:
        x: Input samples; resampled along its first axis (``np.interp`` requires
            1-D data when an actual rate change happens).
        src_sr: Sample rate of ``x``.
        dst_sr: Desired sample rate.

    Returns:
        A float32 array. If both rates match, the input is returned (converted
        to float32 without copying when it already is float32). If the input or
        the computed output length is zero, an all-zero array of the output
        length is returned.
    """
    samples = x.astype(np.float32, copy=False)
    if src_sr == dst_sr:
        return samples

    src_len = samples.shape[0]
    dst_len = int(round(src_len * (dst_sr / float(src_sr))))
    if src_len == 0 or dst_len == 0:
        return np.zeros(dst_len, dtype=np.float32)

    # Both grids cover [0, 1) without the endpoint, so sample i of each grid
    # sits at the same relative position in time.
    src_grid = np.linspace(0.0, 1.0, num=src_len, endpoint=False)
    dst_grid = np.linspace(0.0, 1.0, num=dst_len, endpoint=False)
    resampled = np.interp(dst_grid, src_grid, samples)
    return resampled.astype(np.float32)
|
34
|
+
|
35
|
+
|
36
|
+
def _load_audio_for_model(path: Path) -> np.ndarray | None:
    """Read ``path`` with soundfile and return mono float32 audio at 16 kHz.

    Args:
        path: Audio file to decode.

    Returns:
        The decoded samples, averaged over channels when the file is
        multi-channel and resampled to 16000 Hz, or ``None`` when the file has
        an ``.mp3`` suffix or when importing soundfile, reading, or resampling
        raises any exception.
    """
    # MP3 is never decoded here; returning None leaves decoding to the caller.
    if path.suffix.lower() == ".mp3":
        return None
    try:
        import soundfile as sf  # type: ignore

        samples, sample_rate = sf.read(str(path), always_2d=False)
        is_multichannel = isinstance(samples, np.ndarray) and samples.ndim == 2
        if is_multichannel:
            # Down-mix by averaging across the channel axis.
            samples = samples.mean(axis=1)
        mono = np.asarray(samples, dtype=np.float32)
        return _resample_linear(mono, sample_rate, 16000)
    except Exception:
        # Best effort only: any failure means "no pre-decoded audio".
        return None
|
49
|
+
|
50
|
+
|
51
|
+
def bench(
    file: Path, runs: int, model_name: str, language: str | None, translate: bool, reuse_model: bool
) -> dict:
    """Time Whisper model loading and transcription of ``file``.

    Args:
        file: Audio file to transcribe.
        runs: Number of transcription runs to time.
        model_name: Name passed to ``whisper.load_model``.
        language: Language hint forwarded to ``transcribe`` (``None`` for no hint).
        translate: Use the ``"translate"`` task instead of ``"transcribe"``.
        reuse_model: Load the model once up front and reuse it for every run;
            otherwise the model is reloaded (and timed) inside each run.

    Returns:
        A dict with the keys ``file``, ``runs``, ``model``, ``language``,
        ``task``, ``results``, ``avg_model_load_sec`` and ``avg_transcribe_sec``.
        ``results`` holds one entry per run; with ``reuse_model`` it is preceded
        by one entry for the up-front load (``transcribe_sec`` of 0.0).
        ``avg_transcribe_sec`` averages over the transcription runs only, and
        ``avg_model_load_sec`` averages over the model loads that actually
        happened. Both are 0.0 when there is nothing to average.
    """
    import whisper  # type: ignore

    def _mean(values: list[float]) -> float:
        # Empty input (e.g. runs == 0) must not raise ZeroDivisionError.
        return sum(values) / len(values) if values else 0.0

    audio_arr = _load_audio_for_model(file)
    # Fall back to the file path when the audio could not be pre-decoded.
    audio_input = audio_arr if audio_arr is not None else str(file)
    task = "translate" if translate else "transcribe"

    times: list[dict[str, float]] = []
    load_secs: list[float] = []
    transcribe_secs: list[float] = []

    model = None
    if reuse_model:
        t0 = time.perf_counter()
        model = whisper.load_model(model_name)
        load_secs.append(time.perf_counter() - t0)
        # Kept in "results" for backward compatibility; excluded from the
        # transcription average below.
        times.append({"model_load_sec": load_secs[-1], "transcribe_sec": 0.0})

    for _ in range(runs):
        load_sec = 0.0
        if not reuse_model:
            t_load0 = time.perf_counter()
            model = whisper.load_model(model_name)
            load_sec = time.perf_counter() - t_load0
            load_secs.append(load_sec)

        t_tx0 = time.perf_counter()
        model.transcribe(
            audio_input,
            task=task,
            language=language,
            fp16=False,
        )
        transcribe_sec = time.perf_counter() - t_tx0
        transcribe_secs.append(transcribe_sec)
        times.append({"model_load_sec": load_sec, "transcribe_sec": transcribe_sec})

    return {
        "file": str(file),
        "runs": runs,
        "model": model_name,
        "language": language,
        "task": task,
        "results": times,
        "avg_model_load_sec": _mean(load_secs),
        "avg_transcribe_sec": _mean(transcribe_secs),
    }
|
94
|
+
|
95
|
+
|
96
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, run the benchmark, and print a summary.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Always 0. When ``--json-out`` is given, the full result dict is also
        written to that path as indented JSON (UTF-8).
    """
    parser = argparse.ArgumentParser(description="Benchmark Whisper transcription performance")
    parser.add_argument("--file", required=True, help="Audio file (flac/wav/mp3)")
    parser.add_argument("--runs", type=int, default=3, help="Number of runs")
    parser.add_argument(
        "--model", default="base", help="Model name (e.g., base, small, large-v3)"
    )
    parser.add_argument("--lang", default=None, help="Language hint (e.g., de, en)")
    parser.add_argument(
        "--translate", action="store_true", help="Use translate task (to English)"
    )
    parser.add_argument(
        "--reuse-model", action="store_true", help="Load model once and reuse across runs"
    )
    parser.add_argument("--json-out", default=None, help="Write JSON to file path")
    opts = parser.parse_args(argv)

    audio_path = Path(opts.file)
    summary = bench(
        audio_path, opts.runs, opts.model, opts.lang, opts.translate, opts.reuse_model
    )

    print("Benchmark summary:")
    print(f" file: {summary['file']}")
    print(f" model: {summary['model']} task: {summary['task']} lang: {summary['language']}")
    print(f" runs: {summary['runs']}")
    print(f" avg model load (s): {summary['avg_model_load_sec']:.3f}")
    print(f" avg transcribe (s): {summary['avg_transcribe_sec']:.3f}")

    if opts.json_out:
        payload = json.dumps(summary, indent=2)
        Path(opts.json_out).write_text(payload, encoding="utf-8")
        print(f" wrote JSON: {opts.json_out}")
    return 0
|
121
|
+
|
122
|
+
|
123
|
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version
|
2
|
+
|
3
|
+
__all__ = ["__version__"]

# Resolve the package version from the best available source:
#   1. metadata of the installed "s2t" distribution;
#   2. setuptools-scm, when running from a source checkout that is not installed;
#   3. the placeholder "0.0.0" when neither works.
try:
    __version__ = version("s2t")
except PackageNotFoundError:
    try:
        from setuptools_scm import get_version

        # This file lives at <repo>/src/s2t/__init__.py and `root` is resolved
        # relative to this file's directory, so the repository root is two
        # levels up ("../.."); ".." would point at <repo>/src instead.
        __version__ = get_version(root="../..", relative_to=__file__)
    except Exception:
        # Best effort: setuptools-scm missing or no SCM metadata available.
        __version__ = "0.0.0"
|