audio-transcriber 0.5.40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012-2023 Audel Rouhi
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ include README.md include requirements.txt recursive-include audio_transcriber *.py
@@ -0,0 +1,177 @@
1
+ Metadata-Version: 2.4
2
+ Name: audio-transcriber
3
+ Version: 0.5.40
4
+ Summary: Transcribe your .wav .mp4 .mp3 .flac files to text or record your own audio!
5
+ Author-email: Audel Rouhi <knucklessg1@gmail.com>
6
+ License: MIT
7
+ Classifier: Development Status :: 5 - Production/Stable
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Environment :: Console
10
+ Classifier: Operating System :: POSIX :: Linux
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: torch>=1.13.1
16
+ Requires-Dist: transformers>=4.25.1
17
+ Requires-Dist: pyaudio>=0.2.13
18
+ Requires-Dist: openai-whisper>=20250625
19
+ Requires-Dist: setuptools-rust>=1.12.0
20
+ Dynamic: license-file
21
+
22
+ # Audio-Transcriber
23
+
24
+ ![PyPI - Version](https://img.shields.io/pypi/v/audio-transcriber)
25
+ ![PyPI - Downloads](https://img.shields.io/pypi/dd/audio-transcriber)
26
+ ![GitHub Repo stars](https://img.shields.io/github/stars/Knuckles-Team/audio-transcriber)
27
+ ![GitHub forks](https://img.shields.io/github/forks/Knuckles-Team/audio-transcriber)
28
+ ![GitHub contributors](https://img.shields.io/github/contributors/Knuckles-Team/audio-transcriber)
29
+ ![PyPI - License](https://img.shields.io/pypi/l/audio-transcriber)
30
+ ![GitHub](https://img.shields.io/github/license/Knuckles-Team/audio-transcriber)
31
+
32
+ ![GitHub last commit (by committer)](https://img.shields.io/github/last-commit/Knuckles-Team/audio-transcriber)
33
+ ![GitHub pull requests](https://img.shields.io/github/issues-pr/Knuckles-Team/audio-transcriber)
34
+ ![GitHub closed pull requests](https://img.shields.io/github/issues-pr-closed/Knuckles-Team/audio-transcriber)
35
+ ![GitHub issues](https://img.shields.io/github/issues/Knuckles-Team/audio-transcriber)
36
+
37
+ ![GitHub top language](https://img.shields.io/github/languages/top/Knuckles-Team/audio-transcriber)
38
+ ![GitHub language count](https://img.shields.io/github/languages/count/Knuckles-Team/audio-transcriber)
39
+ ![GitHub repo size](https://img.shields.io/github/repo-size/Knuckles-Team/audio-transcriber)
40
+ ![GitHub repo file count (file type)](https://img.shields.io/github/directory-file-count/Knuckles-Team/audio-transcriber)
41
+ ![PyPI - Wheel](https://img.shields.io/pypi/wheel/audio-transcriber)
42
+ ![PyPI - Implementation](https://img.shields.io/pypi/implementation/audio-transcriber)
43
+
44
+ *Version: 0.5.40*
45
+
46
+ Transcribe your .wav .mp4 .mp3 .flac files to text or record your own audio!
47
+
48
+ This repository is actively maintained - Contributions are welcome!
49
+
50
+ Contribution Opportunities:
51
+ - Support new models
52
+
53
+ Wrapped around [OpenAI Whisper](https://pypi.org/project/openai-whisper)
54
+
55
+ <details>
56
+ <summary><b>Usage:</b></summary>
57
+
58
+ | Short Flag | Long Flag | Description |
59
+ |------------|-------------|---------------------------------------------------------------|
60
+ | -h | --help | See Usage |
61
+ | -b | --bitrate | Bitrate to use during recording |
62
+ | -c | --channels | Number of channels to use during recording |
63
+ | -d | --directory | Directory to save recording |
64
+ | -e | --export | Export txt, srt, and vtt files |
65
+ | -f | --file | File to transcribe |
66
+ | -l | --language | Language to transcribe |
67
+ | -m | --model | Model to use: <tiny, base, small, medium, large> |
68
+ | -n | --name | Name of recording |
69
+ | -r | --record | Specify number of seconds to record from microphone |
70
+
71
+ </details>
72
+
73
+ <details>
74
+ <summary><b>Example:</b></summary>
75
+
76
+ ```bash
77
+ audio-transcriber --file '~/Downloads/Federal_Reserve.mp4' --model 'large'
78
+ audio-transcriber --record 60 --directory '~/Downloads/' --name 'my_recording.wav' --model 'tiny'
79
+ ```
80
+
81
+
82
+ </details>
83
+
84
+ <details>
85
+ <summary><b>Model Information:</b></summary>
86
+
87
+ [Courtesy of and Credits to OpenAI: Whisper.ai](https://github.com/openai/whisper/blob/main/README.md)
88
+
89
+ | Size | Parameters | English-only model | Multilingual model | Required VRAM | Relative speed |
90
+ |:------:|:----------:|:------------------:|:------------------:|:-------------:|:--------------:|
91
+ | tiny | 39 M | `tiny.en` | `tiny` | ~1 GB | ~32x |
92
+ | base | 74 M | `base.en` | `base` | ~1 GB | ~16x |
93
+ | small | 244 M | `small.en` | `small` | ~2 GB | ~6x |
94
+ | medium | 769 M | `medium.en` | `medium` | ~5 GB | ~2x |
95
+ | large | 1550 M | N/A | `large` | ~10 GB | 1x |
96
+
97
+
98
+ </details>
99
+
100
+ <details>
101
+ <summary><b>Installation Instructions:</b></summary>
102
+
103
+ ## Use with AI
104
+
105
+ Configure `mcp.json`
106
+
107
+ Recommended: Store secrets in environment variables with lookup in JSON file.
108
+
109
+ For Testing Only: Plain text storage will also work, although **not** recommended.
110
+
111
+ ```json
112
+ {
113
+ "mcpServers": {
114
+ "audio_transcriber": {
115
+ "command": "uv",
116
+ "args": [
117
+ "run",
118
+ "--with",
119
+ "audio-transcriber",
120
+ "audio-transcriber-mcp"
121
+ ],
122
+ "env": {
123
+ "WHISPER_MODEL": "medium", // Optional
124
+ "TRANSCRIBE_DIRECTORY": "~/Downloads" // Optional
125
+ },
126
+ "timeout": 200000
127
+ }
128
+ }
129
+ }
130
+ ```
131
+
132
+ ### Deploy MCP Server as a container
133
+ ```bash
134
+ docker pull knucklessg1/audio-transcriber:latest
135
+ ```
136
+
137
+ Modify the `compose.yml`
138
+
139
+ ```yaml
140
+ services:
141
+ audio-transcriber:
142
+ image: knucklessg1/audio-transcriber:latest
143
+ environment:
144
+ - HOST=0.0.0.0
145
+ - PORT=8021
146
+ ports:
147
+ - 8021:8021
148
+ ```
149
+
150
+ ### Install Python Package
151
+
152
+ ```bash
153
+ python -m pip install audio-transcriber
154
+ ```
155
+
156
+ or
157
+
158
+ ```bash
159
+ uv pip install --upgrade audio-transcriber
160
+ ```
161
+
162
+ ##### Ubuntu Dependencies
163
+ ```bash
164
+ apt install -y libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
165
+ ```
166
+
167
+ </details>
168
+
169
+ <details>
170
+ <summary><b>Repository Owners:</b></summary>
171
+
172
+
173
+ <img width="100%" height="180em" src="https://github-readme-stats.vercel.app/api?username=Knucklessg1&show_icons=true&hide_border=true&&count_private=true&include_all_commits=true" />
174
+
175
+ ![GitHub followers](https://img.shields.io/github/followers/Knucklessg1)
176
+ ![GitHub User's stars](https://img.shields.io/github/stars/Knucklessg1)
177
+ </details>
@@ -0,0 +1,156 @@
1
+ # Audio-Transcriber
2
+
3
+ ![PyPI - Version](https://img.shields.io/pypi/v/audio-transcriber)
4
+ ![PyPI - Downloads](https://img.shields.io/pypi/dd/audio-transcriber)
5
+ ![GitHub Repo stars](https://img.shields.io/github/stars/Knuckles-Team/audio-transcriber)
6
+ ![GitHub forks](https://img.shields.io/github/forks/Knuckles-Team/audio-transcriber)
7
+ ![GitHub contributors](https://img.shields.io/github/contributors/Knuckles-Team/audio-transcriber)
8
+ ![PyPI - License](https://img.shields.io/pypi/l/audio-transcriber)
9
+ ![GitHub](https://img.shields.io/github/license/Knuckles-Team/audio-transcriber)
10
+
11
+ ![GitHub last commit (by committer)](https://img.shields.io/github/last-commit/Knuckles-Team/audio-transcriber)
12
+ ![GitHub pull requests](https://img.shields.io/github/issues-pr/Knuckles-Team/audio-transcriber)
13
+ ![GitHub closed pull requests](https://img.shields.io/github/issues-pr-closed/Knuckles-Team/audio-transcriber)
14
+ ![GitHub issues](https://img.shields.io/github/issues/Knuckles-Team/audio-transcriber)
15
+
16
+ ![GitHub top language](https://img.shields.io/github/languages/top/Knuckles-Team/audio-transcriber)
17
+ ![GitHub language count](https://img.shields.io/github/languages/count/Knuckles-Team/audio-transcriber)
18
+ ![GitHub repo size](https://img.shields.io/github/repo-size/Knuckles-Team/audio-transcriber)
19
+ ![GitHub repo file count (file type)](https://img.shields.io/github/directory-file-count/Knuckles-Team/audio-transcriber)
20
+ ![PyPI - Wheel](https://img.shields.io/pypi/wheel/audio-transcriber)
21
+ ![PyPI - Implementation](https://img.shields.io/pypi/implementation/audio-transcriber)
22
+
23
+ *Version: 0.5.40*
24
+
25
+ Transcribe your .wav .mp4 .mp3 .flac files to text or record your own audio!
26
+
27
+ This repository is actively maintained - Contributions are welcome!
28
+
29
+ Contribution Opportunities:
30
+ - Support new models
31
+
32
+ Wrapped around [OpenAI Whisper](https://pypi.org/project/openai-whisper)
33
+
34
+ <details>
35
+ <summary><b>Usage:</b></summary>
36
+
37
+ | Short Flag | Long Flag | Description |
38
+ |------------|-------------|---------------------------------------------------------------|
39
+ | -h | --help | See Usage |
40
+ | -b | --bitrate | Bitrate to use during recording |
41
+ | -c | --channels | Number of channels to use during recording |
42
+ | -d | --directory | Directory to save recording |
43
+ | -e | --export | Export txt, srt, and vtt files |
44
+ | -f | --file | File to transcribe |
45
+ | -l | --language | Language to transcribe |
46
+ | -m | --model | Model to use: <tiny, base, small, medium, large> |
47
+ | -n | --name | Name of recording |
48
+ | -r | --record | Specify number of seconds to record from microphone |
49
+
50
+ </details>
51
+
52
+ <details>
53
+ <summary><b>Example:</b></summary>
54
+
55
+ ```bash
56
+ audio-transcriber --file '~/Downloads/Federal_Reserve.mp4' --model 'large'
57
+ audio-transcriber --record 60 --directory '~/Downloads/' --name 'my_recording.wav' --model 'tiny'
58
+ ```
59
+
60
+
61
+ </details>
62
+
63
+ <details>
64
+ <summary><b>Model Information:</b></summary>
65
+
66
+ [Courtesy of and Credits to OpenAI: Whisper.ai](https://github.com/openai/whisper/blob/main/README.md)
67
+
68
+ | Size | Parameters | English-only model | Multilingual model | Required VRAM | Relative speed |
69
+ |:------:|:----------:|:------------------:|:------------------:|:-------------:|:--------------:|
70
+ | tiny | 39 M | `tiny.en` | `tiny` | ~1 GB | ~32x |
71
+ | base | 74 M | `base.en` | `base` | ~1 GB | ~16x |
72
+ | small | 244 M | `small.en` | `small` | ~2 GB | ~6x |
73
+ | medium | 769 M | `medium.en` | `medium` | ~5 GB | ~2x |
74
+ | large | 1550 M | N/A | `large` | ~10 GB | 1x |
75
+
76
+
77
+ </details>
78
+
79
+ <details>
80
+ <summary><b>Installation Instructions:</b></summary>
81
+
82
+ ## Use with AI
83
+
84
+ Configure `mcp.json`
85
+
86
+ Recommended: Store secrets in environment variables with lookup in JSON file.
87
+
88
+ For Testing Only: Plain text storage will also work, although **not** recommended.
89
+
90
+ ```json
91
+ {
92
+ "mcpServers": {
93
+ "audio_transcriber": {
94
+ "command": "uv",
95
+ "args": [
96
+ "run",
97
+ "--with",
98
+ "audio-transcriber",
99
+ "audio-transcriber-mcp"
100
+ ],
101
+ "env": {
102
+ "WHISPER_MODEL": "medium", // Optional
103
+ "TRANSCRIBE_DIRECTORY": "~/Downloads" // Optional
104
+ },
105
+ "timeout": 200000
106
+ }
107
+ }
108
+ }
109
+ ```
110
+
111
+ ### Deploy MCP Server as a container
112
+ ```bash
113
+ docker pull knucklessg1/audio-transcriber:latest
114
+ ```
115
+
116
+ Modify the `compose.yml`
117
+
118
+ ```yaml
119
+ services:
120
+ audio-transcriber:
121
+ image: knucklessg1/audio-transcriber:latest
122
+ environment:
123
+ - HOST=0.0.0.0
124
+ - PORT=8021
125
+ ports:
126
+ - 8021:8021
127
+ ```
128
+
129
+ ### Install Python Package
130
+
131
+ ```bash
132
+ python -m pip install audio-transcriber
133
+ ```
134
+
135
+ or
136
+
137
+ ```bash
138
+ uv pip install --upgrade audio-transcriber
139
+ ```
140
+
141
+ ##### Ubuntu Dependencies
142
+ ```bash
143
+ apt install -y libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
144
+ ```
145
+
146
+ </details>
147
+
148
+ <details>
149
+ <summary><b>Repository Owners:</b></summary>
150
+
151
+
152
+ <img width="100%" height="180em" src="https://github-readme-stats.vercel.app/api?username=Knucklessg1&show_icons=true&hide_border=true&&count_private=true&include_all_commits=true" />
153
+
154
+ ![GitHub followers](https://img.shields.io/github/followers/Knucklessg1)
155
+ ![GitHub User's stars](https://img.shields.io/github/stars/Knucklessg1)
156
+ </details>
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ from audio_transcriber.audio_transcriber import (
5
+ main,
6
+ AudioTranscriber,
7
+ setup_logging,
8
+ )
9
+ from audio_transcriber.audio_transcriber_mcp import audio_transcriber_mcp
10
+
11
+ """
12
+ audio-transcriber
13
+
14
+ Transcribe your .wav .mp4 .mp3 .flac files to text using AI!
15
+ """
16
+
17
+
18
+ __all__ = ["audio_transcriber_mcp", "main", "AudioTranscriber", "setup_logging"]
@@ -0,0 +1,429 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ import argparse
5
+ import datetime
6
+ import json
7
+ import logging
8
+ import sys
9
+ import threading
10
+ from pathlib import Path
11
+ from typing import Iterator, List, Optional, TextIO, Union
12
+
13
+ import pyaudio
14
+ import whisper
15
+ import wave
16
+
17
+
18
+ class AudioTranscriber:
19
+ """A class for recording audio and transcribing it using OpenAI's Whisper model."""
20
+
21
+ def __init__(
22
+ self,
23
+ model: str = "base",
24
+ channels: int = 1,
25
+ rate: int = 16000, # Whisper recommends 16kHz for better accuracy
26
+ file_name: str = "output.wav",
27
+ directory: Union[str, Path] = Path.cwd(),
28
+ file: Optional[Union[str, Path]] = None,
29
+ device: Optional[int] = None,
30
+ logger: Optional[logging.Logger] = None,
31
+ ):
32
+ self.chunk = 1024
33
+ self.format = pyaudio.paInt16
34
+ self.channels = channels
35
+ self.rate = rate
36
+ self.pyaudio_instance = pyaudio.PyAudio()
37
+ self.stream = None
38
+ self.frames: List[bytes] = []
39
+ self.file_path = Path(file) if file else Path(directory) / file_name
40
+ self.title = self.file_path.stem
41
+ self.directory = self.file_path.parent
42
+ self.stop = False
43
+ self.model = whisper.load_model(model)
44
+ self.device_index = device or self._get_default_device()
45
+ self.logger = logger or logging.getLogger(__name__)
46
+ self._check_ffmpeg()
47
+
48
+ def _get_default_device(self) -> int:
49
+ """Get the default input device index."""
50
+ return self.pyaudio_instance.get_default_input_device_info()["index"]
51
+
52
+ def _check_ffmpeg(self) -> None:
53
+ """Check if ffmpeg is installed; log warning if not."""
54
+ import shutil
55
+
56
+ if not shutil.which("ffmpeg"):
57
+ self.logger.warning(
58
+ "ffmpeg not found. Install it for better audio format support. "
59
+ "See https://ffmpeg.org/download.html for instructions."
60
+ )
61
+
62
+ def initiate_stream(self) -> None:
63
+ """Initiate the audio input stream."""
64
+ self.stream = self.pyaudio_instance.open(
65
+ format=self.format,
66
+ channels=self.channels,
67
+ rate=self.rate,
68
+ input=True,
69
+ frames_per_buffer=self.chunk,
70
+ input_device_index=self.device_index,
71
+ )
72
+
73
+ def record(self, seconds: int = 0) -> None:
74
+ """Record audio for a specified duration or until stopped."""
75
+ self.logger.info("Recording started...")
76
+ self.frames = []
77
+ self.stop = False
78
+ if seconds > 0:
79
+ for _ in range(0, int((self.rate / self.chunk) * seconds)):
80
+ if self.stop:
81
+ break
82
+ data = self.stream.read(self.chunk)
83
+ self.frames.append(data)
84
+ else:
85
+ self.logger.info("Recording indefinitely until interrupted (Ctrl+C)...")
86
+ threading.Thread(target=self._unlimited_record, daemon=True).start()
87
+ try:
88
+ while not self.stop:
89
+ pass
90
+ except KeyboardInterrupt:
91
+ self.stop = True
92
+ self.logger.info("Recording stopped.")
93
+
94
+ def _unlimited_record(self) -> None:
95
+ """Thread for unlimited recording."""
96
+ while not self.stop:
97
+ data = self.stream.read(self.chunk)
98
+ self.frames.append(data)
99
+
100
+ def stop_stream(self) -> None:
101
+ """Stop and close the audio stream."""
102
+ self.stop = True
103
+ if self.stream:
104
+ self.stream.stop_stream()
105
+ self.stream.close()
106
+ self.pyaudio_instance.terminate()
107
+
108
+ def save_stream(self) -> None:
109
+ """Save the recorded frames to a WAV file."""
110
+ if not self.frames:
111
+ self.logger.warning("No audio frames to save.")
112
+ return
113
+ with wave.open(str(self.file_path), "wb") as wave_file:
114
+ wave_file.setnchannels(self.channels)
115
+ wave_file.setsampwidth(self.pyaudio_instance.get_sample_size(self.format))
116
+ wave_file.setframerate(self.rate)
117
+ wave_file.writeframes(b"".join(self.frames))
118
+ self.logger.info(f"Audio saved to {self.file_path}")
119
+
120
+ def transcribe(
121
+ self,
122
+ language: Optional[str] = None,
123
+ task: str = "transcribe",
124
+ fp16: bool = True,
125
+ word_timestamps: bool = False,
126
+ temperature: float = 0.0,
127
+ initial_prompt: Optional[str] = None,
128
+ verbose: bool = False,
129
+ ) -> dict:
130
+ """Transcribe the audio file using Whisper."""
131
+ if task == "translate" and self.model.name.startswith("turbo"):
132
+ raise ValueError(
133
+ "Turbo model does not support translation. Use a multilingual model like 'medium' or 'large'."
134
+ )
135
+
136
+ start_time = datetime.datetime.now()
137
+ self.logger.info(
138
+ f"Started transcription at {start_time} for file: {self.file_path}"
139
+ )
140
+
141
+ options = whisper.DecodingOptions(
142
+ language=language,
143
+ task=task,
144
+ fp16=fp16,
145
+ word_timestamps=word_timestamps,
146
+ temperature=temperature,
147
+ prompt=initial_prompt,
148
+ )
149
+
150
+ result = self.model.transcribe(
151
+ str(self.file_path), **options.__dict__, verbose=verbose
152
+ )
153
+
154
+ end_time = datetime.datetime.now()
155
+ self.logger.info(
156
+ f"Ended transcription at {end_time}. Time elapsed: {end_time - start_time}"
157
+ )
158
+ if verbose:
159
+ self.logger.info(f"Transcription result: {result['text']}")
160
+
161
+ return result
162
+
163
+ def export(
164
+ self,
165
+ result: dict,
166
+ formats: List[str],
167
+ ) -> None:
168
+ """Export transcription to specified formats."""
169
+ segments = result["segments"]
170
+ for fmt in formats:
171
+ export_path = self.directory / f"{self.title}.{fmt}"
172
+ if fmt == "txt":
173
+ with open(export_path, "w", encoding="utf-8") as f:
174
+ self._write_txt(segments, f)
175
+ elif fmt == "vtt":
176
+ with open(export_path, "w", encoding="utf-8") as f:
177
+ self._write_vtt(segments, f)
178
+ elif fmt == "srt":
179
+ with open(export_path, "w", encoding="utf-8") as f:
180
+ self._write_srt(segments, f)
181
+ elif fmt == "json":
182
+ with open(export_path, "w", encoding="utf-8") as f:
183
+ json.dump(result, f, indent=4, ensure_ascii=False)
184
+ else:
185
+ self.logger.warning(f"Unsupported export format: {fmt}")
186
+ self.logger.info(f"Exported to {export_path}")
187
+
188
+ @staticmethod
189
+ def _srt_format_timestamp(seconds: float) -> str:
190
+ """Format timestamp for SRT."""
191
+ assert seconds >= 0, "non-negative timestamp expected"
192
+ milliseconds = round(seconds * 1000.0)
193
+ hours = milliseconds // 3_600_000
194
+ milliseconds -= hours * 3_600_000
195
+ minutes = milliseconds // 60_000
196
+ milliseconds -= minutes * 60_000
197
+ seconds_int = milliseconds // 1_000
198
+ milliseconds -= seconds_int * 1_000
199
+ return f"{hours:02d}:{minutes:02d}:{seconds_int:02d},{milliseconds:03d}"
200
+
201
+ def _write_srt(self, transcript: Iterator[dict], file: TextIO) -> None:
202
+ """Write SRT file."""
203
+ count = 0
204
+ for segment in transcript:
205
+ count += 1
206
+ print(
207
+ f"{count}\n"
208
+ f"{self._srt_format_timestamp(segment['start'])} --> {self._srt_format_timestamp(segment['end'])}\n"
209
+ f"{segment['text'].replace('-->', '->').strip()}\n",
210
+ file=file,
211
+ flush=True,
212
+ )
213
+
214
+ @staticmethod
215
+ def _write_txt(transcript: Iterator[dict], file: TextIO) -> None:
216
+ """Write TXT file."""
217
+ for segment in transcript:
218
+ print(segment["text"].strip(), file=file, flush=True)
219
+
220
+ @staticmethod
221
+ def _write_vtt(transcript: Iterator[dict], file: TextIO) -> None:
222
+ """Write VTT file."""
223
+ print("WEBVTT\n", file=file)
224
+ for segment in transcript:
225
+ print(
226
+ f"{AudioTranscriber._format_timestamp(segment['start'])} --> {AudioTranscriber._format_timestamp(segment['end'])}\n"
227
+ f"{segment['text'].strip().replace('-->', '->')}\n",
228
+ file=file,
229
+ flush=True,
230
+ )
231
+
232
+ @staticmethod
233
+ def _format_timestamp(
234
+ seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
235
+ ) -> str:
236
+ """Format timestamp for VTT."""
237
+ assert seconds >= 0, "non-negative timestamp expected"
238
+ milliseconds = round(seconds * 1000.0)
239
+ hours = milliseconds // 3_600_000
240
+ milliseconds -= hours * 3_600_000
241
+ minutes = milliseconds // 60_000
242
+ milliseconds -= minutes * 60_000
243
+ seconds_int = milliseconds // 1_000
244
+ milliseconds -= seconds_int * 1_000
245
+ hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
246
+ return f"{hours_marker}{minutes:02d}:{seconds_int:02d}{decimal_marker}{milliseconds:03d}"
247
+
248
+
249
+ def setup_logging(
250
+ verbose: bool = False, log_file: Optional[str] = None
251
+ ) -> logging.Logger:
252
+ """Set up logging configuration."""
253
+ logger = logging.getLogger(__name__)
254
+ logger.setLevel(logging.INFO if verbose else logging.WARNING)
255
+
256
+ # Console handler
257
+ ch = logging.StreamHandler()
258
+ ch.setLevel(logging.INFO if verbose else logging.WARNING)
259
+ formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
260
+ ch.setFormatter(formatter)
261
+ logger.addHandler(ch)
262
+
263
+ # File handler if specified
264
+ if log_file:
265
+ fh = logging.FileHandler(log_file)
266
+ fh.setLevel(logging.INFO)
267
+ fh.setFormatter(formatter)
268
+ logger.addHandler(fh)
269
+
270
+ return logger
271
+
272
+
273
+ def main() -> None:
274
+ parser = argparse.ArgumentParser(
275
+ description="Audio Transcriber: Record and transcribe audio using OpenAI Whisper.",
276
+ epilog="Examples:\n"
277
+ " python audio_transcriber.py --file path/to/audio.mp3 --model large --task translate --language ja\n"
278
+ " python audio_transcriber.py --record 60 --directory ./recordings --name my_recording.wav --verbose",
279
+ formatter_class=argparse.RawDescriptionHelpFormatter,
280
+ )
281
+ parser.add_argument(
282
+ "--model",
283
+ default="base",
284
+ choices=[
285
+ "tiny",
286
+ "base",
287
+ "small",
288
+ "medium",
289
+ "large",
290
+ "turbo",
291
+ "tiny.en",
292
+ "base.en",
293
+ "small.en",
294
+ "medium.en",
295
+ ],
296
+ help="Whisper model to use (default: base)",
297
+ )
298
+ parser.add_argument(
299
+ "--channels", type=int, default=1, help="Number of audio channels (default: 1)"
300
+ )
301
+ parser.add_argument(
302
+ "--rate",
303
+ type=int,
304
+ default=16000,
305
+ help="Sample rate for recording (default: 16000)",
306
+ )
307
+ parser.add_argument(
308
+ "--directory",
309
+ type=Path,
310
+ default=Path.cwd(),
311
+ help="Directory to save recordings/exports (default: current dir)",
312
+ )
313
+ parser.add_argument(
314
+ "--name",
315
+ default="output.wav",
316
+ help="Name of the output file (default: output.wav)",
317
+ )
318
+ parser.add_argument(
319
+ "--file",
320
+ type=Path,
321
+ nargs="*",
322
+ help="Path(s) to audio file(s) to transcribe (skips recording)",
323
+ )
324
+ parser.add_argument(
325
+ "--record",
326
+ type=int,
327
+ default=0,
328
+ help="Seconds to record (0 for unlimited until Ctrl+C; default: 0)",
329
+ )
330
+ parser.add_argument(
331
+ "--device", type=int, help="Input device index (default: system default)"
332
+ )
333
+ parser.add_argument(
334
+ "--language", help="Language code (e.g., 'en', 'fr'; auto-detected if omitted)"
335
+ )
336
+ parser.add_argument(
337
+ "--task",
338
+ default="transcribe",
339
+ choices=["transcribe", "translate"],
340
+ help="Task: transcribe or translate to English (default: transcribe)",
341
+ )
342
+ parser.add_argument(
343
+ "--fp16",
344
+ action="store_true",
345
+ help="Use FP16 for faster inference (default: False)",
346
+ )
347
+ parser.add_argument(
348
+ "--word-timestamps",
349
+ action="store_true",
350
+ help="Include word-level timestamps in output (default: False)",
351
+ )
352
+ parser.add_argument(
353
+ "--temperature",
354
+ type=float,
355
+ default=0.0,
356
+ help="Temperature for sampling diversity (default: 0.0)",
357
+ )
358
+ parser.add_argument(
359
+ "--initial-prompt", help="Initial text prompt to guide transcription"
360
+ )
361
+ parser.add_argument(
362
+ "--export",
363
+ nargs="*",
364
+ choices=["txt", "vtt", "srt", "json"],
365
+ default=[],
366
+ help="Export formats (e.g., --export txt srt)",
367
+ )
368
+ parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
369
+ parser.add_argument("--log-file", help="Path to log file")
370
+
371
+ args = parser.parse_args()
372
+
373
+ logger = setup_logging(args.verbose, args.log_file)
374
+
375
+ if args.file:
376
+ # Batch transcription
377
+ for file_path in args.file:
378
+ if not file_path.exists():
379
+ logger.error(f"File not found: {file_path}")
380
+ sys.exit(1)
381
+ transcriber = AudioTranscriber(
382
+ model=args.model,
383
+ channels=args.channels,
384
+ rate=args.rate,
385
+ file=file_path,
386
+ device=args.device,
387
+ logger=logger,
388
+ )
389
+ result = transcriber.transcribe(
390
+ language=args.language,
391
+ task=args.task,
392
+ fp16=args.fp16,
393
+ word_timestamps=args.word_timestamps,
394
+ temperature=args.temperature,
395
+ initial_prompt=args.initial_prompt,
396
+ verbose=args.verbose,
397
+ )
398
+ if args.export:
399
+ transcriber.export(result, args.export)
400
+ else:
401
+ # Recording mode
402
+ transcriber = AudioTranscriber(
403
+ model=args.model,
404
+ channels=args.channels,
405
+ rate=args.rate,
406
+ file_name=args.name,
407
+ directory=args.directory,
408
+ device=args.device,
409
+ logger=logger,
410
+ )
411
+ transcriber.initiate_stream()
412
+ transcriber.record(seconds=args.record)
413
+ transcriber.stop_stream()
414
+ transcriber.save_stream()
415
+ result = transcriber.transcribe(
416
+ language=args.language,
417
+ task=args.task,
418
+ fp16=args.fp16,
419
+ word_timestamps=args.word_timestamps,
420
+ temperature=args.temperature,
421
+ initial_prompt=args.initial_prompt,
422
+ verbose=args.verbose,
423
+ )
424
+ if args.export:
425
+ transcriber.export(result, args.export)
426
+
427
+
428
+ if __name__ == "__main__":
429
+ main()
@@ -0,0 +1,200 @@
1
+ #!/usr/bin/python
2
+ # coding: utf-8
3
+
4
+ import getopt
5
+ import os
6
+ import sys
7
+ from typing import List, Optional
8
+ from pathlib import Path
9
+ from audio_transcriber import AudioTranscriber, setup_logging
10
+ from fastmcp import FastMCP, Context
11
+ from pydantic import Field
12
+
13
+ # Initialize logging for MCP server (logs to file, verbose for details)
14
+ logger = setup_logging(verbose=True, log_file="audio_transcriber_mcp.log")
15
+
16
+ mcp = FastMCP(name="AudioTranscriberServer")
17
+
18
+ # Environment variables for defaults
19
+ environment_model = os.environ.get("WHISPER_MODEL", "base")
20
+ environment_directory = os.environ.get(
21
+ "TRANSCRIBE_DIRECTORY", str(Path.home() / "Downloads")
22
+ )
23
+
24
+
25
@mcp.tool(
    annotations={
        "title": "Transcribe Audio",
        "readOnlyHint": False,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": False,
    },
    tags={"audio_processing"},
)
async def transcribe_audio(
    audio_file: Optional[str] = Field(
        description="Path to the audio file to transcribe. If provided, transcription is performed on this file.",
        default=None,
    ),
    record_seconds: int = Field(
        description="Number of seconds to record audio from microphone. Must be positive if no audio_file is provided. 0 or negative not supported for recording in this context.",
        default=0,
    ),
    directory: Optional[str] = Field(
        description="Directory for saving recordings or exports.",
        default=environment_directory,
    ),
    model: str = Field(
        description="Whisper model to use (e.g., 'base', 'small', 'turbo').",
        default=environment_model,
    ),
    language: Optional[str] = Field(
        description="Language code for transcription (e.g., 'en', 'fr'). Auto-detected if not specified.",
        default=None,
    ),
    task: str = Field(
        description="Task to perform: 'transcribe' or 'translate' (to English).",
        default="transcribe",
    ),
    fp16: bool = Field(description="Use FP16 for faster inference.", default=True),
    word_timestamps: bool = Field(
        description="Include word-level timestamps in the output.", default=False
    ),
    temperature: float = Field(
        description="Temperature for sampling diversity (0.0 for deterministic).",
        default=0.0,
    ),
    initial_prompt: Optional[str] = Field(
        description="Initial text prompt to guide the transcription.", default=None
    ),
    export_formats: Optional[List[str]] = Field(
        description="Formats to export the transcription (e.g., ['txt', 'srt']).",
        default=None,
    ),
    ctx: Context = Field(
        description="MCP context for progress reporting.", default=None
    ),
) -> str:
    """Transcribes audio from a provided file or by recording from the microphone.

    Returns the transcribed text. Any failure is logged and re-raised as a
    ``RuntimeError`` chained to the original exception.
    """
    # Local import: contextlib is not among this module's top-level imports.
    import contextlib

    logger.info(
        f"Starting transcription: audio_file={audio_file}, record_seconds={record_seconds}, "
        f"directory={directory}, model={model}, language={language}, task={task}"
    )

    try:
        # Validate every input before the (slow) model load in the constructor.
        if not audio_file and record_seconds <= 0:
            raise ValueError(
                "Either audio_file must be provided or record_seconds must be positive."
            )
        if task not in ("transcribe", "translate"):
            raise ValueError(
                f"Unsupported task: {task!r}. Use 'transcribe' or 'translate'."
            )

        # '~' is expanded here because values such as "~/Downloads" may come
        # from configuration rather than from a shell that would expand them.
        source_path = Path(audio_file).expanduser() if audio_file else None
        if source_path is not None and not source_path.exists():
            raise ValueError(f"Audio file not found: {audio_file}")
        target_directory = Path(directory or environment_directory).expanduser()

        # Create transcriber instance
        transcriber = AudioTranscriber(
            model=model,
            directory=target_directory,
            file=source_path,
            logger=logger,
        )

        # Report initial progress
        if ctx:
            await ctx.report_progress(progress=0, total=100)
            logger.debug("Reported initial progress: 0/100")

        if source_path is None:
            # Recording mode (only fixed duration supported)
            logger.info(f"Starting recording for {record_seconds} seconds.")
            try:
                transcriber.initiate_stream()
                # Coarse progress for recording (sync call, so limited granularity)
                transcriber.record(seconds=record_seconds)
            finally:
                # Release the audio device even when recording fails.
                transcriber.stop_stream()
            transcriber.save_stream()

            if ctx:
                await ctx.report_progress(
                    progress=40, total=100
                )  # Arbitrary midpoint after recording
                logger.debug("Reported progress after recording: 40/100")

        # Perform transcription
        logger.info("Starting Whisper transcription.")
        # NOTE(review): Whisper prints to stdout when verbose is enabled; under
        # the stdio transport stdout presumably carries protocol traffic, so
        # those prints are sent to stderr for the duration of the call.
        with contextlib.redirect_stdout(sys.stderr):
            result = transcriber.transcribe(
                language=language,
                task=task,
                fp16=fp16,
                word_timestamps=word_timestamps,
                temperature=temperature,
                initial_prompt=initial_prompt,
                verbose=True,  # Enable verbose for logging details
            )

        if ctx:
            await ctx.report_progress(progress=90, total=100)
            logger.debug("Reported progress after transcription: 90/100")

        # Export if requested
        if export_formats:
            transcriber.export(result, formats=export_formats)
            logger.info(f"Exported transcription to formats: {export_formats}")

        # Report completion
        if ctx:
            await ctx.report_progress(progress=100, total=100)
            logger.debug("Reported final progress: 100/100")

        logger.info("Transcription completed successfully.")
        return result["text"]
    except Exception as e:
        logger.error(f"Failed to transcribe audio: {str(e)}")
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"Failed to transcribe audio: {str(e)}") from e
156
+
157
+
158
def audio_transcriber_mcp(argv):
    """Parse server options from ``argv`` and run the MCP server.

    Options:
        -h, --help        Print usage and exit.
        -t, --transport   ``stdio`` (default) or ``http``.
        -H, --host        Host for the http transport (default ``0.0.0.0``).
        -p, --port        Port for the http transport (default ``8000``).

    Exits with status 2 on an unknown option and status 1 on an invalid port
    or an unsupported transport.
    """
    usage = (
        "Usage: audio-transcriber-mcp [-h] [-t|--transport stdio|http] "
        "[-H|--host HOST] [-p|--port PORT]"
    )
    transport = "stdio"
    host = "0.0.0.0"
    port = 8000
    try:
        # "-h" is help only; the host short flag is "-H". Declaring "h" twice
        # in the option string left the host variant unreachable.
        opts, _ = getopt.getopt(
            argv,
            "ht:H:p:",
            ["help", "transport=", "host=", "port="],
        )
    except getopt.GetoptError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-t", "--transport"):
            transport = arg
        elif opt in ("-H", "--host"):
            host = arg
        elif opt in ("-p", "--port"):
            try:
                port = int(arg)
            except ValueError:
                print(f"Error: Port {arg} is not a valid integer.", file=sys.stderr)
                sys.exit(1)
            if not (0 <= port <= 65535):
                print(
                    f"Error: Port {arg} is out of valid range (0-65535).",
                    file=sys.stderr,
                )
                sys.exit(1)
    if transport == "stdio":
        mcp.run(transport="stdio")
    elif transport == "http":
        mcp.run(transport="http", host=host, port=port)
    else:
        logger.error("Transport not supported")
        sys.exit(1)
193
+
194
+
195
def main():
    """Run the MCP server with the arguments given on the command line."""
    # Everything after the program name is handed to the option parser.
    cli_arguments = sys.argv[1:]
    audio_transcriber_mcp(cli_arguments)
197
+
198
+
199
+ if __name__ == "__main__":
200
+ audio_transcriber_mcp(sys.argv[1:])
@@ -0,0 +1,177 @@
1
+ Metadata-Version: 2.4
2
+ Name: audio-transcriber
3
+ Version: 0.5.40
4
+ Summary: Transcribe your .wav .mp4 .mp3 .flac files to text or record your own audio!
5
+ Author-email: Audel Rouhi <knucklessg1@gmail.com>
6
+ License: MIT
7
+ Classifier: Development Status :: 5 - Production/Stable
8
+ Classifier: License :: Public Domain
9
+ Classifier: Environment :: Console
10
+ Classifier: Operating System :: POSIX :: Linux
11
+ Classifier: Programming Language :: Python :: 3
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: torch>=1.13.1
16
+ Requires-Dist: transformers>=4.25.1
17
+ Requires-Dist: pyaudio>=0.2.13
18
+ Requires-Dist: openai-whisper>=20250625
19
+ Requires-Dist: setuptools-rust>=1.12.0
20
+ Dynamic: license-file
21
+
22
+ # Audio-Transcriber
23
+
24
+ ![PyPI - Version](https://img.shields.io/pypi/v/audio-transcriber)
25
+ ![PyPI - Downloads](https://img.shields.io/pypi/dd/audio-transcriber)
26
+ ![GitHub Repo stars](https://img.shields.io/github/stars/Knuckles-Team/audio-transcriber)
27
+ ![GitHub forks](https://img.shields.io/github/forks/Knuckles-Team/audio-transcriber)
28
+ ![GitHub contributors](https://img.shields.io/github/contributors/Knuckles-Team/audio-transcriber)
29
+ ![PyPI - License](https://img.shields.io/pypi/l/audio-transcriber)
30
+ ![GitHub](https://img.shields.io/github/license/Knuckles-Team/audio-transcriber)
31
+
32
+ ![GitHub last commit (by committer)](https://img.shields.io/github/last-commit/Knuckles-Team/audio-transcriber)
33
+ ![GitHub pull requests](https://img.shields.io/github/issues-pr/Knuckles-Team/audio-transcriber)
34
+ ![GitHub closed pull requests](https://img.shields.io/github/issues-pr-closed/Knuckles-Team/audio-transcriber)
35
+ ![GitHub issues](https://img.shields.io/github/issues/Knuckles-Team/audio-transcriber)
36
+
37
+ ![GitHub top language](https://img.shields.io/github/languages/top/Knuckles-Team/audio-transcriber)
38
+ ![GitHub language count](https://img.shields.io/github/languages/count/Knuckles-Team/audio-transcriber)
39
+ ![GitHub repo size](https://img.shields.io/github/repo-size/Knuckles-Team/audio-transcriber)
40
+ ![GitHub repo file count (file type)](https://img.shields.io/github/directory-file-count/Knuckles-Team/audio-transcriber)
41
+ ![PyPI - Wheel](https://img.shields.io/pypi/wheel/audio-transcriber)
42
+ ![PyPI - Implementation](https://img.shields.io/pypi/implementation/audio-transcriber)
43
+
44
+ *Version: 0.5.40*
45
+
46
+ Transcribe your .wav .mp4 .mp3 .flac files to text or record your own audio!
47
+
48
+ This repository is actively maintained - Contributions are welcome!
49
+
50
+ Contribution Opportunities:
51
+ - Support new models
52
+
53
+ Wrapped around [OpenAI Whisper](https://pypi.org/project/openai-whisper)
54
+
55
+ <details>
56
+ <summary><b>Usage:</b></summary>
57
+
58
+ | Short Flag | Long Flag | Description |
59
+ |------------|-------------|---------------------------------------------------------------|
60
+ | -h | --help | See Usage |
61
+ | -b | --bitrate | Bitrate to use during recording |
62
+ | -c | --channels | Number of channels to use during recording |
63
+ | -d | --directory | Directory to save recording |
64
+ | -e | --export | Export txt, srt, and vtt files |
65
+ | -f | --file | File to transcribe |
66
+ | -l | --language | Language to transcribe |
67
+ | -m | --model | Model to use: <tiny, base, small, medium, large> |
68
+ | -n | --name | Name of recording |
69
+ | -r | --record | Specify number of seconds to record from microphone |
70
+
71
+ </details>
72
+
73
+ <details>
74
+ <summary><b>Example:</b></summary>
75
+
76
+ ```bash
77
+ audio-transcriber --file '~/Downloads/Federal_Reserve.mp4' --model 'large'
78
+ audio-transcriber --record 60 --directory '~/Downloads/' --name 'my_recording.wav' --model 'tiny'
79
+ ```
80
+
81
+
82
+ </details>
83
+
84
+ <details>
85
+ <summary><b>Model Information:</b></summary>
86
+
87
+ [Courtesy of and Credits to OpenAI: Whisper.ai](https://github.com/openai/whisper/blob/main/README.md)
88
+
89
+ | Size | Parameters | English-only model | Multilingual model | Required VRAM | Relative speed |
90
+ |:------:|:----------:|:------------------:|:------------------:|:-------------:|:--------------:|
91
+ | tiny | 39 M | `tiny.en` | `tiny` | ~1 GB | ~32x |
92
+ | base | 74 M | `base.en` | `base` | ~1 GB | ~16x |
93
+ | small | 244 M | `small.en` | `small` | ~2 GB | ~6x |
94
+ | medium | 769 M | `medium.en` | `medium` | ~5 GB | ~2x |
95
+ | large | 1550 M | N/A | `large` | ~10 GB | 1x |
96
+
97
+
98
+ </details>
99
+
100
+ <details>
101
+ <summary><b>Installation Instructions:</b></summary>
102
+
103
+ ## Use with AI
104
+
105
+ Configure `mcp.json`. Both `env` entries (`WHISPER_MODEL` and `TRANSCRIBE_DIRECTORY`) are optional.
106
+
107
+ Recommended: Store secrets in environment variables with lookup in JSON file.
108
+
109
+ For Testing Only: Plain text storage will also work, although **not** recommended.
110
+
111
+ ```json
112
+ {
113
+ "mcpServers": {
114
+ "audio_transcriber": {
115
+ "command": "uv",
116
+ "args": [
117
+ "run",
118
+ "--with",
119
+ "audio-transcriber",
120
+ "audio-transcriber-mcp"
121
+ ],
122
+ "env": {
123
+ "WHISPER_MODEL": "medium",
124
+ "TRANSCRIBE_DIRECTORY": "~/Downloads"
125
+ },
126
+ "timeout": 200000
127
+ }
128
+ }
129
+ }
130
+ ```
131
+
132
+ ### Deploy MCP Server as a container
133
+ ```bash
134
+ docker pull knucklessg1/audio-transcriber:latest
135
+ ```
136
+
137
+ Modify the `compose.yml`
138
+
139
+ ```yaml
140
+ services:
141
+ audio-transcriber:
142
+ image: knucklessg1/audio-transcriber:latest
143
+ environment:
144
+ - HOST=0.0.0.0
145
+ - PORT=8021
146
+ ports:
147
+ - 8021:8021
148
+ ```
149
+
150
+ ### Install Python Package
151
+
152
+ ```bash
153
+ python -m pip install audio-transcriber
154
+ ```
155
+
156
+ or
157
+
158
+ ```bash
159
+ uv pip install --upgrade audio-transcriber
160
+ ```
161
+
162
+ #### Ubuntu Dependencies
163
+ ```bash
164
+ apt install -y libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
165
+ ```
166
+
167
+ </details>
168
+
169
+ <details>
170
+ <summary><b>Repository Owners:</b></summary>
171
+
172
+
173
+ <img width="100%" height="180em" src="https://github-readme-stats.vercel.app/api?username=Knucklessg1&show_icons=true&hide_border=true&count_private=true&include_all_commits=true" />
174
+
175
+ ![GitHub followers](https://img.shields.io/github/followers/Knucklessg1)
176
+ ![GitHub User's stars](https://img.shields.io/github/stars/Knucklessg1)
177
+ </details>
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ requirements.txt
6
+ audio_transcriber/__init__.py
7
+ audio_transcriber/audio_transcriber.py
8
+ audio_transcriber/audio_transcriber_mcp.py
9
+ audio_transcriber.egg-info/PKG-INFO
10
+ audio_transcriber.egg-info/SOURCES.txt
11
+ audio_transcriber.egg-info/dependency_links.txt
12
+ audio_transcriber.egg-info/entry_points.txt
13
+ audio_transcriber.egg-info/requires.txt
14
+ audio_transcriber.egg-info/top_level.txt
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ audio-transcriber = audio_transcriber.audio_transcriber:main
3
+ audio-transcriber-mcp = audio_transcriber.audio_transcriber_mcp:main
@@ -0,0 +1,5 @@
1
+ torch>=1.13.1
2
+ transformers>=4.25.1
3
+ pyaudio>=0.2.13
4
+ openai-whisper>=20250625
5
+ setuptools-rust>=1.12.0
@@ -0,0 +1,2 @@
1
+ audio_transcriber
2
+ dist
@@ -0,0 +1,37 @@
1
[build-system]
requires = ["setuptools>=80.9.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "audio-transcriber"
version = "0.5.40"
description = "Transcribe your .wav .mp4 .mp3 .flac files to text or record your own audio!"
readme = "README.md"
authors = [{ name = "Audel Rouhi", email = "knucklessg1@gmail.com" }]
license = { text = "MIT" }
classifiers = [
    "Development Status :: 5 - Production/Stable",
    # Must agree with `license` above: the project is MIT-licensed, which is
    # not a public-domain dedication.
    "License :: OSI Approved :: MIT License",
    "Environment :: Console",
    "Operating System :: POSIX :: Linux",
    "Programming Language :: Python :: 3",
]
requires-python = ">=3.8"
# NOTE(review): the `audio-transcriber-mcp` script calls `mcp.run(...)`;
# confirm the library that provides the MCP server is declared here.
dependencies = [
    "torch>=1.13.1",
    "transformers>=4.25.1",
    "pyaudio>=0.2.13",
    "openai-whisper>=20250625",
    "setuptools-rust>=1.12.0",
]

[project.scripts]
audio-transcriber = "audio_transcriber.audio_transcriber:main"
audio-transcriber-mcp = "audio_transcriber.audio_transcriber_mcp:main"

[tool.setuptools]
include-package-data = true
# NOTE(review): package-data values are glob patterns; "audio_transcriber"
# presumably matches no file inside the package. Confirm the intended pattern.
package-data = { audio_transcriber = ["audio_transcriber"] }

[tool.setuptools.packages.find]
where = ["."]
@@ -0,0 +1,5 @@
1
+ torch>=1.13.1
2
+ transformers>=4.25.1
3
+ pyaudio>=0.2.13
4
+ openai-whisper>=20250625
5
+ setuptools-rust>=1.12.0
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+