audio-metrics-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 OpenClaw
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,296 @@
1
+ Metadata-Version: 2.4
2
+ Name: audio-metrics-cli
3
+ Version: 0.1.0
4
+ Summary: Cross-platform audio analysis toolkit for speech metrics extraction
5
+ Author-email: OpenClaw <clawbot@openclaw.ai>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/i-whimsy/audio-metrics-cli
8
+ Project-URL: Repository, https://github.com/i-whimsy/audio-metrics-cli.git
9
+ Project-URL: Documentation, https://github.com/i-whimsy/audio-metrics-cli#readme
10
+ Keywords: audio,speech,analysis,metrics,whisper,vad,prosody,cli
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
22
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: numpy>=1.23.0
27
+ Requires-Dist: librosa>=0.10.0
28
+ Requires-Dist: soundfile>=0.12.0
29
+ Requires-Dist: openai-whisper>=20230314
30
+ Requires-Dist: click>=8.1.0
31
+ Requires-Dist: tqdm>=4.65.0
32
+ Requires-Dist: pydantic>=2.0.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
36
+ Requires-Dist: black>=23.0.0; extra == "dev"
37
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
38
+ Provides-Extra: emotion
39
+ Requires-Dist: torch>=2.0.0; extra == "emotion"
40
+ Requires-Dist: torchaudio>=2.0.0; extra == "emotion"
41
+ Requires-Dist: speechbrain>=0.5.14; extra == "emotion"
42
+ Dynamic: license-file
43
+
44
+ # Audio Metrics CLI
45
+
46
+ 🎙️ **Cross-platform audio analysis toolkit for speech metrics extraction**
47
+
48
+ [![PyPI version](https://badge.fury.io/py/audio-metrics-cli.svg)](https://badge.fury.io/py/audio-metrics-cli)
49
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
50
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
51
+
52
+ ---
53
+
54
+ ## 🚀 Quick Start
55
+
56
+ ### Installation
57
+
58
+ ```bash
59
+ # Install from PyPI (recommended)
60
+ pip install audio-metrics-cli
61
+
62
+ # Or install from source (development)
63
+ git clone https://github.com/i-whimsy/audio-metrics-cli.git
64
+ cd audio-metrics-cli
65
+ pip install -e ".[dev]"
66
+ ```
67
+
68
+ ### Basic Usage
69
+
70
+ ```bash
71
+ # Analyze audio file
72
+ audio-metrics analyze your_audio.wav --output result.json
73
+
74
+ # With verbose output
75
+ audio-metrics analyze audio.mp3 --verbose --show-progress
76
+
77
+ # Transcribe only
78
+ audio-metrics transcribe audio.m4a -o transcript.txt
79
+
80
+ # Compare two audio files
81
+ audio-metrics compare v1.wav v2.wav
82
+ ```
83
+
84
+ ---
85
+
86
+ ## 📦 Features
87
+
88
+ ### Core Metrics
89
+
90
+ - **🎵 Audio Information**: Duration, sample rate, file size
91
+ - **🗣️ Voice Activity Detection**: Speech/silence segmentation
92
+ - **📝 Speech-to-Text**: Whisper-powered transcription
93
+ - **🎼 Prosody Analysis**: Pitch, energy, speech rate
94
+ - **😊 Emotion Recognition**: Emotional state detection (optional; install the extra dependencies with `pip install "audio-metrics-cli[emotion]"`)
95
+ - **🔤 Filler Word Detection**: Detects filler words such as "um", "uh", and "like"
96
+
97
+ ### Supported Formats
98
+
99
+ - ✅ WAV
100
+ - ✅ MP3
101
+ - ✅ M4A
102
+ - ✅ FLAC
103
+ - ✅ OGG
104
+
105
+ ### Cross-Platform
106
+
107
+ - ✅ Windows
108
+ - ✅ macOS
109
+ - ✅ Linux
110
+
111
+ ---
112
+
113
+ ## 📖 Documentation
114
+
115
+ ### Command Line Interface
116
+
117
+ #### `analyze` - Full Analysis
118
+
119
+ ```bash
120
+ audio-metrics analyze audio.wav [OPTIONS]
121
+
122
+ Options:
123
+ -o, --output PATH Output JSON file path
124
+ -c, --config PATH Configuration file
125
+ -m, --model TEXT Whisper model (tiny/base/small/medium/large)
126
+ --no-emotion Skip emotion analysis
127
+ --show-progress Show progress bars
128
+ -v, --verbose Verbose output
129
+ --help Show this message
130
+ ```
131
+
132
+ #### `transcribe` - Speech to Text
133
+
134
+ ```bash
135
+ audio-metrics transcribe audio.wav [OPTIONS]
136
+
137
+ Options:
138
+ -o, --output PATH Output transcript file
139
+ -m, --model TEXT Whisper model
140
+ --language TEXT Language code
141
+ --help Show this message
142
+ ```
143
+
144
+ #### `compare` - Compare Audio Files
145
+
146
+ ```bash
147
+ audio-metrics compare audio1.wav audio2.wav [OPTIONS]
148
+
149
+ Options:
150
+ --format TEXT Output format (text/json/markdown)
151
+ --help Show this message
152
+ ```
153
+
154
+ ---
155
+
156
+ ## 📊 Output Example
157
+
158
+ ```json
159
+ {
160
+ "audio_info": {
161
+ "duration_seconds": 185.2,
162
+ "sample_rate": 44100,
163
+ "file_size_mb": 2.8
164
+ },
165
+ "vad_analysis": {
166
+ "speech_ratio": 0.81,
167
+ "pause_count": 23,
168
+ "avg_pause_duration": 1.1
169
+ },
170
+ "speech_metrics": {
171
+ "words_total": 820,
172
+ "words_per_minute": 266
173
+ },
174
+ "prosody_metrics": {
175
+ "pitch_mean_hz": 145.3,
176
+ "energy_cv": 0.33
177
+ },
178
+ "filler_metrics": {
179
+ "filler_word_count": 18,
180
+ "fillers_per_100_words": 2.2
181
+ }
182
+ }
183
+ ```
184
+
185
+ ---
186
+
187
+ ## 🔧 Configuration
188
+
189
+ Create a `config.json` file and pass it to `analyze` with `-c`/`--config`:
190
+
191
+ ```json
192
+ {
193
+ "models": {
194
+ "speech_to_text": {
195
+ "provider": "whisper",
196
+ "model": "base",
197
+ "device": "auto"
198
+ },
199
+ "vad": {
200
+ "provider": "silero",
201
+ "threshold": 0.5
202
+ }
203
+ },
204
+ "audio_analysis": {
205
+ "enable_pitch": true,
206
+ "enable_energy": true,
207
+ "enable_pause": true
208
+ },
209
+ "features": {
210
+ "enable_emotion": true,
211
+ "skip_if_too_long": 3600
212
+ }
213
+ }
214
+ ```
215
+
216
+ ---
217
+
218
+ ## 💻 Development
219
+
220
+ ### Setup Development Environment
221
+
222
+ ```bash
223
+ # Clone repository
224
+ git clone https://github.com/i-whimsy/audio-metrics-cli.git
225
+ cd audio-metrics-cli
226
+
227
+ # Install with dev dependencies
228
+ pip install -e ".[dev]"
229
+
230
+ # Run tests
231
+ pytest tests/
232
+
233
+ # Format code
234
+ black src/
235
+ ruff check src/
236
+ ```
237
+
238
+ ### Project Structure
239
+
240
+ ```
241
+ audio-metrics-cli/
242
+ ├── src/
243
+ │ └── audio_metrics/
244
+ │ ├── cli.py # CLI entry point
245
+ │ ├── config.py # Configuration
246
+ │ └── modules/ # Core modules
247
+ │ ├── audio_loader.py
248
+ │ ├── vad_analyzer.py
249
+ │ ├── speech_to_text.py
250
+ │ ├── prosody_analyzer.py
251
+ │ ├── emotion_analyzer.py
252
+ │ ├── filler_detector.py
253
+ │ ├── metrics_builder.py
254
+ │ └── json_exporter.py
255
+ ├── tests/
256
+ ├── examples/
257
+ ├── pyproject.toml
258
+ └── README.md
259
+ ```
260
+
261
+ ---
262
+
263
+ ## 🤝 Contributing
264
+
265
+ 1. Fork the repository
266
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
267
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
268
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
269
+ 5. Open a Pull Request
270
+
271
+ ---
272
+
273
+ ## 📝 License
274
+
275
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
276
+
277
+ ---
278
+
279
+ ## 🙏 Acknowledgments
280
+
281
+ - [OpenAI Whisper](https://github.com/openai/whisper) - Speech-to-text
282
+ - [Silero VAD](https://github.com/snakers4/silero-vad) - Voice activity detection
283
+ - [Librosa](https://librosa.org/) - Audio analysis
284
+ - [SpeechBrain](https://speechbrain.github.io/) - Emotion recognition
285
+
286
+ ---
287
+
288
+ ## 📞 Support
289
+
290
+ - **Issues**: [GitHub Issues](https://github.com/i-whimsy/audio-metrics-cli/issues)
291
+ - **Discussions**: [GitHub Discussions](https://github.com/i-whimsy/audio-metrics-cli/discussions)
292
+ - **Email**: clawbot@openclaw.ai
293
+
294
+ ---
295
+
296
+ **Built with ❤️ by OpenClaw Team**
@@ -0,0 +1,253 @@
1
+ # Audio Metrics CLI
2
+
3
+ 🎙️ **Cross-platform audio analysis toolkit for speech metrics extraction**
4
+
5
+ [![PyPI version](https://badge.fury.io/py/audio-metrics-cli.svg)](https://badge.fury.io/py/audio-metrics-cli)
6
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ ---
10
+
11
+ ## 🚀 Quick Start
12
+
13
+ ### Installation
14
+
15
+ ```bash
16
+ # Install from PyPI (recommended)
17
+ pip install audio-metrics-cli
18
+
19
+ # Or install from source (development)
20
+ git clone https://github.com/i-whimsy/audio-metrics-cli.git
21
+ cd audio-metrics-cli
22
+ pip install -e ".[dev]"
23
+ ```
24
+
25
+ ### Basic Usage
26
+
27
+ ```bash
28
+ # Analyze audio file
29
+ audio-metrics analyze your_audio.wav --output result.json
30
+
31
+ # With verbose output
32
+ audio-metrics analyze audio.mp3 --verbose --show-progress
33
+
34
+ # Transcribe only
35
+ audio-metrics transcribe audio.m4a -o transcript.txt
36
+
37
+ # Compare two audio files
38
+ audio-metrics compare v1.wav v2.wav
39
+ ```
40
+
41
+ ---
42
+
43
+ ## 📦 Features
44
+
45
+ ### Core Metrics
46
+
47
+ - **🎵 Audio Information**: Duration, sample rate, file size
48
+ - **🗣️ Voice Activity Detection**: Speech/silence segmentation
49
+ - **📝 Speech-to-Text**: Whisper-powered transcription
50
+ - **🎼 Prosody Analysis**: Pitch, energy, speech rate
51
+ - **😊 Emotion Recognition**: Emotional state detection (optional; install the extra dependencies with `pip install "audio-metrics-cli[emotion]"`)
52
+ - **🔤 Filler Word Detection**: Detects filler words such as "um", "uh", and "like"
53
+
54
+ ### Supported Formats
55
+
56
+ - ✅ WAV
57
+ - ✅ MP3
58
+ - ✅ M4A
59
+ - ✅ FLAC
60
+ - ✅ OGG
61
+
62
+ ### Cross-Platform
63
+
64
+ - ✅ Windows
65
+ - ✅ macOS
66
+ - ✅ Linux
67
+
68
+ ---
69
+
70
+ ## 📖 Documentation
71
+
72
+ ### Command Line Interface
73
+
74
+ #### `analyze` - Full Analysis
75
+
76
+ ```bash
77
+ audio-metrics analyze audio.wav [OPTIONS]
78
+
79
+ Options:
80
+ -o, --output PATH Output JSON file path
81
+ -c, --config PATH Configuration file
82
+ -m, --model TEXT Whisper model (tiny/base/small/medium/large)
83
+ --no-emotion Skip emotion analysis
84
+ --show-progress Show progress bars
85
+ -v, --verbose Verbose output
86
+ --help Show this message
87
+ ```
88
+
89
+ #### `transcribe` - Speech to Text
90
+
91
+ ```bash
92
+ audio-metrics transcribe audio.wav [OPTIONS]
93
+
94
+ Options:
95
+ -o, --output PATH Output transcript file
96
+ -m, --model TEXT Whisper model
97
+ --language TEXT Language code
98
+ --help Show this message
99
+ ```
100
+
101
+ #### `compare` - Compare Audio Files
102
+
103
+ ```bash
104
+ audio-metrics compare audio1.wav audio2.wav [OPTIONS]
105
+
106
+ Options:
107
+ --format TEXT Output format (text/json/markdown)
108
+ --help Show this message
109
+ ```
110
+
111
+ ---
112
+
113
+ ## 📊 Output Example
114
+
115
+ ```json
116
+ {
117
+ "audio_info": {
118
+ "duration_seconds": 185.2,
119
+ "sample_rate": 44100,
120
+ "file_size_mb": 2.8
121
+ },
122
+ "vad_analysis": {
123
+ "speech_ratio": 0.81,
124
+ "pause_count": 23,
125
+ "avg_pause_duration": 1.1
126
+ },
127
+ "speech_metrics": {
128
+ "words_total": 820,
129
+ "words_per_minute": 266
130
+ },
131
+ "prosody_metrics": {
132
+ "pitch_mean_hz": 145.3,
133
+ "energy_cv": 0.33
134
+ },
135
+ "filler_metrics": {
136
+ "filler_word_count": 18,
137
+ "fillers_per_100_words": 2.2
138
+ }
139
+ }
140
+ ```
141
+
142
+ ---
143
+
144
+ ## 🔧 Configuration
145
+
146
+ Create a `config.json` file and pass it to `analyze` with `-c`/`--config`:
147
+
148
+ ```json
149
+ {
150
+ "models": {
151
+ "speech_to_text": {
152
+ "provider": "whisper",
153
+ "model": "base",
154
+ "device": "auto"
155
+ },
156
+ "vad": {
157
+ "provider": "silero",
158
+ "threshold": 0.5
159
+ }
160
+ },
161
+ "audio_analysis": {
162
+ "enable_pitch": true,
163
+ "enable_energy": true,
164
+ "enable_pause": true
165
+ },
166
+ "features": {
167
+ "enable_emotion": true,
168
+ "skip_if_too_long": 3600
169
+ }
170
+ }
171
+ ```
172
+
173
+ ---
174
+
175
+ ## 💻 Development
176
+
177
+ ### Setup Development Environment
178
+
179
+ ```bash
180
+ # Clone repository
181
+ git clone https://github.com/i-whimsy/audio-metrics-cli.git
182
+ cd audio-metrics-cli
183
+
184
+ # Install with dev dependencies
185
+ pip install -e ".[dev]"
186
+
187
+ # Run tests
188
+ pytest tests/
189
+
190
+ # Format code
191
+ black src/
192
+ ruff check src/
193
+ ```
194
+
195
+ ### Project Structure
196
+
197
+ ```
198
+ audio-metrics-cli/
199
+ ├── src/
200
+ │ └── audio_metrics/
201
+ │ ├── cli.py # CLI entry point
202
+ │ ├── config.py # Configuration
203
+ │ └── modules/ # Core modules
204
+ │ ├── audio_loader.py
205
+ │ ├── vad_analyzer.py
206
+ │ ├── speech_to_text.py
207
+ │ ├── prosody_analyzer.py
208
+ │ ├── emotion_analyzer.py
209
+ │ ├── filler_detector.py
210
+ │ ├── metrics_builder.py
211
+ │ └── json_exporter.py
212
+ ├── tests/
213
+ ├── examples/
214
+ ├── pyproject.toml
215
+ └── README.md
216
+ ```
217
+
218
+ ---
219
+
220
+ ## 🤝 Contributing
221
+
222
+ 1. Fork the repository
223
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
224
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
225
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
226
+ 5. Open a Pull Request
227
+
228
+ ---
229
+
230
+ ## 📝 License
231
+
232
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
233
+
234
+ ---
235
+
236
+ ## 🙏 Acknowledgments
237
+
238
+ - [OpenAI Whisper](https://github.com/openai/whisper) - Speech-to-text
239
+ - [Silero VAD](https://github.com/snakers4/silero-vad) - Voice activity detection
240
+ - [Librosa](https://librosa.org/) - Audio analysis
241
+ - [SpeechBrain](https://speechbrain.github.io/) - Emotion recognition
242
+
243
+ ---
244
+
245
+ ## 📞 Support
246
+
247
+ - **Issues**: [GitHub Issues](https://github.com/i-whimsy/audio-metrics-cli/issues)
248
+ - **Discussions**: [GitHub Discussions](https://github.com/i-whimsy/audio-metrics-cli/discussions)
249
+ - **Email**: clawbot@openclaw.ai
250
+
251
+ ---
252
+
253
+ **Built with ❤️ by OpenClaw Team**
@@ -0,0 +1,88 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "audio-metrics-cli"
7
+ version = "0.1.0"
8
+ description = "Cross-platform audio analysis toolkit for speech metrics extraction"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ {name = "OpenClaw", email = "clawbot@openclaw.ai"}
14
+ ]
15
+ keywords = [
16
+ "audio",
17
+ "speech",
18
+ "analysis",
19
+ "metrics",
20
+ "whisper",
21
+ "vad",
22
+ "prosody",
23
+ "cli"
24
+ ]
25
+ classifiers = [
26
+ "Development Status :: 4 - Beta",
27
+ "Intended Audience :: Developers",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Operating System :: OS Independent",
30
+ "Programming Language :: Python :: 3",
31
+ "Programming Language :: Python :: 3.8",
32
+ "Programming Language :: Python :: 3.9",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
36
+ "Topic :: Multimedia :: Sound/Audio :: Analysis",
37
+ "Topic :: Scientific/Engineering :: Information Analysis",
38
+ ]
39
+
40
+ dependencies = [
41
+ "numpy>=1.23.0",
42
+ "librosa>=0.10.0",
43
+ "soundfile>=0.12.0",
44
+ "openai-whisper>=20230314",
45
+ "click>=8.1.0",
46
+ "tqdm>=4.65.0",
47
+ "pydantic>=2.0.0",
48
+ ]
49
+
50
+ [project.optional-dependencies]
51
+ dev = [
52
+ "pytest>=7.0.0",
53
+ "pytest-cov>=4.0.0",
54
+ "black>=23.0.0",
55
+ "ruff>=0.1.0",
56
+ ]
57
+ emotion = [
58
+ "torch>=2.0.0",
59
+ "torchaudio>=2.0.0",
60
+ "speechbrain>=0.5.14",
61
+ ]
62
+
63
+ [project.urls]
64
+ Homepage = "https://github.com/i-whimsy/audio-metrics-cli"
65
+ Repository = "https://github.com/i-whimsy/audio-metrics-cli.git"
66
+ Documentation = "https://github.com/i-whimsy/audio-metrics-cli#readme"
67
+
68
+ [project.scripts]
69
+ audio-metrics = "audio_metrics.cli:main"
70
+
71
+ [tool.setuptools.packages.find]
72
+ where = ["src"]
73
+
74
+ [tool.setuptools.package-data]
75
+ audio_metrics = ["*.json", "*.txt"]
76
+
77
+ [tool.black]
78
+ line-length = 100
79
+ target-version = ['py38', 'py39', 'py310', 'py311', 'py312']
80
+
81
+ [tool.ruff]
82
+ line-length = 100
83
+ target-version = "py38"
84
+
85
+ [tool.pytest.ini_options]
86
+ testpaths = ["tests"]
87
+ python_files = "test_*.py"
88
+ python_functions = "test_*"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ """
2
+ Audio Metrics CLI - Cross-platform audio analysis toolkit
3
+
4
+ Extract objective speech metrics from audio files.
5
+ Output structured JSON for further analysis.
6
+ """
7
+
8
+ __version__ = "0.1.0"
9
+ __author__ = "OpenClaw"
10
+ __email__ = "clawbot@openclaw.ai"
11
+
12
+ from .cli import main
13
+
14
+ __all__ = ["main"]