revoxx 1.0.0.dev22__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/MANIFEST.in +1 -0
- {revoxx-1.0.0.dev22/revoxx.egg-info → revoxx-1.0.2}/PKG-INFO +65 -10
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/README.md +60 -9
- revoxx-1.0.2/doc/import_raw_text.png +0 -0
- revoxx-1.0.2/doc/screenshot1.png +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/pyproject.toml +27 -4
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/__init__.py +9 -1
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/app.py +6 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/display_controller.py +30 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/navigation_controller.py +4 -25
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/process_manager.py +34 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/session_controller.py +1 -4
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/dataset/exporter.py +121 -0
- revoxx-1.0.2/revoxx/doc/USER_GUIDE.md +272 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/dataset_dialog.py +108 -5
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/open_session_dialog.py +54 -6
- revoxx-1.0.2/revoxx/ui/dialogs/session_settings_dialog.py +381 -0
- revoxx-1.0.2/revoxx/ui/dialogs/user_guide_dialog.py +249 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/utterance_list_base.py +50 -13
- revoxx-1.0.2/revoxx/ui/icon.py +40 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/menus/application_menu.py +13 -1
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/window_base.py +8 -3
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/window_factory.py +23 -30
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/device_manager.py +1 -1
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/process_cleanup.py +12 -4
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/settings_manager.py +3 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2/revoxx.egg-info}/PKG-INFO +65 -10
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/SOURCES.txt +4 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/requires.txt +4 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/scripts_module/vadiate.py +19 -7
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_navigation_controller.py +17 -10
- revoxx-1.0.0.dev22/revoxx/ui/dialogs/session_settings_dialog.py +0 -207
- revoxx-1.0.0.dev22/revoxx/ui/icon.py +0 -83
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/LICENSE +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/__main__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/audio_buffer.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/audio_queue_processor.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/buffer_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/level_calculator.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/player.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/clipping_detector.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/mel_spectrogram.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/processor_base.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/queue_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/recorder.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/shared_state.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/constants.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/audio_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/device_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/dialog_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/file_operations_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/dataset/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/keyboard_shortcuts.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/microphone.png +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/templates/dataset_readme.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/templates/index_format_with_intensity.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/templates/index_format_without_intensity.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/inspector.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/models.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/script_parser.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/dialog_utils.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/find_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/help_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/import_text_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/new_session_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/progress_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/utterance_order_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/emotion_indicator.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/font_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/frequency_axis.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/info_overlay.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/level_meter/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/level_meter/config.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/level_meter/led_level_meter.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/menus/audio_devices.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/recording_display_state.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/clipping_visualizer.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/edge_indicator.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/playback_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/zoom_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/display_base.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/display_utils.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/mel_processor_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/playback_handler.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/recording_display.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/recording_handler.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/widget.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/style_config.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/themes.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/widget_initializer.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/window_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/active_recordings.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/audio_utils.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/config.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/file_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/spectrogram_utils.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/state.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/text_importer.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/text_utils.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/dependency_links.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/entry_points.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/top_level.txt +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/scripts_module/__init__.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/scripts_module/export.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/setup.cfg +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_active_recordings.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_audio_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_audio_queue_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_config.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_dataset_exporter.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_device_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_dialog_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_display_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_file_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_file_operations_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_ipc_communication.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_new_session_dialog.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_process_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_session_controller.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_session_manager.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_session_models.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_stable_sorting.py +0 -0
- {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_utterance_list_dialog_sorting.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: revoxx
|
|
3
|
-
Version: 1.0.0.dev22
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Speech recording application for creating high-quality speech datasets
|
|
5
5
|
Author-email: Grammatek ehf <info@grammatek.com>
|
|
6
6
|
Maintainer-email: Grammatek ehf <info@grammatek.com>
|
|
@@ -31,15 +31,19 @@ Requires-Dist: matplotlib>=3.8.0
|
|
|
31
31
|
Requires-Dist: sounddevice>=0.5.1
|
|
32
32
|
Requires-Dist: soundfile>=0.12.0
|
|
33
33
|
Requires-Dist: tqdm>=4.65.0
|
|
34
|
+
Requires-Dist: markdown2>2.5.1
|
|
35
|
+
Requires-Dist: tkinterweb>4.4.1
|
|
34
36
|
Provides-Extra: vad
|
|
35
37
|
Requires-Dist: torch>=2.0.0; extra == "vad"
|
|
36
38
|
Requires-Dist: silero-vad>=5.0; extra == "vad"
|
|
39
|
+
Requires-Dist: torchaudio<2.8.0; extra == "vad"
|
|
37
40
|
Provides-Extra: dev
|
|
38
41
|
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
39
42
|
Requires-Dist: isort>=5.10.0; extra == "dev"
|
|
40
43
|
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
|
41
44
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
42
45
|
Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
|
|
46
|
+
Requires-Dist: versioningit>=2.0.0; extra == "dev"
|
|
43
47
|
Dynamic: license-file
|
|
44
48
|
|
|
45
49
|
# Revoxx - Record Voices
|
|
@@ -72,8 +76,10 @@ This repository provides **Revoxx**, a graphical recording application for recor
|
|
|
72
76
|
|
|
73
77
|
## System Requirements
|
|
74
78
|
- **Operating System:** Linux/OS-X, should work on Windows
|
|
79
|
+
- **Python:** 3.9 - 3.13 with Tkinter support
|
|
75
80
|
- **Recording:** Audio Interface, good voice microphone and headphones
|
|
76
81
|
- **Linux:** Requires PortAudio library (`sudo apt-get install portaudio19-dev` on Ubuntu/Debian)
|
|
82
|
+
- **GUI:** Tkinter (usually included with Python, see installation notes below)
|
|
77
83
|
|
|
78
84
|
## Description
|
|
79
85
|
|
|
@@ -102,23 +108,57 @@ the Icelandic emotional speech dataset, and created this tool to minimize hassle
|
|
|
102
108
|
- Recordings are organized into **Recording Sessions**
|
|
103
109
|
- Record emotional sessions for each speaker or record more traditional LJSpeech-style sessions
|
|
104
110
|
- Seamless transitions between different recording sessions with automatic progress tracking: continue where you left-off
|
|
105
|
-
- Offers advanced search and navigation capabilities for utterances, with flexible sorting by label, emotion, text
|
|
106
|
-
content, and recorded takes
|
|
111
|
+
- Offers advanced search and navigation capabilities for utterances, with flexible sorting and ordering by label, emotion, text
|
|
112
|
+
content, text length and recorded takes
|
|
107
113
|
- Consistent audio settings & metadata for all recordings
|
|
108
114
|
- **Real-time monitoring** including toggable recording levels, mel spectrograms, maximum frequency detection, and more
|
|
109
115
|
- Customizable **industry-standard presets for Peak/RMS levels**
|
|
110
116
|
- Dedicated **Monitoring mode** for precise input calibration
|
|
111
117
|
- **Multi-Screen Support**
|
|
112
118
|
- You can use multiple monitors to **separate recording view from speaker view**
|
|
113
|
-
- We support Apple's
|
|
119
|
+
- We support Apple's [Sidecar](https://support.apple.com/en-us/102597) feature for a **convenient dual screen setup with an external iPad**
|
|
114
120
|
- Each screen appearance can be individually configured
|
|
115
|
-
- All screen layouts, placement & configuration
|
|
121
|
+
- All screen layouts, placement & configuration are preserved at exit
|
|
116
122
|
- Export Dataset
|
|
117
123
|
- Facilitates **batch export of multiple sessions** into T3 (Talrómur3) dataset format
|
|
118
124
|
- Groups different recording sessions of the same speaker into a common dataset
|
|
125
|
+
- **Add voice timestamps, if VAD is enabled**
|
|
119
126
|
|
|
120
127
|
## Installation
|
|
121
128
|
|
|
129
|
+
<details>
|
|
130
|
+
<summary><b>Prerequisites</b></summary>
|
|
131
|
+
|
|
132
|
+
### Tkinter
|
|
133
|
+
|
|
134
|
+
Revoxx requires Tkinter for its graphical user interface. Tkinter is usually included with Python, but may need separate installation on some systems:
|
|
135
|
+
|
|
136
|
+
**macOS**: Tkinter should be included with Python.org installers and Homebrew Python, but integration issues can occur. If you encounter problems:
|
|
137
|
+
- For Homebrew Python: Try `brew install python-tk`
|
|
138
|
+
- For Python.org installers: Reinstall Python with the official installer
|
|
139
|
+
- Consider using a virtual environment with a fresh Python installation
|
|
140
|
+
|
|
141
|
+
**Linux**:
|
|
142
|
+
```bash
|
|
143
|
+
# Ubuntu/Debian
|
|
144
|
+
sudo apt-get install python3-tk
|
|
145
|
+
|
|
146
|
+
# Fedora
|
|
147
|
+
sudo dnf install python3-tkinter
|
|
148
|
+
|
|
149
|
+
# Arch Linux
|
|
150
|
+
sudo pacman -S tk
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Windows**: Tkinter is included with the standard Python installer.
|
|
154
|
+
|
|
155
|
+
**Verify Tkinter installation**:
|
|
156
|
+
```bash
|
|
157
|
+
python3 -c "import tkinter; print('Tkinter is installed')"
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
</details>
|
|
161
|
+
|
|
122
162
|
<details>
|
|
123
163
|
<summary><b>Basic Installation</b></summary>
|
|
124
164
|
|
|
@@ -273,11 +313,15 @@ revoxx --show-devices # List available audio devices
|
|
|
273
313
|
revoxx --session path/to/session # Open specific session
|
|
274
314
|
```
|
|
275
315
|
|
|
316
|
+
## Usage
|
|
317
|
+
|
|
318
|
+
For a guide on using Revoxx, please see the [User Guide](https://github.com/icelandic-lt/revoxx/blob/main/revoxx/doc/USER_GUIDE.md).
|
|
319
|
+
|
|
276
320
|
## Prepare recordings
|
|
277
321
|
|
|
278
322
|
Before you start recording, you need to prepare an utterance script with the utterances you want to record. This can be simplified by using the "Import Text to Script" Dialog:
|
|
279
323
|
|
|
280
|
-
<img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="
|
|
324
|
+
<img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="50%"/>
|
|
281
325
|
|
|
282
326
|
This dialog takes an input script of raw text and converts it into an utterance script. You can redo this for the same input text as many times you want, e.g. if you want to use separate emotional levels for different speakers.
|
|
283
327
|
|
|
@@ -297,17 +341,28 @@ For a script without emotion levels. This format was used for recording our non-
|
|
|
297
341
|
( <unique id> "<utterance>" )
|
|
298
342
|
```
|
|
299
343
|
|
|
300
|
-
You can see for both formats an example in the directory [t3_scripts](t3_scripts).
|
|
344
|
+
You can see for both formats an example in the directory [t3_scripts](https://github.com/icelandic-lt/revoxx/tree/main/t3_scripts).
|
|
301
345
|
|
|
302
346
|
The emotion levels can be from any monotonic numerical value range you want. If you want to follow Talrómur 3 conventions, you can use emotion intensity levels 1-5 and 6 emotions: neutral, happy, sad, angry, surprised, and helpful.
|
|
303
347
|
The emotion intensity levels are used to control the emotion intensity of the speech in combination with the specific emotion.
|
|
304
348
|
Neutral speech is treated as intensity level 0 at dataset export.
|
|
305
349
|
|
|
306
|
-
##
|
|
350
|
+
## Known Issues
|
|
307
351
|
|
|
308
|
-
|
|
352
|
+
### macOS: System Python 3.9 Icon Loading Issue
|
|
309
353
|
|
|
310
|
-
|
|
354
|
+
On macOS with the system-provided Python 3.9 (3.9.6), the application icon may fail to load with the error:
|
|
355
|
+
- "couldn't recognize data in image file"
|
|
356
|
+
- "Error: too many values to unpack (expected 2)"
|
|
357
|
+
|
|
358
|
+
**Affected versions:**
|
|
359
|
+
- macOS system Python 3.9.6 (default installation)
|
|
360
|
+
|
|
361
|
+
**Solution:**
|
|
362
|
+
- Use Python 3.9.23 or newer (available via Homebrew, uv or python.org)
|
|
363
|
+
- Alternatively, use Python 3.10 or newer
|
|
364
|
+
|
|
365
|
+
This issue is related to Tkinter's PNG handling in the macOS system Python 3.9.6 and does not affect newer Python versions.
|
|
311
366
|
|
|
312
367
|
### Linux: USB Audio Output Devices
|
|
313
368
|
|
|
@@ -28,8 +28,10 @@ This repository provides **Revoxx**, a graphical recording application for recor
|
|
|
28
28
|
|
|
29
29
|
## System Requirements
|
|
30
30
|
- **Operating System:** Linux/OS-X, should work on Windows
|
|
31
|
+
- **Python:** 3.9 - 3.13 with Tkinter support
|
|
31
32
|
- **Recording:** Audio Interface, good voice microphone and headphones
|
|
32
33
|
- **Linux:** Requires PortAudio library (`sudo apt-get install portaudio19-dev` on Ubuntu/Debian)
|
|
34
|
+
- **GUI:** Tkinter (usually included with Python, see installation notes below)
|
|
33
35
|
|
|
34
36
|
## Description
|
|
35
37
|
|
|
@@ -58,23 +60,57 @@ the Icelandic emotional speech dataset, and created this tool to minimize hassle
|
|
|
58
60
|
- Recordings are organized into **Recording Sessions**
|
|
59
61
|
- Record emotional sessions for each speaker or record more traditional LJSpeech-style sessions
|
|
60
62
|
- Seamless transitions between different recording sessions with automatic progress tracking: continue where you left-off
|
|
61
|
-
- Offers advanced search and navigation capabilities for utterances, with flexible sorting by label, emotion, text
|
|
62
|
-
content, and recorded takes
|
|
63
|
+
- Offers advanced search and navigation capabilities for utterances, with flexible sorting and ordering by label, emotion, text
|
|
64
|
+
content, text length and recorded takes
|
|
63
65
|
- Consistent audio settings & metadata for all recordings
|
|
64
66
|
- **Real-time monitoring** including toggable recording levels, mel spectrograms, maximum frequency detection, and more
|
|
65
67
|
- Customizable **industry-standard presets for Peak/RMS levels**
|
|
66
68
|
- Dedicated **Monitoring mode** for precise input calibration
|
|
67
69
|
- **Multi-Screen Support**
|
|
68
70
|
- You can use multiple monitors to **separate recording view from speaker view**
|
|
69
|
-
- We support Apple's
|
|
71
|
+
- We support Apple's [Sidecar](https://support.apple.com/en-us/102597) feature for a **convenient dual screen setup with an external iPad**
|
|
70
72
|
- Each screen appearance can be individually configured
|
|
71
|
-
- All screen layouts, placement & configuration
|
|
73
|
+
- All screen layouts, placement & configuration are preserved at exit
|
|
72
74
|
- Export Dataset
|
|
73
75
|
- Facilitates **batch export of multiple sessions** into T3 (Talrómur3) dataset format
|
|
74
76
|
- Groups different recording sessions of the same speaker into a common dataset
|
|
77
|
+
- **Add voice timestamps, if VAD is enabled**
|
|
75
78
|
|
|
76
79
|
## Installation
|
|
77
80
|
|
|
81
|
+
<details>
|
|
82
|
+
<summary><b>Prerequisites</b></summary>
|
|
83
|
+
|
|
84
|
+
### Tkinter
|
|
85
|
+
|
|
86
|
+
Revoxx requires Tkinter for its graphical user interface. Tkinter is usually included with Python, but may need separate installation on some systems:
|
|
87
|
+
|
|
88
|
+
**macOS**: Tkinter should be included with Python.org installers and Homebrew Python, but integration issues can occur. If you encounter problems:
|
|
89
|
+
- For Homebrew Python: Try `brew install python-tk`
|
|
90
|
+
- For Python.org installers: Reinstall Python with the official installer
|
|
91
|
+
- Consider using a virtual environment with a fresh Python installation
|
|
92
|
+
|
|
93
|
+
**Linux**:
|
|
94
|
+
```bash
|
|
95
|
+
# Ubuntu/Debian
|
|
96
|
+
sudo apt-get install python3-tk
|
|
97
|
+
|
|
98
|
+
# Fedora
|
|
99
|
+
sudo dnf install python3-tkinter
|
|
100
|
+
|
|
101
|
+
# Arch Linux
|
|
102
|
+
sudo pacman -S tk
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**Windows**: Tkinter is included with the standard Python installer.
|
|
106
|
+
|
|
107
|
+
**Verify Tkinter installation**:
|
|
108
|
+
```bash
|
|
109
|
+
python3 -c "import tkinter; print('Tkinter is installed')"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
</details>
|
|
113
|
+
|
|
78
114
|
<details>
|
|
79
115
|
<summary><b>Basic Installation</b></summary>
|
|
80
116
|
|
|
@@ -229,11 +265,15 @@ revoxx --show-devices # List available audio devices
|
|
|
229
265
|
revoxx --session path/to/session # Open specific session
|
|
230
266
|
```
|
|
231
267
|
|
|
268
|
+
## Usage
|
|
269
|
+
|
|
270
|
+
For a guide on using Revoxx, please see the [User Guide](https://github.com/icelandic-lt/revoxx/blob/main/revoxx/doc/USER_GUIDE.md).
|
|
271
|
+
|
|
232
272
|
## Prepare recordings
|
|
233
273
|
|
|
234
274
|
Before you start recording, you need to prepare an utterance script with the utterances you want to record. This can be simplified by using the "Import Text to Script" Dialog:
|
|
235
275
|
|
|
236
|
-
<img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="
|
|
276
|
+
<img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="50%"/>
|
|
237
277
|
|
|
238
278
|
This dialog takes an input script of raw text and converts it into an utterance script. You can redo this for the same input text as many times you want, e.g. if you want to use separate emotional levels for different speakers.
|
|
239
279
|
|
|
@@ -253,17 +293,28 @@ For a script without emotion levels. This format was used for recording our non-
|
|
|
253
293
|
( <unique id> "<utterance>" )
|
|
254
294
|
```
|
|
255
295
|
|
|
256
|
-
You can see for both formats an example in the directory [t3_scripts](t3_scripts).
|
|
296
|
+
You can see for both formats an example in the directory [t3_scripts](https://github.com/icelandic-lt/revoxx/tree/main/t3_scripts).
|
|
257
297
|
|
|
258
298
|
The emotion levels can be from any monotonic numerical value range you want. If you want to follow Talrómur 3 conventions, you can use emotion intensity levels 1-5 and 6 emotions: neutral, happy, sad, angry, surprised, and helpful.
|
|
259
299
|
The emotion intensity levels are used to control the emotion intensity of the speech in combination with the specific emotion.
|
|
260
300
|
Neutral speech is treated as intensity level 0 at dataset export.
|
|
261
301
|
|
|
262
|
-
##
|
|
302
|
+
## Known Issues
|
|
263
303
|
|
|
264
|
-
|
|
304
|
+
### macOS: System Python 3.9 Icon Loading Issue
|
|
265
305
|
|
|
266
|
-
|
|
306
|
+
On macOS with the system-provided Python 3.9 (3.9.6), the application icon may fail to load with the error:
|
|
307
|
+
- "couldn't recognize data in image file"
|
|
308
|
+
- "Error: too many values to unpack (expected 2)"
|
|
309
|
+
|
|
310
|
+
**Affected versions:**
|
|
311
|
+
- macOS system Python 3.9.6 (default installation)
|
|
312
|
+
|
|
313
|
+
**Solution:**
|
|
314
|
+
- Use Python 3.9.23 or newer (available via Homebrew, uv or python.org)
|
|
315
|
+
- Alternatively, use Python 3.10 or newer
|
|
316
|
+
|
|
317
|
+
This issue is related to Tkinter's PNG handling in the macOS system Python 3.9.6 and does not affect newer Python versions.
|
|
267
318
|
|
|
268
319
|
### Linux: USB Audio Output Devices
|
|
269
320
|
|
|
Binary file
|
|
Binary file
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = ["setuptools>=65.5.1", "wheel"]
|
|
2
|
+
requires = ["setuptools>=65.5.1", "wheel", "versioningit>=2.0.0"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "revoxx"
|
|
7
|
-
|
|
7
|
+
dynamic = ["version"]
|
|
8
8
|
description = "Speech recording application for creating high-quality speech datasets"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -37,12 +37,15 @@ dependencies = [
|
|
|
37
37
|
"sounddevice>=0.5.1", # Updated for better Linux USB audio support
|
|
38
38
|
"soundfile>=0.12.0",
|
|
39
39
|
"tqdm>=4.65.0",
|
|
40
|
+
"markdown2>2.5.1", # User guide dependencies
|
|
41
|
+
"tkinterweb>4.4.1"
|
|
40
42
|
]
|
|
41
43
|
|
|
42
44
|
[project.optional-dependencies]
|
|
43
45
|
vad = [
|
|
44
46
|
"torch>=2.0.0",
|
|
45
47
|
"silero-vad>=5.0",
|
|
48
|
+
"torchaudio<2.8.0"
|
|
46
49
|
]
|
|
47
50
|
# Note: For CPU-only PyTorch (smaller download), install with:
|
|
48
51
|
# pip install torch --index-url https://download.pytorch.org/whl/cpu
|
|
@@ -53,6 +56,7 @@ dev = [
|
|
|
53
56
|
"flake8>=6.0.0",
|
|
54
57
|
"pytest>=7.0.0",
|
|
55
58
|
"pytest-cov>=3.0.0",
|
|
59
|
+
"versioningit>=2.0.0", # For dynamic version detection during development
|
|
56
60
|
]
|
|
57
61
|
|
|
58
62
|
[project.urls]
|
|
@@ -68,15 +72,17 @@ revoxx-vadiate = "scripts_module.vadiate:main"
|
|
|
68
72
|
|
|
69
73
|
[tool.setuptools]
|
|
70
74
|
packages = ["revoxx", "revoxx.audio", "revoxx.audio.processors", "revoxx.controllers",
|
|
71
|
-
"revoxx.dataset", "revoxx.resources", "revoxx.resources.templates",
|
|
75
|
+
"revoxx.dataset", "revoxx.doc", "revoxx.resources", "revoxx.resources.templates",
|
|
72
76
|
"revoxx.session", "revoxx.ui", "revoxx.ui.dialogs", "revoxx.ui.level_meter",
|
|
73
77
|
"revoxx.ui.menus", "revoxx.ui.spectrogram", "revoxx.ui.spectrogram.controllers",
|
|
74
78
|
"revoxx.utils", "scripts_module"]
|
|
79
|
+
include-package-data = true
|
|
75
80
|
|
|
76
81
|
[tool.setuptools.package-data]
|
|
77
82
|
revoxx = [
|
|
78
83
|
"resources/*.png",
|
|
79
84
|
"resources/templates/*.txt",
|
|
85
|
+
"doc/*.md",
|
|
80
86
|
]
|
|
81
87
|
|
|
82
88
|
[tool.black]
|
|
@@ -108,4 +114,21 @@ include_trailing_comma = true
|
|
|
108
114
|
force_grid_wrap = 0
|
|
109
115
|
use_parentheses = true
|
|
110
116
|
ensure_newline_before_comments = true
|
|
111
|
-
line_length = 88
|
|
117
|
+
line_length = 88
|
|
118
|
+
|
|
119
|
+
[tool.versioningit]
|
|
120
|
+
default-version = "1.0.0"
|
|
121
|
+
|
|
122
|
+
[tool.versioningit.format]
|
|
123
|
+
|
|
124
|
+
# Format used when there have been commits since the most recent tag:
|
|
125
|
+
distance = "{base_version}.post{distance}+{vcs}{rev}"
|
|
126
|
+
# Example formatted version: 1.2.3.post42+ge174a1f
|
|
127
|
+
|
|
128
|
+
# Format used when there are uncommitted changes:
|
|
129
|
+
dirty = "{base_version}+d{build_date:%Y%m%d}"
|
|
130
|
+
# Example formatted version: 1.2.3+d20230922
|
|
131
|
+
|
|
132
|
+
# Format used when there are both commits and uncommitted changes:
|
|
133
|
+
distance-dirty = "{base_version}.post{distance}+{vcs}{rev}.d{build_date:%Y%m%d}"
|
|
134
|
+
# Example formatted version: 1.2.3.post42+ge174a1f.d20230922
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
"""Revoxx Recorder - A tool for recording emotional speech."""
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
try:
|
|
4
|
+
# Try to use versioningit for dynamic version detection
|
|
5
|
+
from versioningit import get_version
|
|
6
|
+
|
|
7
|
+
__version__ = get_version(root="../", config={})
|
|
8
|
+
except (ImportError, Exception):
|
|
9
|
+
# Fallback if versioningit is not installed or fails
|
|
10
|
+
__version__ = "1.0.0+dev"
|
|
11
|
+
|
|
4
12
|
__author__ = "Grammatek"
|
|
5
13
|
|
|
6
14
|
# Only import main entry point to avoid circular imports
|
|
@@ -688,6 +688,12 @@ class Revoxx:
|
|
|
688
688
|
# Tkinter might have changed it during setup
|
|
689
689
|
self.cleanup_manager.refresh_sigint_handler()
|
|
690
690
|
|
|
691
|
+
# Show user guide dialog if configured
|
|
692
|
+
if self.settings_manager.get_setting("show_user_guide_at_startup", True):
|
|
693
|
+
from .ui.dialogs.user_guide_dialog import UserGuideDialog
|
|
694
|
+
|
|
695
|
+
UserGuideDialog(self.window.window, self.settings_manager)
|
|
696
|
+
|
|
691
697
|
self.window.focus_window()
|
|
692
698
|
self.window.window.mainloop()
|
|
693
699
|
|
|
@@ -220,6 +220,36 @@ class DisplayController:
|
|
|
220
220
|
"""Reset the level meter display."""
|
|
221
221
|
self.reset_level_meters()
|
|
222
222
|
|
|
223
|
+
def format_take_status(self, label: str) -> str:
|
|
224
|
+
"""Format the take status display string for a given label.
|
|
225
|
+
|
|
226
|
+
This returns current take information in the status bar.
|
|
227
|
+
|
|
228
|
+
Args:
|
|
229
|
+
label: The utterance label (e.g., "utterance_001")
|
|
230
|
+
|
|
231
|
+
Returns:
|
|
232
|
+
- Empty string if label is None or empty
|
|
233
|
+
- Just the label if no active_recordings exist
|
|
234
|
+
- Just the label if no takes exist for this utterance
|
|
235
|
+
- "label - Take X/Y" if takes exist, where X is the position of the
|
|
236
|
+
current take in the list and Y is the total number of takes
|
|
237
|
+
"""
|
|
238
|
+
if not label:
|
|
239
|
+
return ""
|
|
240
|
+
|
|
241
|
+
if not self.app.active_recordings:
|
|
242
|
+
return label
|
|
243
|
+
|
|
244
|
+
current_take = self.app.state.recording.get_current_take(label)
|
|
245
|
+
existing_takes = self.app.active_recordings.get_existing_takes(label)
|
|
246
|
+
|
|
247
|
+
if existing_takes and current_take in existing_takes:
|
|
248
|
+
position = existing_takes.index(current_take) + 1
|
|
249
|
+
return f"{label} - Take {position}/{len(existing_takes)}"
|
|
250
|
+
|
|
251
|
+
return label
|
|
252
|
+
|
|
223
253
|
def set_status(self, status: str, msg_type: MsgType = MsgType.TEMPORARY) -> None:
|
|
224
254
|
"""Set the status bar text.
|
|
225
255
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
|
-
from ..constants import FileConstants
|
|
5
|
+
from ..constants import FileConstants, MsgType
|
|
6
6
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
8
|
from ..app import Revoxx
|
|
@@ -134,10 +134,6 @@ class NavigationController:
|
|
|
134
134
|
# Update info overlay if visible
|
|
135
135
|
if self.app.window.info_panel_visible:
|
|
136
136
|
self.app.display_controller.update_info_panel()
|
|
137
|
-
else:
|
|
138
|
-
# No more takes in that direction
|
|
139
|
-
direction_text = "forward" if direction > 0 else "backward"
|
|
140
|
-
self.app.display_controller.set_status(f"No more takes {direction_text}")
|
|
141
137
|
|
|
142
138
|
def find_utterance(self, index: int) -> None:
|
|
143
139
|
"""Navigate directly to a specific utterance by index.
|
|
@@ -252,15 +248,8 @@ class NavigationController:
|
|
|
252
248
|
if not current_label:
|
|
253
249
|
return
|
|
254
250
|
|
|
255
|
-
current_take = self.app.state.recording.get_current_take(current_label)
|
|
256
|
-
if not self.app.active_recordings:
|
|
257
|
-
existing_takes = []
|
|
258
|
-
else:
|
|
259
|
-
existing_takes = self.app.active_recordings.get_existing_takes(
|
|
260
|
-
current_label
|
|
261
|
-
)
|
|
262
|
-
|
|
263
251
|
# Update label with filename if we have a recording
|
|
252
|
+
current_take = self.app.state.recording.get_current_take(current_label)
|
|
264
253
|
if current_take > 0:
|
|
265
254
|
filename = f"take_{current_take:03d}{FileConstants.AUDIO_FILE_EXTENSION}"
|
|
266
255
|
self.app.window.update_label_with_filename(current_label, filename)
|
|
@@ -277,18 +266,8 @@ class NavigationController:
|
|
|
277
266
|
if second:
|
|
278
267
|
second.update_label_with_filename(current_label)
|
|
279
268
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
position = existing_takes.index(current_take) + 1
|
|
283
|
-
total = len(existing_takes)
|
|
284
|
-
self.app.display_controller.set_status(
|
|
285
|
-
f"{current_label} - Take {position}/{total}"
|
|
286
|
-
)
|
|
287
|
-
elif not existing_takes:
|
|
288
|
-
# Show label even without recordings
|
|
289
|
-
self.app.display_controller.set_status(f"{current_label}")
|
|
290
|
-
else:
|
|
291
|
-
self.app.display_controller.set_status(f"{current_label}")
|
|
269
|
+
status_text = self.app.display_controller.format_take_status(current_label)
|
|
270
|
+
self.app.display_controller.set_status(status_text, MsgType.DEFAULT)
|
|
292
271
|
|
|
293
272
|
def after_recording_saved(self, label: str) -> None:
|
|
294
273
|
"""Called after a recording has been saved to disk.
|
|
@@ -77,6 +77,9 @@ class ProcessManager:
|
|
|
77
77
|
self.set_audio_queue_active(False)
|
|
78
78
|
self.set_save_path(None)
|
|
79
79
|
|
|
80
|
+
# Check for VAD availability
|
|
81
|
+
self._check_vad_availability()
|
|
82
|
+
|
|
80
83
|
def start_processes(self) -> None:
|
|
81
84
|
"""Start background recording and playback processes."""
|
|
82
85
|
if self.app.debug:
|
|
@@ -322,3 +325,34 @@ class ProcessManager:
|
|
|
322
325
|
and self.playback_process is not None
|
|
323
326
|
and self.playback_process.is_alive()
|
|
324
327
|
)
|
|
328
|
+
|
|
329
|
+
def _check_vad_availability(self) -> None:
|
|
330
|
+
"""Check if VAD support is available and store in manager_dict."""
|
|
331
|
+
try:
|
|
332
|
+
# Try to import the VAD module from scripts_module
|
|
333
|
+
from scripts_module import vadiate # noqa: F401
|
|
334
|
+
from silero_vad import load_silero_vad # noqa: F401
|
|
335
|
+
|
|
336
|
+
vad_available = True
|
|
337
|
+
if self.app.debug:
|
|
338
|
+
print("[ProcessManager] VAD support is available")
|
|
339
|
+
except ImportError:
|
|
340
|
+
vad_available = False
|
|
341
|
+
if self.app.debug:
|
|
342
|
+
print("[ProcessManager] VAD support is not available")
|
|
343
|
+
|
|
344
|
+
if self.manager_dict is not None:
|
|
345
|
+
self.manager_dict["vad_available"] = vad_available
|
|
346
|
+
|
|
347
|
+
def is_vad_available(self) -> bool:
|
|
348
|
+
"""Check if VAD support is available.
|
|
349
|
+
|
|
350
|
+
Returns:
|
|
351
|
+
True if VAD is available
|
|
352
|
+
"""
|
|
353
|
+
if self.manager_dict:
|
|
354
|
+
try:
|
|
355
|
+
return self.manager_dict.get("vad_available", False)
|
|
356
|
+
except (AttributeError, KeyError):
|
|
357
|
+
return False
|
|
358
|
+
return False
|
|
@@ -147,10 +147,7 @@ class SessionController:
|
|
|
147
147
|
self.reload_script_and_recordings()
|
|
148
148
|
|
|
149
149
|
# Then apply saved sort settings from session (after data is loaded)
|
|
150
|
-
|
|
151
|
-
self.app.active_recordings.set_sort(
|
|
152
|
-
session.sort_column, session.sort_reverse
|
|
153
|
-
)
|
|
150
|
+
self.app.active_recordings.set_sort(session.sort_column, session.sort_reverse)
|
|
154
151
|
|
|
155
152
|
self.app.window.window.title(f"Revoxx - {session.name}")
|
|
156
153
|
self.app.menu.update_recent_sessions()
|