revoxx 1.0.0.dev22__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/MANIFEST.in +1 -0
  2. {revoxx-1.0.0.dev22/revoxx.egg-info → revoxx-1.0.2}/PKG-INFO +65 -10
  3. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/README.md +60 -9
  4. revoxx-1.0.2/doc/import_raw_text.png +0 -0
  5. revoxx-1.0.2/doc/screenshot1.png +0 -0
  6. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/pyproject.toml +27 -4
  7. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/__init__.py +9 -1
  8. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/app.py +6 -0
  9. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/display_controller.py +30 -0
  10. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/navigation_controller.py +4 -25
  11. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/process_manager.py +34 -0
  12. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/session_controller.py +1 -4
  13. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/dataset/exporter.py +121 -0
  14. revoxx-1.0.2/revoxx/doc/USER_GUIDE.md +272 -0
  15. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/dataset_dialog.py +108 -5
  16. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/open_session_dialog.py +54 -6
  17. revoxx-1.0.2/revoxx/ui/dialogs/session_settings_dialog.py +381 -0
  18. revoxx-1.0.2/revoxx/ui/dialogs/user_guide_dialog.py +249 -0
  19. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/utterance_list_base.py +50 -13
  20. revoxx-1.0.2/revoxx/ui/icon.py +40 -0
  21. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/menus/application_menu.py +13 -1
  22. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/window_base.py +8 -3
  23. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/window_factory.py +23 -30
  24. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/device_manager.py +1 -1
  25. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/process_cleanup.py +12 -4
  26. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/settings_manager.py +3 -0
  27. {revoxx-1.0.0.dev22 → revoxx-1.0.2/revoxx.egg-info}/PKG-INFO +65 -10
  28. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/SOURCES.txt +4 -0
  29. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/requires.txt +4 -0
  30. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/scripts_module/vadiate.py +19 -7
  31. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_navigation_controller.py +17 -10
  32. revoxx-1.0.0.dev22/revoxx/ui/dialogs/session_settings_dialog.py +0 -207
  33. revoxx-1.0.0.dev22/revoxx/ui/icon.py +0 -83
  34. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/LICENSE +0 -0
  35. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/__main__.py +0 -0
  36. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/__init__.py +0 -0
  37. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/audio_buffer.py +0 -0
  38. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/audio_queue_processor.py +0 -0
  39. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/buffer_manager.py +0 -0
  40. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/level_calculator.py +0 -0
  41. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/player.py +0 -0
  42. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/__init__.py +0 -0
  43. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/clipping_detector.py +0 -0
  44. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/mel_spectrogram.py +0 -0
  45. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/processors/processor_base.py +0 -0
  46. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/queue_manager.py +0 -0
  47. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/recorder.py +0 -0
  48. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/audio/shared_state.py +0 -0
  49. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/constants.py +0 -0
  50. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/__init__.py +0 -0
  51. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/audio_controller.py +0 -0
  52. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/device_controller.py +0 -0
  53. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/dialog_controller.py +0 -0
  54. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/controllers/file_operations_controller.py +0 -0
  55. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/dataset/__init__.py +0 -0
  56. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/keyboard_shortcuts.txt +0 -0
  57. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/microphone.png +0 -0
  58. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/templates/dataset_readme.txt +0 -0
  59. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/templates/index_format_with_intensity.txt +0 -0
  60. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/resources/templates/index_format_without_intensity.txt +0 -0
  61. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/__init__.py +0 -0
  62. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/inspector.py +0 -0
  63. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/manager.py +0 -0
  64. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/models.py +0 -0
  65. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/session/script_parser.py +0 -0
  66. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/__init__.py +0 -0
  67. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/__init__.py +0 -0
  68. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/dialog_utils.py +0 -0
  69. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/find_dialog.py +0 -0
  70. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/help_dialog.py +0 -0
  71. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/import_text_dialog.py +0 -0
  72. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/new_session_dialog.py +0 -0
  73. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/progress_dialog.py +0 -0
  74. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/dialogs/utterance_order_dialog.py +0 -0
  75. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/emotion_indicator.py +0 -0
  76. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/font_manager.py +0 -0
  77. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/frequency_axis.py +0 -0
  78. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/info_overlay.py +0 -0
  79. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/level_meter/__init__.py +0 -0
  80. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/level_meter/config.py +0 -0
  81. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/level_meter/led_level_meter.py +0 -0
  82. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/menus/audio_devices.py +0 -0
  83. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/recording_display_state.py +0 -0
  84. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/__init__.py +0 -0
  85. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/__init__.py +0 -0
  86. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/clipping_visualizer.py +0 -0
  87. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/edge_indicator.py +0 -0
  88. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/playback_controller.py +0 -0
  89. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/controllers/zoom_controller.py +0 -0
  90. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/display_base.py +0 -0
  91. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/display_utils.py +0 -0
  92. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/mel_processor_manager.py +0 -0
  93. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/playback_handler.py +0 -0
  94. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/recording_display.py +0 -0
  95. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/recording_handler.py +0 -0
  96. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/spectrogram/widget.py +0 -0
  97. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/style_config.py +0 -0
  98. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/themes.py +0 -0
  99. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/widget_initializer.py +0 -0
  100. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/ui/window_manager.py +0 -0
  101. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/__init__.py +0 -0
  102. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/active_recordings.py +0 -0
  103. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/audio_utils.py +0 -0
  104. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/config.py +0 -0
  105. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/file_manager.py +0 -0
  106. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/spectrogram_utils.py +0 -0
  107. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/state.py +0 -0
  108. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/text_importer.py +0 -0
  109. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx/utils/text_utils.py +0 -0
  110. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/dependency_links.txt +0 -0
  111. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/entry_points.txt +0 -0
  112. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/revoxx.egg-info/top_level.txt +0 -0
  113. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/scripts_module/__init__.py +0 -0
  114. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/scripts_module/export.py +0 -0
  115. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/setup.cfg +0 -0
  116. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_active_recordings.py +0 -0
  117. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_audio_controller.py +0 -0
  118. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_audio_queue_manager.py +0 -0
  119. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_config.py +0 -0
  120. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_dataset_exporter.py +0 -0
  121. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_device_controller.py +0 -0
  122. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_dialog_controller.py +0 -0
  123. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_display_controller.py +0 -0
  124. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_file_manager.py +0 -0
  125. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_file_operations_controller.py +0 -0
  126. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_ipc_communication.py +0 -0
  127. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_new_session_dialog.py +0 -0
  128. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_process_manager.py +0 -0
  129. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_session_controller.py +0 -0
  130. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_session_manager.py +0 -0
  131. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_session_models.py +0 -0
  132. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_stable_sorting.py +0 -0
  133. {revoxx-1.0.0.dev22 → revoxx-1.0.2}/tests/test_utterance_list_dialog_sorting.py +0 -0
@@ -1,6 +1,7 @@
1
1
  include README.md
2
2
  include LICENSE
3
3
  include requirements.txt
4
+ recursive-include doc *.md *.png
4
5
  recursive-include revoxx/resources *
5
6
  recursive-include revoxx/resources/templates *
6
7
  global-exclude __pycache__
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: revoxx
3
- Version: 1.0.0.dev22
3
+ Version: 1.0.2
4
4
  Summary: Speech recording application for creating high-quality speech datasets
5
5
  Author-email: Grammatek ehf <info@grammatek.com>
6
6
  Maintainer-email: Grammatek ehf <info@grammatek.com>
@@ -31,15 +31,19 @@ Requires-Dist: matplotlib>=3.8.0
31
31
  Requires-Dist: sounddevice>=0.5.1
32
32
  Requires-Dist: soundfile>=0.12.0
33
33
  Requires-Dist: tqdm>=4.65.0
34
+ Requires-Dist: markdown2>2.5.1
35
+ Requires-Dist: tkinterweb>4.4.1
34
36
  Provides-Extra: vad
35
37
  Requires-Dist: torch>=2.0.0; extra == "vad"
36
38
  Requires-Dist: silero-vad>=5.0; extra == "vad"
39
+ Requires-Dist: torchaudio<2.8.0; extra == "vad"
37
40
  Provides-Extra: dev
38
41
  Requires-Dist: black>=22.0.0; extra == "dev"
39
42
  Requires-Dist: isort>=5.10.0; extra == "dev"
40
43
  Requires-Dist: flake8>=6.0.0; extra == "dev"
41
44
  Requires-Dist: pytest>=7.0.0; extra == "dev"
42
45
  Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
46
+ Requires-Dist: versioningit>=2.0.0; extra == "dev"
43
47
  Dynamic: license-file
44
48
 
45
49
  # Revoxx - Record Voices
@@ -72,8 +76,10 @@ This repository provides **Revoxx**, a graphical recording application for recor
72
76
 
73
77
  ## System Requirements
74
78
  - **Operating System:** Linux/OS-X, should work on Windows
79
+ - **Python:** 3.9 - 3.13 with Tkinter support
75
80
  - **Recording:** Audio Interface, good voice microphone and headphones
76
81
  - **Linux:** Requires PortAudio library (`sudo apt-get install portaudio19-dev` on Ubuntu/Debian)
82
+ - **GUI:** Tkinter (usually included with Python, see installation notes below)
77
83
 
78
84
  ## Description
79
85
 
@@ -102,23 +108,57 @@ the Icelandic emotional speech dataset, and created this tool to minimize hassle
102
108
  - Recordings are organized into **Recording Sessions**
103
109
  - Record emotional sessions for each speaker or record more traditional LJSpeech-style sessions
104
110
  - Seamless transitions between different recording sessions with automatic progress tracking: continue where you left-off
105
- - Offers advanced search and navigation capabilities for utterances, with flexible sorting by label, emotion, text
106
- content, and recorded takes
111
+ - Offers advanced search and navigation capabilities for utterances, with flexible sorting and ordering by label, emotion, text
112
+ content, text length and recorded takes
107
113
  - Consistent audio settings & metadata for all recordings
108
114
  - **Real-time monitoring** including toggleable recording levels, mel spectrograms, maximum frequency detection, and more
109
115
  - Customizable **industry-standard presets for Peak/RMS levels**
110
116
  - Dedicated **Monitoring mode** for precise input calibration
111
117
  - **Multi-Screen Support**
112
118
  - You can use multiple monitors to **separate recording view from speaker view**
113
- - We support Apple's "Continuity" feature for a **convenient dual screen setup with an external iPad**
119
+ - We support Apple's [Sidecar](https://support.apple.com/en-us/102597) feature for a **convenient dual screen setup with an external iPad**
114
120
  - Each screen appearance can be individually configured
115
- - All screen layouts, placement & configuration is preserved at exit
121
+ - All screen layouts, placement & configuration are preserved at exit
116
122
  - Export Dataset
117
123
  - Facilitates **batch export of multiple sessions** into T3 (Talrómur3) dataset format
118
124
  - Groups different recording sessions of the same speaker into a common dataset
125
+ - **Add voice timestamps, if VAD is enabled**
119
126
 
120
127
  ## Installation
121
128
 
129
+ <details>
130
+ <summary><b>Prerequisites</b></summary>
131
+
132
+ ### Tkinter
133
+
134
+ Revoxx requires Tkinter for its graphical user interface. Tkinter is usually included with Python, but may need separate installation on some systems:
135
+
136
+ **macOS**: Tkinter should be included with Python.org installers and Homebrew Python, but integration issues can occur. If you encounter problems:
137
+ - For Homebrew Python: Try `brew install python-tk`
138
+ - For Python.org installers: Reinstall Python with the official installer
139
+ - Consider using a virtual environment with a fresh Python installation
140
+
141
+ **Linux**:
142
+ ```bash
143
+ # Ubuntu/Debian
144
+ sudo apt-get install python3-tk
145
+
146
+ # Fedora
147
+ sudo dnf install python3-tkinter
148
+
149
+ # Arch Linux
150
+ sudo pacman -S tk
151
+ ```
152
+
153
+ **Windows**: Tkinter is included with the standard Python installer.
154
+
155
+ **Verify Tkinter installation**:
156
+ ```bash
157
+ python3 -c "import tkinter; print('Tkinter is installed')"
158
+ ```
159
+
160
+ </details>
161
+
122
162
  <details>
123
163
  <summary><b>Basic Installation</b></summary>
124
164
 
@@ -273,11 +313,15 @@ revoxx --show-devices # List available audio devices
273
313
  revoxx --session path/to/session # Open specific session
274
314
  ```
275
315
 
316
+ ## Usage
317
+
318
+ For a guide on using Revoxx, please see the [User Guide](https://github.com/icelandic-lt/revoxx/blob/main/revoxx/doc/USER_GUIDE.md).
319
+
276
320
  ## Prepare recordings
277
321
 
278
322
  Before you start recording, you need to prepare an utterance script with the utterances you want to record. This can be simplified by using the "Import Text to Script" Dialog:
279
323
 
280
- <img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="30%"/>
324
+ <img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="50%"/>
281
325
 
282
326
  This dialog takes an input script of raw text and converts it into an utterance script. You can redo this for the same input text as many times as you want, e.g. if you want to use separate emotional levels for different speakers.
283
327
 
@@ -297,17 +341,28 @@ For a script without emotion levels. This format was used for recording our non-
297
341
  ( <unique id> "<utterance>" )
298
342
  ```
299
343
 
300
- You can see for both formats an example in the directory [t3_scripts](t3_scripts).
344
+ You can find an example of both formats in the directory [t3_scripts](https://github.com/icelandic-lt/revoxx/tree/main/t3_scripts).
301
345
 
302
346
  The emotion levels can be from any monotonic numerical value range you want. If you want to follow Talrómur 3 conventions, you can use emotion intensity levels 1-5 and 6 emotions: neutral, happy, sad, angry, surprised, and helpful.
303
347
  The emotion intensity levels are used to control the emotion intensity of the speech in combination with the specific emotion.
304
348
  Neutral speech is treated as intensity level 0 at dataset export.
305
349
 
306
- ## Record dataset
350
+ ## Known Issues
307
351
 
308
- to be defined
352
+ ### macOS: System Python 3.9 Icon Loading Issue
309
353
 
310
- ## Known Issues
354
+ On macOS with the system-provided Python 3.9 (3.9.6), the application icon may fail to load with one of the following errors:
355
+ - "couldn't recognize data in image file"
356
+ - "Error: too many values to unpack (expected 2)"
357
+
358
+ **Affected versions:**
359
+ - macOS system Python 3.9.6 (default installation)
360
+
361
+ **Solution:**
362
+ - Use Python 3.9.23 or newer (available via Homebrew, uv or python.org)
363
+ - Alternatively, use Python 3.10 or newer
364
+
365
+ This issue is related to Tkinter's PNG handling in the macOS system Python 3.9.6 and does not affect newer Python versions.
311
366
 
312
367
  ### Linux: USB Audio Output Devices
313
368
 
@@ -28,8 +28,10 @@ This repository provides **Revoxx**, a graphical recording application for recor
28
28
 
29
29
  ## System Requirements
30
30
  - **Operating System:** Linux/OS-X, should work on Windows
31
+ - **Python:** 3.9 - 3.13 with Tkinter support
31
32
  - **Recording:** Audio Interface, good voice microphone and headphones
32
33
  - **Linux:** Requires PortAudio library (`sudo apt-get install portaudio19-dev` on Ubuntu/Debian)
34
+ - **GUI:** Tkinter (usually included with Python, see installation notes below)
33
35
 
34
36
  ## Description
35
37
 
@@ -58,23 +60,57 @@ the Icelandic emotional speech dataset, and created this tool to minimize hassle
58
60
  - Recordings are organized into **Recording Sessions**
59
61
  - Record emotional sessions for each speaker or record more traditional LJSpeech-style sessions
60
62
  - Seamless transitions between different recording sessions with automatic progress tracking: continue where you left-off
61
- - Offers advanced search and navigation capabilities for utterances, with flexible sorting by label, emotion, text
62
- content, and recorded takes
63
+ - Offers advanced search and navigation capabilities for utterances, with flexible sorting and ordering by label, emotion, text
64
+ content, text length and recorded takes
63
65
  - Consistent audio settings & metadata for all recordings
64
66
  - **Real-time monitoring** including toggleable recording levels, mel spectrograms, maximum frequency detection, and more
65
67
  - Customizable **industry-standard presets for Peak/RMS levels**
66
68
  - Dedicated **Monitoring mode** for precise input calibration
67
69
  - **Multi-Screen Support**
68
70
  - You can use multiple monitors to **separate recording view from speaker view**
69
- - We support Apple's "Continuity" feature for a **convenient dual screen setup with an external iPad**
71
+ - We support Apple's [Sidecar](https://support.apple.com/en-us/102597) feature for a **convenient dual screen setup with an external iPad**
70
72
  - Each screen appearance can be individually configured
71
- - All screen layouts, placement & configuration is preserved at exit
73
+ - All screen layouts, placement & configuration are preserved at exit
72
74
  - Export Dataset
73
75
  - Facilitates **batch export of multiple sessions** into T3 (Talrómur3) dataset format
74
76
  - Groups different recording sessions of the same speaker into a common dataset
77
+ - **Add voice timestamps, if VAD is enabled**
75
78
 
76
79
  ## Installation
77
80
 
81
+ <details>
82
+ <summary><b>Prerequisites</b></summary>
83
+
84
+ ### Tkinter
85
+
86
+ Revoxx requires Tkinter for its graphical user interface. Tkinter is usually included with Python, but may need separate installation on some systems:
87
+
88
+ **macOS**: Tkinter should be included with Python.org installers and Homebrew Python, but integration issues can occur. If you encounter problems:
89
+ - For Homebrew Python: Try `brew install python-tk`
90
+ - For Python.org installers: Reinstall Python with the official installer
91
+ - Consider using a virtual environment with a fresh Python installation
92
+
93
+ **Linux**:
94
+ ```bash
95
+ # Ubuntu/Debian
96
+ sudo apt-get install python3-tk
97
+
98
+ # Fedora
99
+ sudo dnf install python3-tkinter
100
+
101
+ # Arch Linux
102
+ sudo pacman -S tk
103
+ ```
104
+
105
+ **Windows**: Tkinter is included with the standard Python installer.
106
+
107
+ **Verify Tkinter installation**:
108
+ ```bash
109
+ python3 -c "import tkinter; print('Tkinter is installed')"
110
+ ```
111
+
112
+ </details>
113
+
78
114
  <details>
79
115
  <summary><b>Basic Installation</b></summary>
80
116
 
@@ -229,11 +265,15 @@ revoxx --show-devices # List available audio devices
229
265
  revoxx --session path/to/session # Open specific session
230
266
  ```
231
267
 
268
+ ## Usage
269
+
270
+ For a guide on using Revoxx, please see the [User Guide](https://github.com/icelandic-lt/revoxx/blob/main/revoxx/doc/USER_GUIDE.md).
271
+
232
272
  ## Prepare recordings
233
273
 
234
274
  Before you start recording, you need to prepare an utterance script with the utterances you want to record. This can be simplified by using the "Import Text to Script" Dialog:
235
275
 
236
- <img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="30%"/>
276
+ <img src="https://raw.githubusercontent.com/icelandic-lt/revoxx/main/doc/import_raw_text.png" alt="Raw text import dialog" width="50%"/>
237
277
 
238
278
  This dialog takes an input script of raw text and converts it into an utterance script. You can redo this for the same input text as many times as you want, e.g. if you want to use separate emotional levels for different speakers.
239
279
 
@@ -253,17 +293,28 @@ For a script without emotion levels. This format was used for recording our non-
253
293
  ( <unique id> "<utterance>" )
254
294
  ```
255
295
 
256
- You can see for both formats an example in the directory [t3_scripts](t3_scripts).
296
+ You can find an example of both formats in the directory [t3_scripts](https://github.com/icelandic-lt/revoxx/tree/main/t3_scripts).
257
297
 
258
298
  The emotion levels can be from any monotonic numerical value range you want. If you want to follow Talrómur 3 conventions, you can use emotion intensity levels 1-5 and 6 emotions: neutral, happy, sad, angry, surprised, and helpful.
259
299
  The emotion intensity levels are used to control the emotion intensity of the speech in combination with the specific emotion.
260
300
  Neutral speech is treated as intensity level 0 at dataset export.
261
301
 
262
- ## Record dataset
302
+ ## Known Issues
263
303
 
264
- to be defined
304
+ ### macOS: System Python 3.9 Icon Loading Issue
265
305
 
266
- ## Known Issues
306
+ On macOS with the system-provided Python 3.9 (3.9.6), the application icon may fail to load with one of the following errors:
307
+ - "couldn't recognize data in image file"
308
+ - "Error: too many values to unpack (expected 2)"
309
+
310
+ **Affected versions:**
311
+ - macOS system Python 3.9.6 (default installation)
312
+
313
+ **Solution:**
314
+ - Use Python 3.9.23 or newer (available via Homebrew, uv or python.org)
315
+ - Alternatively, use Python 3.10 or newer
316
+
317
+ This issue is related to Tkinter's PNG handling in the macOS system Python 3.9.6 and does not affect newer Python versions.
267
318
 
268
319
  ### Linux: USB Audio Output Devices
269
320
 
Binary file
Binary file
@@ -1,10 +1,10 @@
1
1
  [build-system]
2
- requires = ["setuptools>=65.5.1", "wheel"]
2
+ requires = ["setuptools>=65.5.1", "wheel", "versioningit>=2.0.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "revoxx"
7
- version = "1.0.0.dev22"
7
+ dynamic = ["version"]
8
8
  description = "Speech recording application for creating high-quality speech datasets"
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -37,12 +37,15 @@ dependencies = [
37
37
  "sounddevice>=0.5.1", # Updated for better Linux USB audio support
38
38
  "soundfile>=0.12.0",
39
39
  "tqdm>=4.65.0",
40
+ "markdown2>2.5.1", # User guide dependencies
41
+ "tkinterweb>4.4.1"
40
42
  ]
41
43
 
42
44
  [project.optional-dependencies]
43
45
  vad = [
44
46
  "torch>=2.0.0",
45
47
  "silero-vad>=5.0",
48
+ "torchaudio<2.8.0"
46
49
  ]
47
50
  # Note: For CPU-only PyTorch (smaller download), install with:
48
51
  # pip install torch --index-url https://download.pytorch.org/whl/cpu
@@ -53,6 +56,7 @@ dev = [
53
56
  "flake8>=6.0.0",
54
57
  "pytest>=7.0.0",
55
58
  "pytest-cov>=3.0.0",
59
+ "versioningit>=2.0.0", # For dynamic version detection during development
56
60
  ]
57
61
 
58
62
  [project.urls]
@@ -68,15 +72,17 @@ revoxx-vadiate = "scripts_module.vadiate:main"
68
72
 
69
73
  [tool.setuptools]
70
74
  packages = ["revoxx", "revoxx.audio", "revoxx.audio.processors", "revoxx.controllers",
71
- "revoxx.dataset", "revoxx.resources", "revoxx.resources.templates",
75
+ "revoxx.dataset", "revoxx.doc", "revoxx.resources", "revoxx.resources.templates",
72
76
  "revoxx.session", "revoxx.ui", "revoxx.ui.dialogs", "revoxx.ui.level_meter",
73
77
  "revoxx.ui.menus", "revoxx.ui.spectrogram", "revoxx.ui.spectrogram.controllers",
74
78
  "revoxx.utils", "scripts_module"]
79
+ include-package-data = true
75
80
 
76
81
  [tool.setuptools.package-data]
77
82
  revoxx = [
78
83
  "resources/*.png",
79
84
  "resources/templates/*.txt",
85
+ "doc/*.md",
80
86
  ]
81
87
 
82
88
  [tool.black]
@@ -108,4 +114,21 @@ include_trailing_comma = true
108
114
  force_grid_wrap = 0
109
115
  use_parentheses = true
110
116
  ensure_newline_before_comments = true
111
- line_length = 88
117
+ line_length = 88
118
+
119
+ [tool.versioningit]
120
+ default-version = "1.0.0"
121
+
122
+ [tool.versioningit.format]
123
+
124
+ # Format used when there have been commits since the most recent tag:
125
+ distance = "{base_version}.post{distance}+{vcs}{rev}"
126
+ # Example formatted version: 1.2.3.post42+ge174a1f
127
+
128
+ # Format used when there are uncommitted changes:
129
+ dirty = "{base_version}+d{build_date:%Y%m%d}"
130
+ # Example formatted version: 1.2.3+d20230922
131
+
132
+ # Format used when there are both commits and uncommitted changes:
133
+ distance-dirty = "{base_version}.post{distance}+{vcs}{rev}.d{build_date:%Y%m%d}"
134
+ # Example formatted version: 1.2.3.post42+ge174a1f.d20230922
@@ -1,6 +1,14 @@
1
1
  """Revoxx Recorder - A tool for recording emotional speech."""
2
2
 
3
- __version__ = "1.0.0"
3
+ try:
4
+ # Try to use versioningit for dynamic version detection
5
+ from versioningit import get_version
6
+
7
+ __version__ = get_version(root="../", config={})
8
+ except (ImportError, Exception):
9
+ # Fallback if versioningit is not installed or fails
10
+ __version__ = "1.0.0+dev"
11
+
4
12
  __author__ = "Grammatek"
5
13
 
6
14
  # Only import main entry point to avoid circular imports
@@ -688,6 +688,12 @@ class Revoxx:
688
688
  # Tkinter might have changed it during setup
689
689
  self.cleanup_manager.refresh_sigint_handler()
690
690
 
691
+ # Show user guide dialog if configured
692
+ if self.settings_manager.get_setting("show_user_guide_at_startup", True):
693
+ from .ui.dialogs.user_guide_dialog import UserGuideDialog
694
+
695
+ UserGuideDialog(self.window.window, self.settings_manager)
696
+
691
697
  self.window.focus_window()
692
698
  self.window.window.mainloop()
693
699
 
@@ -220,6 +220,36 @@ class DisplayController:
220
220
  """Reset the level meter display."""
221
221
  self.reset_level_meters()
222
222
 
223
+ def format_take_status(self, label: str) -> str:
224
+ """Format the take status display string for a given label.
225
+
226
+ This returns current take information in the status bar.
227
+
228
+ Args:
229
+ label: The utterance label (e.g., "utterance_001")
230
+
231
+ Returns:
232
+ - Empty string if label is None or empty
233
+ - Just the label if no active_recordings exist
234
+ - Just the label if no takes exist for this utterance
235
+ - "label - Take X/Y" if takes exist, where X is the position of the
236
+ current take in the list and Y is the total number of takes
237
+ """
238
+ if not label:
239
+ return ""
240
+
241
+ if not self.app.active_recordings:
242
+ return label
243
+
244
+ current_take = self.app.state.recording.get_current_take(label)
245
+ existing_takes = self.app.active_recordings.get_existing_takes(label)
246
+
247
+ if existing_takes and current_take in existing_takes:
248
+ position = existing_takes.index(current_take) + 1
249
+ return f"{label} - Take {position}/{len(existing_takes)}"
250
+
251
+ return label
252
+
223
253
  def set_status(self, status: str, msg_type: MsgType = MsgType.TEMPORARY) -> None:
224
254
  """Set the status bar text.
225
255
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  from typing import TYPE_CHECKING
4
4
 
5
- from ..constants import FileConstants
5
+ from ..constants import FileConstants, MsgType
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from ..app import Revoxx
@@ -134,10 +134,6 @@ class NavigationController:
134
134
  # Update info overlay if visible
135
135
  if self.app.window.info_panel_visible:
136
136
  self.app.display_controller.update_info_panel()
137
- else:
138
- # No more takes in that direction
139
- direction_text = "forward" if direction > 0 else "backward"
140
- self.app.display_controller.set_status(f"No more takes {direction_text}")
141
137
 
142
138
  def find_utterance(self, index: int) -> None:
143
139
  """Navigate directly to a specific utterance by index.
@@ -252,15 +248,8 @@ class NavigationController:
252
248
  if not current_label:
253
249
  return
254
250
 
255
- current_take = self.app.state.recording.get_current_take(current_label)
256
- if not self.app.active_recordings:
257
- existing_takes = []
258
- else:
259
- existing_takes = self.app.active_recordings.get_existing_takes(
260
- current_label
261
- )
262
-
263
251
  # Update label with filename if we have a recording
252
+ current_take = self.app.state.recording.get_current_take(current_label)
264
253
  if current_take > 0:
265
254
  filename = f"take_{current_take:03d}{FileConstants.AUDIO_FILE_EXTENSION}"
266
255
  self.app.window.update_label_with_filename(current_label, filename)
@@ -277,18 +266,8 @@ class NavigationController:
277
266
  if second:
278
267
  second.update_label_with_filename(current_label)
279
268
 
280
- if existing_takes and current_take in existing_takes:
281
- # Find position in the list
282
- position = existing_takes.index(current_take) + 1
283
- total = len(existing_takes)
284
- self.app.display_controller.set_status(
285
- f"{current_label} - Take {position}/{total}"
286
- )
287
- elif not existing_takes:
288
- # Show label even without recordings
289
- self.app.display_controller.set_status(f"{current_label}")
290
- else:
291
- self.app.display_controller.set_status(f"{current_label}")
269
+ status_text = self.app.display_controller.format_take_status(current_label)
270
+ self.app.display_controller.set_status(status_text, MsgType.DEFAULT)
292
271
 
293
272
  def after_recording_saved(self, label: str) -> None:
294
273
  """Called after a recording has been saved to disk.
@@ -77,6 +77,9 @@ class ProcessManager:
77
77
  self.set_audio_queue_active(False)
78
78
  self.set_save_path(None)
79
79
 
80
+ # Check for VAD availability
81
+ self._check_vad_availability()
82
+
80
83
  def start_processes(self) -> None:
81
84
  """Start background recording and playback processes."""
82
85
  if self.app.debug:
@@ -322,3 +325,34 @@ class ProcessManager:
322
325
  and self.playback_process is not None
323
326
  and self.playback_process.is_alive()
324
327
  )
328
+
329
+ def _check_vad_availability(self) -> None:
330
+ """Check if VAD support is available and store in manager_dict."""
331
+ try:
332
+ # Try to import the VAD module from scripts_module
333
+ from scripts_module import vadiate # noqa: F401
334
+ from silero_vad import load_silero_vad # noqa: F401
335
+
336
+ vad_available = True
337
+ if self.app.debug:
338
+ print("[ProcessManager] VAD support is available")
339
+ except ImportError:
340
+ vad_available = False
341
+ if self.app.debug:
342
+ print("[ProcessManager] VAD support is not available")
343
+
344
+ if self.manager_dict is not None:
345
+ self.manager_dict["vad_available"] = vad_available
346
+
347
+ def is_vad_available(self) -> bool:
348
+ """Check if VAD support is available.
349
+
350
+ Returns:
351
+ True if VAD is available
352
+ """
353
+ if self.manager_dict:
354
+ try:
355
+ return self.manager_dict.get("vad_available", False)
356
+ except (AttributeError, KeyError):
357
+ return False
358
+ return False
@@ -147,10 +147,7 @@ class SessionController:
147
147
  self.reload_script_and_recordings()
148
148
 
149
149
  # Then apply saved sort settings from session (after data is loaded)
150
- if session:
151
- self.app.active_recordings.set_sort(
152
- session.sort_column, session.sort_reverse
153
- )
150
+ self.app.active_recordings.set_sort(session.sort_column, session.sort_reverse)
154
151
 
155
152
  self.app.window.window.title(f"Revoxx - {session.name}")
156
153
  self.app.menu.update_recent_sessions()