pelican-nlp 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pelican_nlp/Nils_backup/__init__.py +0 -0
  2. pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
  3. pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
  4. pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
  5. pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
  6. pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
  7. pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
  8. pelican_nlp/Nils_backup/fluency/config.py +231 -0
  9. pelican_nlp/Nils_backup/fluency/main.py +182 -0
  10. pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
  11. pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
  12. pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
  13. pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
  14. pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
  15. pelican_nlp/Nils_backup/fluency/utils.py +41 -0
  16. pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
  17. pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
  18. pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
  19. pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
  20. pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
  21. pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
  22. pelican_nlp/Nils_backup/transcription/test.json +1 -0
  23. pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
  24. pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
  25. pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
  26. pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
  27. pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
  28. pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
  29. pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
  30. pelican_nlp/__init__.py +1 -1
  31. pelican_nlp/_version.py +1 -0
  32. pelican_nlp/configuration_files/config_audio.yml +150 -0
  33. pelican_nlp/configuration_files/config_discourse.yml +104 -0
  34. pelican_nlp/configuration_files/config_fluency.yml +108 -0
  35. pelican_nlp/configuration_files/config_general.yml +131 -0
  36. pelican_nlp/configuration_files/config_morteza.yml +103 -0
  37. pelican_nlp/praat/__init__.py +29 -0
  38. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/METADATA +15 -14
  39. pelican_nlp-0.1.3.dist-info/RECORD +75 -0
  40. pelican_nlp-0.1.1.dist-info/RECORD +0 -39
  41. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/WHEEL +0 -0
  42. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/licenses/LICENSE +0 -0
  43. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,985 @@
1
import json
import os
import sys
import tempfile

import numpy as np
# PyQt5 is imported before pyqtgraph, as in the original ordering.
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QWidget, QTableWidget, QTableWidgetItem,
    QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QFileDialog, QMessageBox,
    QInputDialog, QMenu, QAction, QScrollBar, QAbstractItemView,
)
from PyQt5.QtCore import Qt, QTimer, QObject, pyqtSignal, QThread
from PyQt5.QtGui import QColor
from PyQt5.QtWidgets import QUndoStack, QUndoCommand
import pyqtgraph as pg
from pydub import AudioSegment
from pydub.playback import _play_with_simpleaudio as play_audio
17
+
18
+
19
class AddRowCommand(QUndoCommand):
    """Undoable insertion of a single transcript row.

    ``redo`` asks the owning tool to insert ``row_data`` at ``row_position``;
    ``undo`` asks it to remove the row at that same position.
    """

    def __init__(self, tool, row_position, row_data, description="Add Row"):
        super().__init__(description)
        self.tool, self.row_position, self.row_data = tool, row_position, row_data

    def redo(self):
        """Insert the stored row through the tool."""
        owner = self.tool
        owner.insert_row(self.row_position, self.row_data)

    def undo(self):
        """Remove the row that ``redo`` inserted."""
        owner = self.tool
        owner.remove_row(self.row_position)
31
+
32
+
33
class DeleteRowsCommand(QUndoCommand):
    """Undoable deletion of one or more transcript rows.

    ``rows_data`` and ``row_positions`` are parallel sequences: the data that
    was stored in each row and the index the row occupied before deletion.
    """

    def __init__(self, tool, rows_data, row_positions, description="Delete Rows"):
        super().__init__(description)
        self.tool = tool
        self.rows_data = rows_data
        self.row_positions = row_positions

    def redo(self):
        """Remove the rows, bottom-up."""
        # Highest index first so earlier removals do not shift the indices
        # of the rows that are still waiting to be removed.
        for row in sorted(self.row_positions, reverse=True):
            self.tool.remove_row(row)

    def undo(self):
        """Re-insert the rows, top-down, at their original positions."""
        # Sort on the row index only. Sorting the raw (index, dict) tuples
        # would fall back to comparing the dict payloads when two indices
        # are equal, which raises TypeError.
        restore_order = sorted(
            zip(self.row_positions, self.rows_data),
            key=lambda pair: pair[0],
        )
        for row, data in restore_order:
            self.tool.insert_row(row, data)
47
+
48
+
49
class EditCellCommand(QUndoCommand):
    """Undoable change of a single table cell from ``old_value`` to ``new_value``."""

    def __init__(self, tool, row, column, old_value, new_value, description="Edit Cell"):
        super().__init__(description)
        self.tool = tool
        self.row, self.column = row, column
        self.old_value, self.new_value = old_value, new_value

    def _apply(self, value):
        # Single place that writes a value into the addressed cell.
        self.tool.set_cell(self.row, self.column, value)

    def redo(self):
        """Write the new value into the cell."""
        self._apply(self.new_value)

    def undo(self):
        """Restore the value the cell held before the edit."""
        self._apply(self.old_value)
63
+
64
+
65
class BulkEditSpeakerCommand(QUndoCommand):
    """Undoable assignment of one speaker to several rows at once.

    ``old_speakers`` is parallel to ``row_positions`` and holds the speaker
    each row had before the bulk edit.
    """

    def __init__(self, tool, row_positions, old_speakers, new_speaker, description="Bulk Edit Speaker"):
        super().__init__(description)
        self.tool = tool
        self.row_positions = row_positions
        self.old_speakers = old_speakers
        self.new_speaker = new_speaker

    def redo(self):
        """Assign the new speaker to every targeted row."""
        assign = self.tool.set_speaker
        target = self.new_speaker
        for position in self.row_positions:
            assign(position, target)

    def undo(self):
        """Give every targeted row its previous speaker back."""
        assign = self.tool.set_speaker
        for position, previous in zip(self.row_positions, self.old_speakers):
            assign(position, previous)
80
+
81
+
82
class AudioLoader(QObject):
    """Worker object that decodes an audio file and prepares its waveform.

    Emits ``finished(audio, samples, duration)`` on success and
    ``error(message)`` if anything raises while loading.
    """

    finished = pyqtSignal(AudioSegment, np.ndarray, float)
    error = pyqtSignal(str)

    def __init__(self, file_path, downsample_factor=100):
        super().__init__()
        self.file_path = file_path
        self.downsample_factor = downsample_factor

    def run(self):
        """Load the file, normalise the samples and emit the result."""
        try:
            audio = AudioSegment.from_file(self.file_path).set_channels(1)
            samples = np.array(audio.get_array_of_samples()).astype(np.float32)
            duration = audio.duration_seconds

            # Peak-normalise to [-1, 1]. The peak is computed once; an empty
            # or all-zero signal is left untouched instead of raising or
            # dividing by zero.
            peak = np.max(np.abs(samples)) if samples.size else 0.0
            if peak != 0:
                samples /= peak

            # Only very long recordings are thinned out for plotting.
            if len(samples) > 1_000_000:
                samples = self.downsample_waveform(samples, self.downsample_factor)
            self.finished.emit(audio, samples, duration)
        except Exception as e:
            # Any failure is reported through the error signal as text.
            self.error.emit(str(e))

    def downsample_waveform(self, samples, factor):
        """Return the mean of each consecutive block of ``factor`` samples.

        Trailing samples that do not fill a whole block are dropped.
        """
        num_blocks = len(samples) // factor
        # Reshape into (blocks, factor) and average each row in one
        # vectorised call instead of a Python-level loop over blocks.
        trimmed = np.asarray(samples)[:num_blocks * factor]
        return trimmed.reshape(num_blocks, factor).mean(axis=1)
106
+
107
+
108
class DraggableLine(pg.InfiniteLine):
    """Movable ``InfiniteLine`` that reports position changes.

    ``positionChangedFinished`` carries the line itself as its payload.
    """

    positionChangedFinished = pyqtSignal(object)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setMovable(True)
        self.setCursor(Qt.SizeHorCursor)
        # Last position that was reported on mouse release.
        self._old_value = self.value()

    def mouseReleaseEvent(self, event):
        """Emit the signal on release, but only if the line actually moved."""
        super().mouseReleaseEvent(event)
        released_at = self.value()
        if self._old_value == released_at:
            return
        self.positionChangedFinished.emit(self)
        self._old_value = released_at

    def mouseMoveEvent(self, event):
        """Emit the signal on every mouse-move event delivered to the line."""
        super().mouseMoveEvent(event)
        self.positionChangedFinished.emit(self)
127
+
128
+
129
+ class AnnotationTool(QMainWindow):
130
def __init__(self):
    """Create the main window, build the UI and try to recover an autosave."""
    super().__init__()
    self.setWindowTitle("PELICAn Transcription Tool")
    self.setGeometry(100, 100, 1600, 900)

    # Audio / playback state
    self.audio_segment = None
    self.waveform_data = None
    self.duration = 0.0
    self.play_obj = None
    self.current_time = 0.0
    self.is_playing = False

    # Annotation / editing state
    self.speakers = []
    self.undo_stack = QUndoStack(self)
    self.old_values = {}
    self.word_lines = []

    # Use a fixed autosave location. A NamedTemporaryFile created on every
    # launch leaves an open handle behind and yields a brand-new, empty file
    # each time, so an autosave from an earlier session could never be found.
    # NOTE(review): concurrent instances will share this file.
    self.temp_file_path = os.path.join(
        tempfile.gettempdir(), "pelican_annotation_autosave.json"
    )

    self.row_to_lines = {}

    # Setup UI components
    self.setup_ui()
    self.setup_signals()

    # Load autosave if exists
    self.load_autosave()
156
+
157
def setup_ui(self):
    """Build the window contents top to bottom and install them."""
    self.layout = QVBoxLayout()

    # Each builder appends its section to ``self.layout``, in this order.
    section_builders = (
        self.setup_undo_redo_buttons,
        self.setup_waveform_and_transcript,
        self.setup_audio_controls,
        self.setup_buttons,
    )
    for build_section in section_builders:
        build_section()

    # Start with an empty transcript table.
    self.populate_table([])

    central = QWidget()
    central.setLayout(self.layout)
    self.setCentralWidget(central)
168
+
169
def setup_signals(self):
    """Start the periodic timers and wire table/waveform signals to handlers."""
    # Autosave every 5000 ms.
    autosave_timer = QTimer()
    autosave_timer.timeout.connect(self.autosave)
    autosave_timer.start(5000)
    self.autosave_timer = autosave_timer

    # Refresh the highlighted row every 100 ms.
    scroll_timer = QTimer()
    scroll_timer.timeout.connect(self.highlight_current_row)
    scroll_timer.start(100)
    self.auto_scroll_timer = scroll_timer

    table = self.table
    table.cellDoubleClicked.connect(self.on_cell_double_clicked)
    table.itemChanged.connect(self.on_item_changed)
    table.cellClicked.connect(self.on_table_clicked)
    table.currentCellChanged.connect(self.on_current_cell_changed)
    table.selectionModel().selectionChanged.connect(self.on_selection_changed)

    self.waveform_plot.scene().sigMouseClicked.connect(self.on_waveform_clicked)
184
+
185
def setup_undo_redo_buttons(self):
    """Add a row with Undo and Redo buttons bound to the undo stack."""
    stack = self.undo_stack
    button_row = QHBoxLayout()
    for caption, slot in (("Undo", stack.undo), ("Redo", stack.redo)):
        button = QPushButton(caption)
        button.clicked.connect(slot)
        button_row.addWidget(button)
    self.layout.addLayout(button_row)
194
+
195
def setup_waveform_and_transcript(self):
    """Create the waveform plot with its scrollbar next to the transcript table."""
    # Waveform plot
    plot = pg.PlotWidget()
    plot.setYRange(-1, 1)
    plot.showGrid(x=True, y=False)
    plot.setLabel('bottom', 'Time', 's')
    self.waveform_plot = plot

    # Horizontal scrollbar shown under the plot
    scrollbar = QScrollBar(Qt.Horizontal)
    scrollbar.setMinimum(0)
    scrollbar.valueChanged.connect(self.on_scrollbar_moved)
    self.waveform_scrollbar = scrollbar

    plot_column = QVBoxLayout()
    plot_column.addWidget(plot)
    plot_column.addWidget(scrollbar)

    # Transcript table: Start | End | Word | Speaker
    table = QTableWidget()
    table.setColumnCount(4)
    table.setHorizontalHeaderLabels(["Start", "End", "Word", "Speaker"])
    table.setSelectionBehavior(QAbstractItemView.SelectRows)
    table.setSelectionMode(QAbstractItemView.ExtendedSelection)
    table.setEditTriggers(QAbstractItemView.DoubleClicked | QAbstractItemView.SelectedClicked)
    table.setStyleSheet("selection-background-color: lightblue;")
    table.verticalHeader().setVisible(False)
    table.setAlternatingRowColors(True)
    table.setContextMenuPolicy(Qt.CustomContextMenu)
    table.customContextMenuRequested.connect(self.show_context_menu)
    table.setSortingEnabled(False)
    self.table = table

    side_by_side = QHBoxLayout()
    side_by_side.addLayout(plot_column)
    side_by_side.addWidget(table)
    self.layout.addLayout(side_by_side)

    # Red vertical line marking the current playback position.
    self.playtime_line = plot.addLine(x=0, pen='r')
225
+
226
def setup_audio_controls(self):
    """Add the Play / Stop / Return-to-selection button row."""
    self.play_button = QPushButton("Play")
    self.stop_button = QPushButton("Stop")
    self.return_button = QPushButton("Return to Current Selection (X)")

    wiring = (
        (self.play_button, self.toggle_playback),
        (self.stop_button, self.stop_playback),
        (self.return_button, self.return_to_selection),
    )
    controls = QHBoxLayout()
    for button, slot in wiring:
        button.clicked.connect(slot)
        controls.addWidget(button)
    self.layout.addLayout(controls)
238
+
239
def setup_buttons(self):
    """Add the row of file and editing buttons."""
    # (caption, slot, object name or None); left-to-right display order.
    specs = (
        ("Load Audio", self.load_audio_file, "Load Audio"),
        ("Load Transcript", self.load_transcript, None),
        ("Save Annotations", self.save_annotations, None),
        ("Add Below", self.add_below, None),
        ("Delete Selected", self.delete_selected, None),
        ("Bulk Edit Speaker", self.bulk_edit_speaker, None),
    )
    button_row = QHBoxLayout()
    for caption, slot, object_name in specs:
        button = QPushButton(caption)
        button.clicked.connect(slot)
        if object_name is not None:
            # The object name is how other methods look this button up.
            button.setObjectName(object_name)
        button_row.addWidget(button)
    self.layout.addLayout(button_row)
261
+
262
def keyPressEvent(self, event):
    """Handle the X shortcut; defer every other key to the base class."""
    if event.key() != Qt.Key_X:
        super().keyPressEvent(event)
        return
    self.return_to_selection()
267
+
268
def populate_table(self, data):
    """Replace the table contents with ``data``.

    ``data`` is an iterable of dicts read with the keys ``start_time``,
    ``end_time``, ``word`` and ``speaker``. Existing rows and word-boundary
    lines are discarded first.
    """
    data = list(data)
    self.table.setRowCount(0)

    # Clear existing lines
    for item in self.word_lines:
        self.waveform_plot.removeItem(item['line'])
    self.word_lines = []
    self.row_to_lines = {}

    # Collect the speakers up front. A dropdown can only select an entry it
    # contains, so without this the speakers stored in ``data`` were dropped.
    # ``or ""`` also covers a speaker of None, which save writes for blanks.
    self.speakers = []
    for entry in data:
        name = entry.get('speaker') or ""
        if name and name not in self.speakers:
            self.speakers.append(name)

    # Keep itemChanged quiet while filling, so programmatic population is
    # not recorded as user edits on the undo stack.
    was_blocked = self.table.blockSignals(True)
    try:
        for row_idx, entry in enumerate(data):
            self.table.insertRow(row_idx)
            start_time = round(float(entry.get('start_time', 0.0)), 2)
            end_time = round(float(entry.get('end_time', 1.0)), 2)
            word = entry.get('word', "")
            word = "" if word is None else str(word)
            speaker = entry.get('speaker') or ""

            start_item = QTableWidgetItem(f"{start_time:.2f}")
            end_item = QTableWidgetItem(f"{end_time:.2f}")
            word_item = QTableWidgetItem(word)
            start_item.setTextAlignment(Qt.AlignCenter)
            end_item.setTextAlignment(Qt.AlignCenter)
            word_item.setTextAlignment(Qt.AlignLeft | Qt.AlignVCenter)
            word_item.setBackground(QColor("black"))
            word_item.setForeground(QColor("white"))
            self.table.setItem(row_idx, 0, start_item)
            self.table.setItem(row_idx, 1, end_item)
            self.table.setItem(row_idx, 2, word_item)

            speaker_dropdown = QComboBox()
            speaker_dropdown.addItems(self.speakers + [""])
            speaker_dropdown.setCurrentText(speaker)
            speaker_dropdown.currentTextChanged.connect(self.on_speaker_changed)
            self.table.setCellWidget(row_idx, 3, speaker_dropdown)

            # Same rule as insert_row: lines are drawn only with audio loaded.
            if self.audio_segment is not None:
                self.add_word_lines(row_idx, start_time, end_time)
    finally:
        self.table.blockSignals(was_blocked)
301
+
302
def add_word_lines(self, row, start_time, end_time):
    """Create the two draggable boundary lines for ``row`` and register them."""
    created = []
    # Column 0 is the start boundary, column 1 the end boundary.
    for column, position in enumerate((start_time, end_time)):
        created.append(
            (column, DraggableLine(pos=position, angle=90, pen=pg.mkPen('g', width=1)))
        )

    for _, line in created:
        self.waveform_plot.addItem(line)

    self.word_lines.extend(
        {'line': line, 'row': row, 'column': column} for column, line in created
    )
    self.row_to_lines.setdefault(row, []).extend(line for _, line in created)

    for _, line in created:
        line.positionChangedFinished.connect(self.on_line_moved_finished)
315
+
316
def on_speaker_changed(self, new_speaker):
    """Register a previously unseen, non-empty speaker and refresh the dropdowns."""
    if not new_speaker or new_speaker in self.speakers:
        return
    self.speakers.append(new_speaker)
    self.update_speaker_dropdowns()
320
+
321
def update_speaker_dropdowns(self):
    """Rebuild every speaker dropdown from ``self.speakers``, keeping its selection."""
    choices = self.speakers + [""]
    table = self.table
    for row in range(table.rowCount()):
        combo = table.cellWidget(row, 3)
        if not combo:
            continue
        selected = combo.currentText()
        # Silence the combo while it is rebuilt so no change signal fires.
        combo.blockSignals(True)
        combo.clear()
        combo.addItems(choices)
        combo.setCurrentText(selected)
        combo.blockSignals(False)
331
+
332
def save_annotations(self):
    """Validate the table, ask for a target file and write the rows as JSON.

    Returns:
        True if the annotations were written, False if validation failed,
        a row held invalid times, the dialog was cancelled or writing failed.
        (Earlier versions returned None in every case; callers that ignore
        the result are unaffected.)
    """
    if not self.validate_annotations():
        return False

    annotations = []
    for row_idx in range(self.table.rowCount()):
        try:
            start = round(float(self.table.item(row_idx, 0).text()), 2)
            end = round(float(self.table.item(row_idx, 1).text()), 2)
        except (ValueError, AttributeError):
            QMessageBox.warning(self, "Invalid Input", f"Invalid start or end time at row {row_idx + 1}.")
            return False

        # A missing word item or speaker widget is treated as empty text
        # instead of raising AttributeError.
        word_item = self.table.item(row_idx, 2)
        speaker_widget = self.table.cellWidget(row_idx, 3)
        word = word_item.text() if word_item is not None else ""
        speaker = speaker_widget.currentText() if speaker_widget is not None else ""
        annotations.append({
            "start_time": start,
            "end_time": end,
            "word": word,
            "speaker": speaker if speaker else None
        })

    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getSaveFileName(
        self, "Save Annotations", "", "JSON Files (*.json);;All Files (*)", options=options
    )
    if not file_path:
        # Dialog cancelled.
        return False

    try:
        with open(file_path, "w", encoding="utf-8") as file:
            json.dump(annotations, file, indent=4)
    except (OSError, TypeError, ValueError) as e:
        QMessageBox.critical(self, "Error", f"Failed to save annotations:\n{str(e)}")
        return False

    QMessageBox.information(self, "Success", "Annotations saved successfully!")
    return True
367
+
368
def load_transcript(self):
    """Ask for a JSON transcript file and load it into the table."""
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self, "Open Transcript", "", "JSON Files (*.json);;All Files (*)", options=options
    )
    if not file_path:
        return

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            transcript = json.load(file)

        # Check the shape before touching the table, so a wrong file does
        # not wipe the current rows and then fail half-way through.
        if not isinstance(transcript, list) or not all(
            isinstance(entry, dict) for entry in transcript
        ):
            raise ValueError("Transcript must be a JSON list of word entries.")

        self.populate_table(transcript)
        # Commands recorded for the previous table refer to rows that no
        # longer exist; drop them so Undo cannot corrupt the new transcript.
        self.undo_stack.clear()
    except Exception as e:
        QMessageBox.critical(self, "Error", f"Failed to load transcript:\n{str(e)}")
380
+
381
def load_audio_file(self):
    """Ask for an audio file and load it on a worker thread."""
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self, "Open Audio File", "",
        "Audio Files (*.wav *.mp3 *.flac *.ogg);;All Files (*)", options=options
    )
    if not file_path:
        return

    # Disable the button while loading; the loaded/error handlers re-enable it.
    load_audio_button = self.findChild(QPushButton, "Load Audio")
    if load_audio_button:
        load_audio_button.setEnabled(False)

    self.statusBar().showMessage("Loading audio...")
    worker = AudioLoader(file_path)
    thread = QThread()
    self.audio_loader_worker = worker
    self.audio_loader_thread = thread

    worker.moveToThread(thread)
    thread.started.connect(worker.run)

    worker.finished.connect(self.on_audio_loaded)
    worker.finished.connect(thread.quit)
    worker.finished.connect(worker.deleteLater)

    worker.error.connect(self.on_audio_load_error)
    # Shut the thread down on failure as well; previously only a successful
    # load stopped it, so a failed load left the thread running.
    worker.error.connect(thread.quit)
    worker.error.connect(worker.deleteLater)

    thread.finished.connect(thread.deleteLater)
    thread.start()
403
+
404
def on_audio_loaded(self, audio_segment, waveform_data, duration):
    """Store the loaded audio, redraw the waveform and reset the play position."""
    self.audio_segment, self.waveform_data, self.duration = (
        audio_segment, waveform_data, duration
    )

    plot = self.waveform_plot
    plot.clear()
    # Spread the samples evenly over the full duration on the time axis.
    time_axis = np.linspace(0, self.duration, num=len(waveform_data))
    plot.plot(time_axis, waveform_data, pen="b")

    # The plot was cleared, so the playback marker is created again.
    self.playtime_line = plot.addLine(x=0, pen='r')
    self.current_time = 0.0
    plot.setLimits(xMin=0.0, xMax=self.duration)
    self.adjust_view_range()
    # The scrollbar works in milliseconds.
    self.waveform_scrollbar.setMaximum(int(self.duration * 1000))

    button = self.findChild(QPushButton, "Load Audio")
    if button:
        button.setEnabled(True)
    self.statusBar().showMessage("Audio loaded successfully.", 5000)
    self.redraw_word_lines()
424
+
425
+
426
def redraw_word_lines(self):
    """Drop all boundary lines and recreate them from the table's time cells."""
    plot = self.waveform_plot
    for record in self.word_lines:
        plot.removeItem(record['line'])
    self.word_lines = []
    self.row_to_lines = {}

    table = self.table
    for row in range(table.rowCount()):
        start_cell = table.item(row, 0)
        end_cell = table.item(row, 1)
        if not (start_cell and end_cell):
            continue
        try:
            begin = float(start_cell.text())
            finish = float(end_cell.text())
            self.add_word_lines(row, begin, finish)
        except ValueError:
            # Rows whose times are not numeric get no lines.
            continue
445
+
446
+
447
def on_audio_load_error(self, error_message):
    """Report a failed audio load and make the Load Audio button usable again."""
    details = f"Failed to load audio file:\n{error_message}"
    QMessageBox.critical(self, "Audio Load Error", details)

    button = self.findChild(QPushButton, "Load Audio")
    if button:
        button.setEnabled(True)
    self.statusBar().showMessage("Failed to load audio.", 5000)
453
+
454
def toggle_playback(self):
    """Switch between playing and paused; warn if no audio is loaded."""
    if self.audio_segment is None:
        QMessageBox.warning(self, "No Audio", "Please load an audio file first.")
        return
    action = self.pause_playback if self.is_playing else self.start_playback
    action()
462
+
463
def start_playback(self):
    """Play the audio from ``current_time`` and start the 100 ms position timer."""
    if self.audio_segment is None:
        return
    try:
        # Slice in milliseconds from the current position.
        offset_ms = int(self.current_time * 1000)
        self.play_obj = play_audio(self.audio_segment[offset_ms:])
        self.is_playing = True
        self.play_button.setText("Pause")

        timer = QTimer()
        self.playback_timer = timer
        timer.timeout.connect(self.update_current_time)
        timer.start(100)
    except Exception as e:
        QMessageBox.critical(self, "Playback Error", f"Failed to play audio:\n{str(e)}")
476
+
477
def pause_playback(self):
    """Stop audio output and the position timer, keeping the current position."""
    player = self.play_obj
    if player:
        player.stop()
        self.play_obj = None
    self.is_playing = False
    self.play_button.setText("Play")

    # The timer attribute only exists once playback was started at least once.
    timer = getattr(self, 'playback_timer', None)
    if timer is not None and timer.isActive():
        timer.stop()
485
+
486
def stop_playback(self):
    """Stop audio output and the position timer and rewind to the start."""
    player = self.play_obj
    if player:
        player.stop()
        self.play_obj = None
    self.is_playing = False
    self.play_button.setText("Play")

    # Rewind the marker and the logical position.
    self.playtime_line.setValue(0)
    self.current_time = 0.0

    timer = getattr(self, 'playback_timer', None)
    if timer is not None and timer.isActive():
        timer.stop()
496
+
497
def update_current_time(self):
    """Advance the play position by one 0.1 s tick and refresh the display."""
    self.current_time += 0.1
    if self.current_time >= self.duration:
        # Reached the end of the audio.
        self.stop_playback()
        return

    now = self.current_time
    self.playtime_line.setValue(round(now, 2))
    self.adjust_view_range()

    # Move the scrollbar (milliseconds) without triggering its handler.
    scrollbar = self.waveform_scrollbar
    scrollbar.blockSignals(True)
    scrollbar.setValue(int(now * 1000))
    scrollbar.blockSignals(False)

    self.highlight_current_row()
508
+
509
def add_below(self):
    """Insert an empty one-second row below the selection, or at the table end."""
    selection = self.get_selected_rows()
    if not selection:
        # Nothing selected: append at the end, starting at the play position.
        new_start = round(self.current_time, 2)
        insert_position = self.table.rowCount()
    else:
        anchor = selection[-1]
        try:
            # The new row starts where the anchor row ends.
            base = float(self.table.item(anchor, 1).text())
        except (ValueError, AttributeError):
            base = 0.0
        new_start = round(base, 2)
        insert_position = anchor + 1

    new_end = round(new_start + 1.0, 2)
    # Clamp the end to the audio duration when audio is loaded.
    if self.audio_segment is not None and new_end > self.duration:
        new_end = round(self.duration, 2)

    blank_row = {
        'start_time': new_start,
        'end_time': new_end,
        'word': "",
        'speaker': ""
    }
    self.undo_stack.push(AddRowCommand(self, insert_position, blank_row))
535
+
536
+
537
def delete_selected(self):
    """Delete the selected rows after confirmation, as one undoable command."""
    selected_rows = self.get_selected_rows()
    if not selected_rows:
        QMessageBox.information(self, "No Selection", "Please select at least one row to delete.")
        return

    # Ask first. Nothing is touched until the user confirms; previously the
    # boundary lines were removed from the plot before the dialog appeared,
    # so cancelling left rows without their lines.
    confirm = QMessageBox.question(
        self,
        "Confirm Deletion",
        f"Are you sure you want to delete {len(selected_rows)} selected row(s)?",
        QMessageBox.Yes | QMessageBox.No
    )
    if confirm != QMessageBox.Yes:
        return

    # Snapshot the row contents so undo can restore them.
    rows_data = [
        {
            'start_time': self.table.item(row, 0).text(),
            'end_time': self.table.item(row, 1).text(),
            'word': self.table.item(row, 2).text(),
            'speaker': self.table.cellWidget(row, 3).currentText()
        }
        for row in selected_rows
    ]

    # Pushing the command runs its redo(), which removes each row through
    # remove_row(); that method also takes the row's lines off the plot.
    command = DeleteRowsCommand(self, rows_data, selected_rows)
    self.undo_stack.push(command)
569
+
570
def bulk_edit_speaker(self):
    """Let the user pick one speaker and assign it to all selected rows."""
    rows = self.get_selected_rows()
    if not rows:
        QMessageBox.information(self, "No Selection", "Please select at least one row to edit.")
        return

    choices = self.speakers + [""]
    speaker, ok = QInputDialog.getItem(
        self,
        "Select Speaker",
        "Choose a speaker to assign to selected rows:",
        choices,
        0,
        False
    )
    if not ok:
        return

    # Remember each row's current speaker so the edit can be undone.
    previous = [self.table.cellWidget(row, 3).currentText() for row in rows]
    self.undo_stack.push(BulkEditSpeakerCommand(self, rows, previous, speaker))

    if speaker and speaker not in self.speakers:
        self.speakers.append(speaker)
        self.update_speaker_dropdowns()
592
+
593
def autosave(self):
    """Write the current rows to the autosave file; failures are only printed.

    Rows whose cells are missing or whose times are not numeric are skipped.
    """
    annotations = []
    for row_idx in range(self.table.rowCount()):
        try:
            start = round(float(self.table.item(row_idx, 0).text()), 2)
            end = round(float(self.table.item(row_idx, 1).text()), 2)
            # Read inside the try as well, so a missing word item or speaker
            # widget skips the row instead of raising inside the timer slot.
            word = self.table.item(row_idx, 2).text()
            speaker = self.table.cellWidget(row_idx, 3).currentText()
        except (ValueError, AttributeError):
            continue

        annotations.append({
            "start_time": start,
            "end_time": end,
            "word": word,
            "speaker": speaker if speaker else None
        })

    # Write to a sibling file and swap it in, so an interrupted write cannot
    # leave a truncated autosave behind.
    scratch_path = self.temp_file_path + ".tmp"
    try:
        with open(scratch_path, 'w', encoding="utf-8") as f:
            json.dump(annotations, f, indent=4)
        os.replace(scratch_path, self.temp_file_path)
    except Exception as e:
        print(f"Autosave failed: {e}")
619
+
620
def load_autosave(self):
    """Load the autosave file into the table if it holds any annotations."""
    path = self.temp_file_path
    # A missing or zero-byte file means there is nothing to recover; do not
    # report that as a failure.
    if not os.path.exists(path) or os.path.getsize(path) == 0:
        return

    try:
        with open(path, "r", encoding="utf-8") as file:
            annotations = json.load(file)
        if not annotations:
            # An empty list is not worth a recovery dialog.
            return
        self.populate_table(annotations)
        QMessageBox.information(self, "Recovery", "Recovered annotations from autosave.")
    except Exception as e:
        # Recovery is best-effort; a broken autosave must not block startup.
        print(f"Failed to recover autosave: {e}")
629
+
630
def closeEvent(self, event):
    """Ask whether to save before closing; Cancel keeps the window open."""
    reply = QMessageBox.question(
        self, 'Exit',
        "Do you want to save your annotations before exiting?",
        QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel,
        QMessageBox.Yes
    )

    # Anything other than an explicit Yes/No aborts the close.
    if reply not in (QMessageBox.Yes, QMessageBox.No):
        event.ignore()
        return

    if reply == QMessageBox.Yes:
        if not self.validate_annotations():
            event.ignore()
            return
        self.save_annotations()

    # The window is closing: the autosave file is no longer needed.
    if os.path.exists(self.temp_file_path):
        os.remove(self.temp_file_path)
    event.accept()
652
+
653
def on_cell_double_clicked(self, row, column):
    """Remember the cell's text before editing so the edit can be undone."""
    cell = self.table.item(row, column)
    if not cell:
        return
    self.old_values[(row, column)] = cell.text()
657
+
658
def on_item_changed(self, item):
    """Turn a user edit of a cell into an undoable command.

    Non-numeric text in the Start/End columns is rejected and reverted.
    """
    if self.table.signalsBlocked():
        return

    row, column = item.row(), item.column()
    key = (row, column)
    previous = self.old_values.get(key, "")
    current = item.text()

    if current != previous:
        if column in [0, 1]:
            try:
                self.update_line_position(row, column, float(current))
            except ValueError:
                QMessageBox.warning(self, "Invalid Input", "Start and End times must be numeric.")
                # Put the old text back without re-entering this handler.
                self.table.blockSignals(True)
                item.setText(previous)
                self.table.blockSignals(False)
                return

        self.undo_stack.push(EditCellCommand(self, row, column, previous, current))

    # The remembered pre-edit value has served its purpose.
    self.old_values.pop(key, None)
685
+
686
def insert_row(self, row_position, row_data):
    """Insert a table row at ``row_position`` filled from ``row_data``.

    ``row_data`` is read with the keys ``start_time``, ``end_time``,
    ``word`` and ``speaker``. Boundary lines are added when audio is loaded.
    """
    # Convert before touching the table, so bad data cannot leave a blank
    # row behind or the table's signals blocked.
    start_time = round(float(row_data.get('start_time', 0.0)), 2)
    end_time = round(float(row_data.get('end_time', 1.0)), 2)
    word = row_data.get('word') or ""
    speaker = row_data.get('speaker') or ""

    self.table.blockSignals(True)
    try:
        self.table.insertRow(row_position)

        if speaker and speaker not in self.speakers:
            self.speakers.append(speaker)
            self.update_speaker_dropdowns()

        start_item = QTableWidgetItem(f"{start_time:.2f}")
        end_item = QTableWidgetItem(f"{end_time:.2f}")
        word_item = QTableWidgetItem(word)
        start_item.setTextAlignment(Qt.AlignCenter)
        end_item.setTextAlignment(Qt.AlignCenter)
        word_item.setTextAlignment(Qt.AlignLeft | Qt.AlignVCenter)
        self.table.setItem(row_position, 0, start_item)
        self.table.setItem(row_position, 1, end_item)
        self.table.setItem(row_position, 2, word_item)

        speaker_dropdown = QComboBox()
        speaker_dropdown.addItems(self.speakers + [""])
        speaker_dropdown.setCurrentText(speaker)
        speaker_dropdown.currentTextChanged.connect(self.on_speaker_changed)
        self.table.setCellWidget(row_position, 3, speaker_dropdown)
    finally:
        self.table.blockSignals(False)

    # Rows at or below the insertion point moved down by one. Shift the row
    # indices recorded for their lines so each line still maps to its row.
    self.row_to_lines = {
        (row + 1 if row >= row_position else row): lines
        for row, lines in self.row_to_lines.items()
    }
    for record in self.word_lines:
        if record['row'] >= row_position:
            record['row'] += 1

    if self.audio_segment is not None:
        self.add_word_lines(row_position, start_time, end_time)
718
+
719
def remove_row(self, row_position):
    """Remove the table row at ``row_position`` together with its boundary lines."""
    # Take the row's lines off the plot and out of the bookkeeping.
    doomed = self.row_to_lines.pop(row_position, [])
    for line in doomed:
        self.waveform_plot.removeItem(line)
    doomed_ids = {id(line) for line in doomed}
    self.word_lines = [
        record for record in self.word_lines if id(record['line']) not in doomed_ids
    ]

    self.table.blockSignals(True)
    self.table.removeRow(row_position)
    self.table.blockSignals(False)

    # Every row below the removed one moved up by one. Shift the recorded
    # row indices so the remaining lines still map to their rows.
    self.row_to_lines = {
        (row - 1 if row > row_position else row): lines
        for row, lines in self.row_to_lines.items()
    }
    for record in self.word_lines:
        if record['row'] > row_position:
            record['row'] -= 1
729
+
730
def set_cell(self, row, column, value):
    """Write ``value`` into a cell without emitting table signals.

    Numeric values in the Start/End columns are formatted with two decimals
    and the matching boundary line is moved as well.
    """
    is_time_column = column in [0, 1]
    table = self.table

    table.blockSignals(True)
    if is_time_column:
        try:
            value = f"{float(value):.2f}"
        except ValueError:
            # Non-numeric text is written through unchanged.
            pass
    cell = table.item(row, column)
    if cell:
        cell.setText(value)
    table.blockSignals(False)

    if not is_time_column:
        return
    try:
        self.update_line_position(row, column, float(value))
    except ValueError:
        pass
747
+
748
def set_speaker(self, row, speaker):
    """Select ``speaker`` in the row's dropdown and register it if it is new."""
    table = self.table
    table.blockSignals(True)
    combo = table.cellWidget(row, 3)
    if combo:
        combo.setCurrentText(speaker)
    table.blockSignals(False)

    if not speaker or speaker in self.speakers:
        return
    self.speakers.append(speaker)
    self.update_speaker_dropdowns()
757
+
758
+
759
def update_line_position(self, row, column, new_value):
    """Move the boundary line registered for (row, column) to ``new_value``."""
    record = next(
        (entry for entry in self.word_lines
         if entry['row'] == row and entry['column'] == column),
        None,
    )
    if record is None:
        return

    line = record['line']
    # Move silently so the line does not report this programmatic change.
    line.blockSignals(True)
    line.setValue(new_value)
    line.blockSignals(False)
    # Keep the line's own notion of its last position in step.
    line._old_value = new_value
768
+
769
def on_waveform_clicked(self, event):
    """Jump the play position to the clicked time and select the matching row."""
    if self.audio_segment is None:
        return

    scene_pos = event.scenePos()
    plot = self.waveform_plot
    if not plot.sceneBoundingRect().contains(scene_pos):
        return

    # Convert the scene position into plot (time) coordinates.
    view_point = plot.getPlotItem().vb.mapSceneToView(scene_pos)
    # Clamp to the valid time range of the audio.
    self.current_time = max(0.0, min(view_point.x(), self.duration))

    self.playtime_line.setValue(round(self.current_time, 2))
    self.adjust_view_range()

    scrollbar = self.waveform_scrollbar
    scrollbar.blockSignals(True)
    scrollbar.setValue(int(self.current_time * 1000))
    scrollbar.blockSignals(False)

    self.select_row_by_time(self.current_time)
785
+
786
def select_row_by_time(self, time):
    """Select the first row whose [start, end) interval contains ``time``.

    Rows with a missing cell or non-numeric start/end text are skipped.
    """
    table = self.table
    for row_index in range(table.rowCount()):
        start_cell = table.item(row_index, 0)
        end_cell = table.item(row_index, 1)
        if not (start_cell and end_cell):
            continue
        try:
            begins = float(start_cell.text())
            ends = float(end_cell.text())
            if begins <= time < ends:
                table.selectRow(row_index)
                break
        except ValueError:
            continue
799
+
800
def on_table_clicked(self, row, column):
    """Seek to the start time (column 0) of the clicked row.

    Args:
        row: Index of the clicked row.
        column: Index of the clicked column (not used).
    """
    start_cell = self.table.item(row, 0)
    if not start_cell:
        return
    try:
        self.current_time = float(start_cell.text())
        self.playtime_line.setValue(round(self.current_time, 2))
        self.adjust_view_range()
        # Move the scrollbar without emitting its signals.
        scrollbar = self.waveform_scrollbar
        scrollbar.blockSignals(True)
        scrollbar.setValue(int(self.current_time * 1000))
        scrollbar.blockSignals(False)
    except ValueError:
        # Non-numeric start text: leave the playhead where it is.
        pass
813
+
814
def on_current_cell_changed(self, current_row, current_column, previous_row, previous_column):
    """Seek to the start time of the row that became current.

    Only ``current_row`` is used; negative rows are ignored.
    """
    if current_row < 0:
        return
    start_cell = self.table.item(current_row, 0)
    if not start_cell:
        return
    try:
        self.current_time = float(start_cell.text())
        self.playtime_line.setValue(round(self.current_time, 2))
        self.adjust_view_range()
        # Move the scrollbar without emitting its signals.
        scrollbar = self.waveform_scrollbar
        scrollbar.blockSignals(True)
        scrollbar.setValue(int(self.current_time * 1000))
        scrollbar.blockSignals(False)
    except ValueError:
        # Non-numeric start text: leave the playhead where it is.
        pass
828
+
829
def on_selection_changed(self, selected, deselected):
    """Rebuild the draggable boundary lines for the selected rows.

    All existing word lines are removed from the plot, then a green start
    line and end line are added for every selected row whose start and end
    cells hold numeric text. ``selected`` and ``deselected`` are not used.
    """
    for entry in getattr(self, 'word_lines', []):
        self.waveform_plot.removeItem(entry['line'])
    self.word_lines = []

    for row in self.get_selected_rows():
        start_cell = self.table.item(row, 0)
        end_cell = self.table.item(row, 1)
        if not (start_cell and end_cell):
            continue
        try:
            bounds = (float(start_cell.text()), float(end_cell.text()))
            # Index 0 is the start line, index 1 the end line.
            markers = [
                DraggableLine(pos=position, angle=90, pen=pg.mkPen('g', width=1))
                for position in bounds
            ]
            for marker in markers:
                self.waveform_plot.addItem(marker)
            self.word_lines.extend(
                {'line': marker, 'row': row, 'column': column}
                for column, marker in enumerate(markers)
            )
            for marker in markers:
                marker.positionChangedFinished.connect(self.on_line_moved_finished)
        except ValueError:
            continue
854
+
855
def on_line_moved_finished(self, line):
    """Write a moved line's position into its table cell and record an undo step.

    The position is clamped to [0, duration] and formatted with two
    decimals. Nothing happens when the line is unknown or the formatted
    text equals the cell's current text.
    """
    entry = next((wl for wl in self.word_lines if wl['line'] == line), None)
    if entry is None:
        return

    row, column = entry['row'], entry['column']
    cell = self.table.item(row, column)
    previous_text = cell.text()
    clamped = max(0.0, min(line.value(), self.duration))
    updated_text = f"{clamped:.2f}"
    if previous_text == updated_text:
        return

    # Update the cell with table signals blocked.
    self.table.blockSignals(True)
    cell.setText(updated_text)
    self.table.blockSignals(False)
    # Push the edit onto the undo stack.
    self.undo_stack.push(EditCellCommand(self, row, column, previous_text, updated_text))
    line._old_value = clamped
876
+
877
def highlight_current_row(self):
    """Colour the row at the current time blue, all others black, and scroll to it."""
    table = self.table
    active_row = self.get_current_row()
    for row in range(table.rowCount()):
        colour_name = "blue" if row == active_row else "black"
        for column in range(table.columnCount()):
            cell = table.item(row, column)
            if cell:
                cell.setBackground(QColor(colour_name))
    # -1 means no row contains the current time, so there is nothing to scroll to.
    if active_row != -1:
        table.scrollToItem(table.item(active_row, 0), QAbstractItemView.PositionAtCenter)
889
+
890
def get_current_row(self):
    """Return the index of the first row whose [start, end) contains ``current_time``.

    Rows with a missing cell or non-numeric start/end text are skipped.

    Returns:
        int: The matching row index, or -1 when no row matches.
    """
    table = self.table
    for row_index in range(table.rowCount()):
        start_cell = table.item(row_index, 0)
        end_cell = table.item(row_index, 1)
        if not (start_cell and end_cell):
            continue
        try:
            begins = float(start_cell.text())
            ends = float(end_cell.text())
        except ValueError:
            continue
        if begins <= self.current_time < ends:
            return row_index
    return -1
903
+
904
def adjust_view_range(self):
    """Show a window of up to 5.0 units around ``current_time`` on the waveform plot.

    The window is clipped to [0, duration], so it is narrower near either end.
    """
    half_span = 5.0 / 2.0
    centre = self.current_time
    left_edge = max(0.0, centre - half_span)
    right_edge = min(self.duration, centre + half_span)
    self.waveform_plot.setXRange(left_edge, right_edge, padding=0)
910
+
911
def on_scrollbar_moved(self, value):
    """Seek to the scrollbar position; ``value`` is the time multiplied by 1000."""
    self.current_time = value / 1000.0
    rounded_time = round(self.current_time, 2)
    self.playtime_line.setValue(rounded_time)
    self.adjust_view_range()
    self.highlight_current_row()
916
+
917
def return_to_selection(self):
    """Seek back to the start time of the first selected row.

    Requires the first selected row's start cell and the last selected
    row's end cell to exist; otherwise nothing happens.
    """
    rows = self.get_selected_rows()
    if not rows:
        return
    start_cell = self.table.item(rows[0], 0)
    end_cell = self.table.item(rows[-1], 1)
    if not (start_cell and end_cell):
        return
    try:
        self.current_time = float(start_cell.text())
        self.playtime_line.setValue(round(self.current_time, 2))
        self.adjust_view_range()
        # Move the scrollbar without emitting its signals.
        scrollbar = self.waveform_scrollbar
        scrollbar.blockSignals(True)
        scrollbar.setValue(int(self.current_time * 1000))
        scrollbar.blockSignals(False)
    except ValueError:
        # Non-numeric start text: leave the playhead where it is.
        pass
934
+
935
def show_context_menu(self, position):
    """Show the table's context menu at ``position`` (table viewport coordinates)."""
    menu = QMenu()
    entries = (
        ("Add Below", self.add_below),
        ("Delete Selected", self.delete_selected),
        ("Bulk Edit Speaker", self.bulk_edit_speaker),
    )
    actions = []
    for label, handler in entries:
        action = QAction(label, self)
        action.triggered.connect(handler)
        actions.append(action)
    for action in actions:
        menu.addAction(action)
    # The menu is executed at the global position mapped from the viewport.
    menu.exec_(self.table.viewport().mapToGlobal(position))
947
+
948
def validate_annotations(self):
    """Check that every annotation row is well-formed and that none overlap.

    Rows are visited in order of their start time (column 0). A warning
    dialog is shown for the first problem found: a missing start/end cell,
    non-numeric text, a start time not below the end time, or an end time
    greater than the next row's start time.

    Returns:
        bool: True when every row passes, False after the first failure.
    """
    table = self.table

    def start_key(row):
        # Missing, empty or non-numeric start cells sort as 0.0 so that the
        # sort itself can never raise; the loop below reports those rows
        # through the regular warning dialogs.
        cell = table.item(row, 0)
        if not cell or not cell.text():
            return 0.0
        try:
            return float(cell.text())
        except ValueError:
            return 0.0

    sorted_rows = sorted(range(table.rowCount()), key=start_key)
    last_index = len(sorted_rows) - 1

    for i, row in enumerate(sorted_rows):
        start_item = table.item(row, 0)
        end_item = table.item(row, 1)
        if not start_item or not end_item:
            QMessageBox.warning(self, "Invalid Annotation", f"Missing start or end time at row {row + 1}.")
            return False
        try:
            start_time = float(start_item.text())
            end_time = float(end_item.text())
        except ValueError:
            QMessageBox.warning(self, "Invalid Annotation", f"Non-numeric start or end time at row {row + 1}.")
            return False
        if start_time >= end_time:
            QMessageBox.warning(self, "Invalid Annotation", f"Start time must be less than end time at row {row + 1}.")
            return False
        if i == last_index:
            continue

        next_row = sorted_rows[i + 1]
        next_item = table.item(next_row, 0)
        try:
            next_start = float(next_item.text()) if next_item else None
        except ValueError:
            next_start = None
        # A missing or non-numeric next start is reported on that row's own
        # iteration, so the warning names the row that is actually at fault.
        if next_start is not None and end_time > next_start:
            QMessageBox.warning(
                self, "Invalid Annotation",
                f"Annotations at rows {row + 1} and {next_row + 1} overlap."
            )
            return False
    return True
976
+
977
def get_selected_rows(self):
    """Return the distinct indices of the selected table rows in ascending order."""
    selection = self.table.selectionModel().selectedRows()
    return sorted({model_index.row() for model_index in selection})
979
+
980
+
981
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the annotation
    # window, and exit with the event loop's return code.
    app = QApplication(sys.argv)
    window = AnnotationTool()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)