pelican-nlp 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. pelican_nlp/Nils_backup/__init__.py +0 -0
  2. pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
  3. pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
  4. pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
  5. pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
  6. pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
  7. pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
  8. pelican_nlp/Nils_backup/fluency/config.py +231 -0
  9. pelican_nlp/Nils_backup/fluency/main.py +182 -0
  10. pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
  11. pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
  12. pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
  13. pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
  14. pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
  15. pelican_nlp/Nils_backup/fluency/utils.py +41 -0
  16. pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
  17. pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
  18. pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
  19. pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
  20. pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
  21. pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
  22. pelican_nlp/Nils_backup/transcription/test.json +1 -0
  23. pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
  24. pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
  25. pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
  26. pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
  27. pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
  28. pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
  29. pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
  30. pelican_nlp/__init__.py +1 -1
  31. pelican_nlp/_version.py +1 -0
  32. pelican_nlp/configuration_files/config_audio.yml +150 -0
  33. pelican_nlp/configuration_files/config_discourse.yml +104 -0
  34. pelican_nlp/configuration_files/config_fluency.yml +108 -0
  35. pelican_nlp/configuration_files/config_general.yml +131 -0
  36. pelican_nlp/configuration_files/config_morteza.yml +103 -0
  37. pelican_nlp/praat/__init__.py +29 -0
  38. {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/METADATA +14 -21
  39. pelican_nlp-0.1.2.dist-info/RECORD +75 -0
  40. pelican_nlp-0.1.0.dist-info/RECORD +0 -39
  41. {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/WHEEL +0 -0
  42. {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/licenses/LICENSE +0 -0
  43. {pelican_nlp-0.1.0.dist-info → pelican_nlp-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1001 @@
1
+ import sys
2
+ import numpy as np
3
+ from PyQt5.QtWidgets import (
4
+ QApplication, QMainWindow, QTableWidget, QTableWidgetItem, QVBoxLayout,
5
+ QPushButton, QWidget, QComboBox, QFileDialog, QHBoxLayout, QMessageBox,
6
+ QLabel, QCheckBox, QSlider, QAbstractItemView, QInputDialog, QUndoStack, QUndoCommand,
7
+ QMenu, QAction, QScrollBar
8
+ )
9
+ from PyQt5.QtCore import Qt, QTimer, QObject, pyqtSignal, QThread, QPoint
10
+ from PyQt5.QtGui import QPixmap, QColor
11
+ import pyqtgraph as pg
12
+ from pydub import AudioSegment
13
+ from pydub.playback import _play_with_simpleaudio as play_audio
14
+ import json
15
+ import tempfile
16
+ import os
17
+
18
+
19
class AddRowCommand(QUndoCommand):
    """Undo-stack command that adds one row to the annotation table."""

    def __init__(self, tool, row_position, row_data, description="Add Row"):
        """Store the owning tool, the target row index and the row payload."""
        super().__init__(description)
        self.tool, self.row_position, self.row_data = tool, row_position, row_data

    def redo(self):
        """Insert the stored row data at the stored position."""
        owner = self.tool
        owner.insert_row(self.row_position, self.row_data)

    def undo(self):
        """Remove the row at the stored position again."""
        owner = self.tool
        owner.remove_row(self.row_position)
31
+
32
+
33
class DeleteRowsCommand(QUndoCommand):
    """Undo-stack command that deletes several rows and can restore them."""

    def __init__(self, tool, rows_data, row_positions, description="Delete Rows"):
        """Store the tool plus the parallel lists of row payloads and indices."""
        super().__init__(description)
        self.tool = tool
        # rows_data[i] is the row dictionary that belongs to row_positions[i]
        self.rows_data = rows_data
        self.row_positions = row_positions

    def redo(self):
        """Remove the rows, highest index first, so lower indices stay valid."""
        descending = sorted(self.row_positions, reverse=True)
        for position in descending:
            self.tool.remove_row(position)

    def undo(self):
        """Re-insert the rows in ascending index order."""
        pairs = list(zip(self.row_positions, self.rows_data))
        pairs.sort()
        for position, payload in pairs:
            self.tool.insert_row(position, payload)
49
+
50
+
51
class EditCellCommand(QUndoCommand):
    """Undo-stack command that swaps a single cell between two values."""

    def __init__(self, tool, row, column, old_value, new_value, description="Edit Cell"):
        """Store the cell coordinates and both the previous and the new text."""
        super().__init__(description)
        self.tool = tool
        self.row, self.column = row, column
        self.old_value, self.new_value = old_value, new_value

    def _apply(self, value):
        # Single place that writes a value into the addressed cell.
        self.tool.set_cell(self.row, self.column, value)

    def redo(self):
        """Write the new value into the cell."""
        self._apply(self.new_value)

    def undo(self):
        """Write the previous value back into the cell."""
        self._apply(self.old_value)
65
+
66
+
67
class BulkEditSpeakerCommand(QUndoCommand):
    """Undo-stack command that assigns one speaker to several rows."""

    def __init__(self, tool, row_positions, old_speakers, new_speaker, description="Bulk Edit Speaker"):
        """Store the affected rows, their previous speakers and the new speaker."""
        super().__init__(description)
        self.tool = tool
        # old_speakers[i] is the speaker row_positions[i] had before the edit
        self.row_positions = row_positions
        self.old_speakers = old_speakers
        self.new_speaker = new_speaker

    def redo(self):
        """Assign the new speaker to every stored row."""
        assign = self.tool.set_speaker
        replacement = self.new_speaker
        for position in self.row_positions:
            assign(position, replacement)

    def undo(self):
        """Give every stored row its previous speaker back."""
        restore = self.tool.set_speaker
        for position, previous in zip(self.row_positions, self.old_speakers):
            restore(position, previous)
82
+
83
+
84
class AudioLoader(QObject):
    """Worker object that decodes an audio file and prepares waveform data.

    ``run`` emits ``finished`` with the mono AudioSegment, the normalized
    (and, for long recordings, downsampled) sample array and the duration in
    seconds. Any exception raised while loading is reported through
    ``error`` as its string representation.
    """

    finished = pyqtSignal(AudioSegment, np.ndarray, float)  # AudioSegment, waveform_data, duration
    error = pyqtSignal(str)  # Error message

    def __init__(self, file_path, downsample_factor=100):
        """Remember the file to load and the block size used for downsampling."""
        super().__init__()
        self.file_path = file_path
        self.downsample_factor = downsample_factor

    def run(self):
        """Load the file, normalize the samples and emit the result."""
        try:
            # Load audio with pydub and convert to mono
            audio = AudioSegment.from_file(self.file_path).set_channels(1)
            samples = np.array(audio.get_array_of_samples()).astype(np.float32)
            duration = audio.duration_seconds

            # Normalize to the peak amplitude; an all-zero signal is left as is
            peak = np.max(np.abs(samples))
            if peak != 0:
                samples /= peak

            # Downsample the waveform data for plotting if necessary
            if len(samples) > 1000000:  # Threshold can be adjusted
                samples = self.downsample_waveform(samples, self.downsample_factor)

            # Emit the processed data
            self.finished.emit(audio, samples, duration)
        except Exception as e:
            self.error.emit(str(e))

    def downsample_waveform(self, samples, factor):
        """Downsample the waveform by taking the mean of every 'factor' samples.

        Trailing samples that do not fill a complete block are dropped.

        Raises:
            ValueError: if ``factor`` is not a positive integer block size.
        """
        if factor <= 0:
            raise ValueError("downsample factor must be positive")
        num_blocks = len(samples) // factor
        # Reshape into (num_blocks, factor) and average each row in one
        # vectorized call instead of looping over the blocks in Python.
        blocks = np.asarray(samples)[:num_blocks * factor].reshape(num_blocks, factor)
        return blocks.mean(axis=1)
117
+
118
+
119
+ class AnnotationTool(QMainWindow):
120
def __init__(self):
    """Build the main window: state, undo stack, autosave, widgets and signals."""
    super().__init__()
    self.setWindowTitle("Annotation Tool with Waveform and Transcript Synchronization")
    self.setGeometry(100, 100, 1600, 900)  # Increased width and height for better layout

    self.audio_segment = None  # To store the original AudioSegment
    self.waveform_data = None  # To store waveform data for plotting
    self.duration = 0.0
    self.play_obj = None
    self.current_time = 0.0
    self.is_playing = False
    self.updating = False  # Flag to prevent recursive updates
    self.speakers = []  # List to store unique speakers

    # Initialize QUndoStack
    self.undo_stack = QUndoStack(self)

    # Initialize autosave components
    self.autosave_timer = QTimer()
    self.autosave_timer.timeout.connect(self.autosave)
    self.autosave_timer.start(5000)  # Autosave every 5 seconds

    # Create a temporary file for autosave
    # NOTE(review): NamedTemporaryFile creates a new, uniquely named file on
    # every start, so load_autosave() at the end of this method reads the file
    # created here rather than one from an earlier session - confirm whether
    # cross-session recovery is intended.
    self.temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.json')
    self.temp_file_path = self.temp_file.name
    self.temp_file.close()  # We'll manage the file manually

    # Main layout
    self.layout = QVBoxLayout()

    # Undo/Redo buttons layout
    undo_redo_layout = QHBoxLayout()
    undo_button = QPushButton("Undo")
    undo_button.clicked.connect(self.undo_stack.undo)
    redo_button = QPushButton("Redo")
    redo_button.clicked.connect(self.undo_stack.redo)
    undo_redo_layout.addWidget(undo_button)
    undo_redo_layout.addWidget(redo_button)
    self.layout.addLayout(undo_redo_layout)

    # Waveform and Transcript layout
    waveform_transcript_layout = QHBoxLayout()

    # Waveform plot and scrollbar layout
    waveform_layout = QVBoxLayout()
    self.waveform_plot = pg.PlotWidget()
    self.waveform_plot.setYRange(-1, 1)
    self.waveform_plot.showGrid(x=True, y=False)
    self.waveform_plot.setLabel('bottom', 'Time', 's')
    waveform_layout.addWidget(self.waveform_plot)

    # Scrollbar under waveform
    self.waveform_scrollbar = QScrollBar(Qt.Horizontal)
    self.waveform_scrollbar.setMinimum(0)
    self.waveform_scrollbar.setMaximum(0)  # Will be set when audio is loaded
    self.waveform_scrollbar.valueChanged.connect(self.on_scrollbar_moved)
    waveform_layout.addWidget(self.waveform_scrollbar)

    waveform_transcript_layout.addLayout(waveform_layout)

    # Transcript (Table)
    self.table = QTableWidget()
    self.table.setColumnCount(4)
    self.table.setHorizontalHeaderLabels(["Start", "End", "Word", "Speaker"])
    self.table.setSelectionBehavior(QAbstractItemView.SelectRows)
    self.table.setSelectionMode(QAbstractItemView.ExtendedSelection)  # Allow multiple selection
    self.table.setEditTriggers(QAbstractItemView.DoubleClicked | QAbstractItemView.SelectedClicked)
    self.table.setStyleSheet("selection-background-color: lightblue;")
    self.table.verticalHeader().setVisible(False)
    self.table.setAlternatingRowColors(True)
    self.table.setContextMenuPolicy(Qt.CustomContextMenu)
    self.table.customContextMenuRequested.connect(self.show_context_menu)
    waveform_transcript_layout.addWidget(self.table)

    self.layout.addLayout(waveform_transcript_layout)

    # Playtime indicator
    self.playtime_line = self.waveform_plot.addLine(x=0, pen='r')

    # Connect mouse click event for seeking
    self.waveform_plot.scene().sigMouseClicked.connect(self.on_waveform_clicked)

    # Audio controls
    audio_control_layout = QHBoxLayout()
    self.play_button = QPushButton("Play")
    self.play_button.clicked.connect(self.toggle_playback)
    audio_control_layout.addWidget(self.play_button)

    self.stop_button = QPushButton("Stop")
    self.stop_button.clicked.connect(self.stop_playback)
    audio_control_layout.addWidget(self.stop_button)

    self.layout.addLayout(audio_control_layout)

    # Connect table signals
    self.table.cellDoubleClicked.connect(self.on_cell_double_clicked)
    self.table.itemChanged.connect(self.on_item_changed)
    self.table.cellClicked.connect(self.on_table_clicked)  # For click-to-seek
    # Keyed by (row, column); filled by on_cell_double_clicked and consumed by
    # on_item_changed.
    self.old_values = {}  # To store old values before editing

    # New: Handle selection changes
    self.word_lines = []
    self.table.selectionModel().selectionChanged.connect(self.on_selection_changed)

    # Buttons for controls
    button_layout = QHBoxLayout()
    load_audio_button = QPushButton("Load Audio")
    load_audio_button.clicked.connect(self.load_audio_file)
    # The object name is what findChild(QPushButton, "Load Audio") looks up
    # in the audio-loading methods.
    load_audio_button.setObjectName("Load Audio")  # Set object name for easy access
    button_layout.addWidget(load_audio_button)

    load_transcript_button = QPushButton("Load Transcript")
    load_transcript_button.clicked.connect(self.load_transcript)
    button_layout.addWidget(load_transcript_button)

    save_button = QPushButton("Save Annotations")
    save_button.clicked.connect(self.save_annotations)
    button_layout.addWidget(save_button)

    # New buttons
    add_below_button = QPushButton("Add Below")
    add_below_button.clicked.connect(self.add_below)
    button_layout.addWidget(add_below_button)

    delete_button = QPushButton("Delete Selected")
    delete_button.clicked.connect(self.delete_selected)
    button_layout.addWidget(delete_button)

    bulk_edit_button = QPushButton("Bulk Edit Speaker")
    bulk_edit_button.clicked.connect(self.bulk_edit_speaker)
    button_layout.addWidget(bulk_edit_button)

    self.layout.addLayout(button_layout)

    # Add rows of data
    self.sample_data = []
    self.populate_table(self.sample_data)

    # Auto-scroll timer
    self.auto_scroll_timer = QTimer()
    self.auto_scroll_timer.timeout.connect(self.highlight_current_row)
    self.auto_scroll_timer.start(500)  # Check every 500 ms

    # Main widget
    container = QWidget()
    container.setLayout(self.layout)
    self.setCentralWidget(container)

    # Initialize Worker attributes
    self.audio_loader_thread = None
    self.audio_loader_worker = None

    # Load autosave if exists
    self.load_autosave()
274
+
275
def populate_table(self, data):
    """Populate the table with transcript data.

    ``data`` is an iterable of dictionaries with the keys ``start_time``,
    ``end_time``, ``word`` and ``speaker``. Existing rows and the known
    speaker list are discarded first. Entries whose times are missing or not
    numeric fall back to 0.00 / 1.00. A ``None`` word or speaker (the form in
    which save_annotations and autosave write an empty speaker) is treated
    as an empty string.
    """
    def as_text(value):
        # Qt item/combo-box setters expect str; JSON may deliver null or numbers
        return "" if value is None else str(value)

    # insert_row guards its programmatic cell writes with self.updating so
    # that on_item_changed ignores them; do the same while loading rows.
    previous_flag = getattr(self, 'updating', False)
    self.updating = True
    try:
        self.table.setRowCount(0)  # Clear existing rows
        self.speakers = []  # Reset speakers list

        # Collect unique speakers
        for entry in data:
            speaker = as_text(entry.get('speaker'))
            if speaker and speaker not in self.speakers:
                self.speakers.append(speaker)

        for row_idx, entry in enumerate(data):
            self.table.insertRow(row_idx)

            # Round start and end times to two decimals
            try:
                start_time = round(float(entry['start_time']), 2)
                end_time = round(float(entry['end_time']), 2)
            except (ValueError, TypeError, KeyError):
                # TypeError covers null times, which float() rejects
                start_time = 0.00
                end_time = 1.00

            word = as_text(entry.get('word'))
            speaker = as_text(entry.get('speaker'))

            start_item = QTableWidgetItem(f"{start_time:.2f}")
            end_item = QTableWidgetItem(f"{end_time:.2f}")
            word_item = QTableWidgetItem(word)

            # Align numbers to center
            start_item.setTextAlignment(Qt.AlignCenter)
            end_item.setTextAlignment(Qt.AlignCenter)
            word_item.setTextAlignment(Qt.AlignLeft | Qt.AlignVCenter)
            word_item.setBackground(QColor("black"))  # Set background color
            word_item.setForeground(QColor("white"))  # Set text color
            self.table.setItem(row_idx, 0, start_item)
            self.table.setItem(row_idx, 1, end_item)
            self.table.setItem(row_idx, 2, word_item)

            # Dropdown for speaker selection
            speaker_dropdown = QComboBox()
            speaker_dropdown.addItems(self.speakers + [""])
            speaker_dropdown.setCurrentText(speaker)
            speaker_dropdown.currentTextChanged.connect(self.on_speaker_changed)
            self.table.setCellWidget(row_idx, 3, speaker_dropdown)
    finally:
        self.updating = previous_flag
320
+
321
def on_speaker_changed(self, new_speaker):
    """Register a speaker chosen in a dropdown if it is not known yet."""
    if not new_speaker or new_speaker in self.speakers:
        return
    self.speakers.append(new_speaker)
    # Make the new name selectable in every row
    self.update_speaker_dropdowns()
326
+
327
def update_speaker_dropdowns(self):
    """Refill every row's speaker dropdown from self.speakers, keeping its selection."""
    for row_index in range(self.table.rowCount()):
        combo = self.table.cellWidget(row_index, 3)
        if not combo:
            continue
        selected = combo.currentText()
        # Signals are blocked so refilling does not trigger on_speaker_changed
        combo.blockSignals(True)
        combo.clear()
        combo.addItems(self.speakers + [""])
        combo.setCurrentText(selected)
        combo.blockSignals(False)
338
+
339
def save_annotations(self):
    """Validate the table and write its rows to a user-chosen JSON file."""
    if not self.validate_annotations():
        return

    table = self.table
    annotations = []
    for row_idx in range(table.rowCount()):
        try:
            # Times are stored rounded to two decimals
            start = round(float(table.item(row_idx, 0).text()), 2)
            end = round(float(table.item(row_idx, 1).text()), 2)
        except (ValueError, AttributeError):
            QMessageBox.warning(self, "Invalid Input", f"Invalid start or end time at row {row_idx + 1}.")
            return

        word = table.item(row_idx, 2).text()
        speaker = table.cellWidget(row_idx, 3).currentText()
        record = {
            "start_time": start,
            "end_time": end,
            "word": word,
            # An empty dropdown selection is saved as null
            "speaker": speaker if speaker else None,
        }
        annotations.append(record)

    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getSaveFileName(
        self, "Save Annotations", "", "JSON Files (*.json);;All Files (*)", options=options
    )
    if not file_path:
        # Dialog was cancelled
        return

    try:
        with open(file_path, "w") as file:
            json.dump(annotations, file, indent=4)
        QMessageBox.information(self, "Success", "Annotations saved successfully!")
    except Exception as e:
        QMessageBox.critical(self, "Error", f"Failed to save annotations:\n{str(e)}")
376
+
377
def load_transcript(self):
    """Load a transcript from a user-chosen JSON file into the table.

    The file is read as UTF-8 (the standard JSON encoding) instead of the
    platform's locale encoding, so transcripts containing non-ASCII words
    load the same way on every system. Failures are shown in an error dialog.
    """
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self, "Open Transcript", "", "JSON Files (*.json);;All Files (*)", options=options
    )
    if not file_path:
        # Dialog was cancelled
        return

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            transcript = json.load(file)
        self.populate_table(transcript)
    except Exception as e:
        QMessageBox.critical(self, "Error", f"Failed to load transcript:\n{str(e)}")
390
+
391
def load_audio_file(self):
    """Ask for an audio file and load it on a background thread.

    The result arrives through on_audio_loaded, failures through
    on_audio_load_error. The worker's ``error`` signal also stops the thread
    and schedules the worker for deletion, so a failed load does not leave
    the thread's event loop running.
    """
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self, "Open Audio File", "",
        "Audio Files (*.wav *.mp3 *.flac *.ogg);;All Files (*)", options=options
    )
    if not file_path:
        # Dialog was cancelled
        return

    # Looked up before the try block so the except handler can always use it
    load_audio_button = self.findChild(QPushButton, "Load Audio")
    try:
        # Disable the load_audio_button to prevent multiple clicks
        if load_audio_button:
            load_audio_button.setEnabled(False)

        # Show a loading message
        self.statusBar().showMessage("Loading audio...")

        # Initialize the worker and thread
        self.audio_loader_worker = AudioLoader(file_path)
        self.audio_loader_thread = QThread()
        self.audio_loader_worker.moveToThread(self.audio_loader_thread)

        # Connect signals
        self.audio_loader_thread.started.connect(self.audio_loader_worker.run)
        self.audio_loader_worker.finished.connect(self.on_audio_loaded)
        self.audio_loader_worker.finished.connect(self.audio_loader_thread.quit)
        self.audio_loader_worker.finished.connect(self.audio_loader_worker.deleteLater)
        self.audio_loader_thread.finished.connect(self.audio_loader_thread.deleteLater)
        self.audio_loader_worker.error.connect(self.on_audio_load_error)
        # Tear the thread down on failure as well as on success
        self.audio_loader_worker.error.connect(self.audio_loader_thread.quit)
        self.audio_loader_worker.error.connect(self.audio_loader_worker.deleteLater)

        # Start the thread
        self.audio_loader_thread.start()
    except Exception as e:
        QMessageBox.critical(self, "Error", f"Failed to load audio file:\n{str(e)}")
        if load_audio_button:
            load_audio_button.setEnabled(True)
427
+
428
def on_audio_loaded(self, audio_segment, waveform_data, duration):
    """Store the loaded audio and redraw the waveform view for it."""
    self.audio_segment = audio_segment  # kept for playback
    self.waveform_data = waveform_data  # kept for plotting
    self.duration = duration

    plot = self.waveform_plot
    plot.clear()  # drop the previous curve
    time_axis = np.linspace(0, self.duration, num=len(waveform_data))
    plot.plot(time_axis, waveform_data, pen="b")
    # Re-create the playback marker at the start of the recording
    self.playtime_line = plot.addLine(x=0, pen='r')
    self.current_time = 0.0

    # Restrict the x axis to the recording and set the initial view
    plot.setLimits(xMin=0.0, xMax=self.duration)
    self.adjust_view_range()

    # The scrollbar works in milliseconds
    self.waveform_scrollbar.setMaximum(int(self.duration * 1000))

    # Loading is done: allow another file to be chosen
    button = self.findChild(QPushButton, "Load Audio")
    if button:
        button.setEnabled(True)

    # Message is shown for 5 seconds
    self.statusBar().showMessage("Audio loaded successfully.", 5000)
460
+
461
def on_audio_load_error(self, error_message):
    """Report a failed audio load and make the load button usable again."""
    details = f"Failed to load audio file:\n{error_message}"
    QMessageBox.critical(self, "Audio Load Error", details)

    button = self.findChild(QPushButton, "Load Audio")
    if button:
        button.setEnabled(True)

    # Message is shown for 5 seconds
    self.statusBar().showMessage("Failed to load audio.", 5000)
472
+
473
def toggle_playback(self):
    """Pause when audio is playing, otherwise start playing; warn if no audio is loaded."""
    if self.audio_segment is None:
        QMessageBox.warning(self, "No Audio", "Please load an audio file first.")
        return

    action = self.pause_playback if self.is_playing else self.start_playback
    action()
483
+
484
def start_playback(self):
    """Play the loaded audio from current_time and start the progress timer."""
    if self.audio_segment is None:
        return

    try:
        # AudioSegment slices are addressed in milliseconds
        offset_ms = int(self.current_time * 1000)
        remainder = self.audio_segment[offset_ms:]

        self.play_obj = play_audio(remainder)
        self.is_playing = True
        self.play_button.setText("Pause")

        # Advance current_time every 100 ms while playing
        timer = QTimer()
        self.playback_timer = timer
        timer.timeout.connect(self.update_current_time)
        timer.start(100)
    except Exception as e:
        QMessageBox.critical(self, "Playback Error", f"Failed to play audio:\n{str(e)}")
503
+
504
def pause_playback(self):
    """Stop the running playback object but keep the current position."""
    player = self.play_obj
    if player:
        player.stop()
        self.play_obj = None
    self.is_playing = False
    self.play_button.setText("Play")

    # The timer only exists once playback has been started at least once
    if hasattr(self, 'playback_timer'):
        if self.playback_timer.isActive():
            self.playback_timer.stop()
515
+
516
def stop_playback(self):
    """Stop playback and move the position and its marker back to zero."""
    player = self.play_obj
    if player:
        player.stop()
        self.play_obj = None
    self.is_playing = False
    self.play_button.setText("Play")

    # Rewind both the visual marker and the logical position
    self.playtime_line.setValue(0)
    self.current_time = 0.0

    # The timer only exists once playback has been started at least once
    if hasattr(self, 'playback_timer'):
        if self.playback_timer.isActive():
            self.playback_timer.stop()
529
+
530
def update_current_time(self):
    """Advance the playback position by 100 ms and refresh everything tied to it."""
    self.current_time += 0.1  # one timer tick

    # Reaching the end of the recording stops playback and rewinds
    if self.current_time >= self.duration:
        self.stop_playback()
        return

    position = self.current_time
    self.playtime_line.setValue(round(position, 2))
    self.adjust_view_range()
    # The scrollbar works in milliseconds
    self.waveform_scrollbar.setValue(int(position * 1000))
    self.highlight_current_row()
549
+
550
def add_below(self):
    """Add an empty one-second annotation below the selection, or at the end.

    With a selection the new row starts at the end time of the last selected
    row; without one it starts at the current playback time and is appended.
    """
    selection = self.table.selectionModel().selectedRows()
    if not selection:
        new_start = round(self.current_time, 2)
        insert_position = self.table.rowCount()
    else:
        anchor_row = selection[-1].row()  # last entry of the selection
        try:
            anchor_end = float(self.table.item(anchor_row, 1).text())
        except (ValueError, AttributeError):
            anchor_end = 0.0
        new_start = round(anchor_end, 2)
        insert_position = anchor_row + 1

    # Default duration is 1 second
    new_end = round(new_start + 1.0, 2)

    # Do not let the new row run past the end of the loaded audio
    if self.audio_segment is not None and new_end > self.duration:
        new_end = round(self.duration, 2)

    row_data = dict(start_time=new_start, end_time=new_end, word="", speaker="")
    self.undo_stack.push(AddRowCommand(self, insert_position, row_data))
580
+
581
def delete_selected(self):
    """Delete the selected rows after confirmation, via an undoable command."""
    chosen = {index.row() for index in self.table.selectionModel().selectedRows()}
    selected_rows = sorted(chosen, reverse=True)
    if not selected_rows:
        QMessageBox.information(self, "No Selection", "Please select at least one row to delete.")
        return

    # Snapshot each row's content, in the same order as selected_rows
    table = self.table
    rows_data = [
        {
            'start_time': table.item(row, 0).text(),
            'end_time': table.item(row, 1).text(),
            'word': table.item(row, 2).text(),
            'speaker': table.cellWidget(row, 3).currentText(),
        }
        for row in selected_rows
    ]

    confirm = QMessageBox.question(
        self,
        "Confirm Deletion",
        f"Are you sure you want to delete {len(selected_rows)} selected row(s)?",
        QMessageBox.Yes | QMessageBox.No
    )
    if confirm != QMessageBox.Yes:
        return
    self.undo_stack.push(DeleteRowsCommand(self, rows_data, selected_rows))
608
+
609
def bulk_edit_speaker(self):
    """Assign one speaker, picked from a dialog, to all selected rows."""
    chosen = {index.row() for index in self.table.selectionModel().selectedRows()}
    selected_rows = sorted(chosen)
    if not selected_rows:
        QMessageBox.information(self, "No Selection", "Please select at least one row to edit.")
        return

    # Non-editable pick list of the known speakers plus the empty entry
    speaker, ok = QInputDialog.getItem(
        self,
        "Select Speaker",
        "Choose a speaker to assign to selected rows:",
        self.speakers + [""],
        0,
        False
    )
    if not ok:
        return

    # Remember what each row had so the command can be undone
    old_speakers = []
    for row in selected_rows:
        old_speakers.append(self.table.cellWidget(row, 3).currentText())
    self.undo_stack.push(BulkEditSpeakerCommand(self, selected_rows, old_speakers, speaker))

    # Make sure the chosen speaker is offered in every dropdown
    if speaker and speaker not in self.speakers:
        self.speakers.append(speaker)
        self.update_speaker_dropdowns()
636
+
637
def autosave(self):
    """Automatically save annotations to the temporary autosave file.

    Rows that cannot be read completely (non-numeric times, or a missing
    cell item or speaker widget) are skipped, so an incomplete row never
    raises out of the timer callback. Write errors are printed, not raised.
    """
    annotations = []
    for row_idx in range(self.table.rowCount()):
        try:
            start = round(float(self.table.item(row_idx, 0).text()), 2)
            end = round(float(self.table.item(row_idx, 1).text()), 2)
            # Read inside the try as well: item()/cellWidget() can be None
            word = self.table.item(row_idx, 2).text()
            speaker = self.table.cellWidget(row_idx, 3).currentText()
        except (ValueError, AttributeError):
            continue  # Skip invalid or incomplete rows

        annotations.append({
            "start_time": start,
            "end_time": end,
            "word": word,
            # An empty dropdown selection is saved as null
            "speaker": speaker if speaker else None
        })

    try:
        with open(self.temp_file_path, 'w') as f:
            json.dump(annotations, f, indent=4)
        print(f"Autosaved annotations to {self.temp_file_path}")
    except Exception as e:
        # Best effort: autosave must never interrupt the user
        print(f"Autosave failed: {e}")
666
+
667
def load_autosave(self):
    """Load annotations from the autosave file if it exists and has content.

    A missing or zero-byte file (for example one that was just created and
    never written) is ignored silently instead of being reported as a failed
    recovery. Any other problem while reading is printed, not raised.
    """
    path = self.temp_file_path
    try:
        # Nothing to recover from a missing or still-empty file
        if not os.path.exists(path) or os.path.getsize(path) == 0:
            return
        with open(path, "r", encoding="utf-8") as file:
            annotations = json.load(file)
        self.populate_table(annotations)
        QMessageBox.information(self, "Recovery", "Recovered annotations from autosave.")
    except Exception as e:
        print(f"Failed to recover autosave: {e}")
677
+
678
def closeEvent(self, event):
    """Ask whether to save before closing; Cancel keeps the window open."""
    reply = QMessageBox.question(
        self, 'Exit',
        "Do you want to save your annotations before exiting?",
        QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel,
        QMessageBox.Yes
    )

    wants_save = reply == QMessageBox.Yes
    if not wants_save and not reply == QMessageBox.No:
        # Cancel (or the dialog was dismissed): stay open
        event.ignore()
        return

    if wants_save:
        # Invalid annotations block the exit so they can be corrected
        if not self.validate_annotations():
            event.ignore()
            return
        self.save_annotations()

    # In both the Yes and the No case the autosave file is removed on exit
    if os.path.exists(self.temp_file_path):
        os.remove(self.temp_file_path)
    event.accept()
703
+
704
def on_cell_double_clicked(self, row, column):
    """Remember a cell's text, keyed by (row, column), before it is edited."""
    cell = self.table.item(row, column)
    if not cell:
        return
    self.old_values[(row, column)] = cell.text()
709
+
710
def on_item_changed(self, item):
    """Turn a user edit of a cell into an EditCellCommand on the undo stack.

    Changes made while self.updating is set are ignored. A non-numeric value
    in the Start or End column is rejected and the previous text restored.
    """
    if self.updating:
        return

    row, column = item.row(), item.column()
    key = (row, column)
    # Text captured by on_cell_double_clicked; empty if none was captured
    previous = self.old_values.get(key, "")
    current = item.text()

    if previous != current:
        if column in [0, 1]:  # Start or End times
            try:
                float(current)
            except ValueError:
                QMessageBox.warning(self, "Invalid Input", "Start and End times must be numeric.")
                # Restore the old text without re-entering this handler
                self.updating = True
                item.setText(previous)
                self.updating = False
                return

        self.undo_stack.push(EditCellCommand(self, row, column, previous, current))

    # The captured value has been consumed
    self.old_values.pop(key, None)
740
+
741
def insert_row(self, row_position, row_data):
    """Insert a table row at row_position, filled from the row_data dictionary.

    self.updating is set for the duration so that the cell writes are not
    treated as user edits.
    """
    self.updating = True
    table = self.table
    table.insertRow(row_position)

    # Times are shown with two decimals; unparsable values fall back to 0 / 1
    try:
        start_time = round(float(row_data.get('start_time', 0.0)), 2)
        end_time = round(float(row_data.get('end_time', 1.0)), 2)
    except (ValueError, KeyError):
        start_time, end_time = 0.00, 1.00

    word = row_data.get('word', "")
    speaker = row_data.get('speaker', "")

    # A speaker not seen before becomes available in every dropdown
    if speaker and speaker not in self.speakers:
        self.speakers.append(speaker)
        self.update_speaker_dropdowns()

    cells = (
        (QTableWidgetItem(f"{start_time:.2f}"), Qt.AlignCenter),
        (QTableWidgetItem(f"{end_time:.2f}"), Qt.AlignCenter),
        (QTableWidgetItem(word), Qt.AlignLeft | Qt.AlignVCenter),
    )
    for cell, alignment in cells:
        cell.setTextAlignment(alignment)
    for column, (cell, _) in enumerate(cells):
        table.setItem(row_position, column, cell)

    # Speaker column holds a dropdown instead of a text item
    combo = QComboBox()
    combo.addItems(self.speakers + [""])
    combo.setCurrentText(speaker)
    combo.currentTextChanged.connect(self.on_speaker_changed)
    table.setCellWidget(row_position, 3, combo)

    self.updating = False
782
+
783
def remove_row(self, row_position):
    """Delete the table row at ``row_position``.

    ``self.updating`` is raised for the duration of the removal and
    lowered afterwards (presumably so change handlers ignore the
    programmatic edit -- the handlers are outside this block).
    """
    table = self.table
    self.updating = True
    table.removeRow(row_position)
    self.updating = False
788
+
789
def set_cell(self, row, column, value):
    """Set the text of a specific cell.

    Args:
        row: Table row index.
        column: Table column index. Columns 0 and 1 hold start/end times
            and are formatted with two decimals when ``value`` is numeric.
        value: New cell content. ``None`` becomes "", any other
            non-string value is converted with ``str``.

    Nothing happens when the table has no item at ``(row, column)``.
    """
    # QTableWidgetItem.setText only accepts strings, so normalise first.
    if value is None:
        text = ""
    elif isinstance(value, str):
        text = value
    else:
        text = str(value)

    if column in (0, 1):  # Start or End times
        try:
            text = f"{float(value):.2f}"
        except (TypeError, ValueError):
            # Not numeric (or None): keep the plain text form unchanged.
            pass

    self.updating = True
    try:
        item = self.table.item(row, column)
        if item is not None:
            item.setText(text)
    finally:
        # Always lower the flag, even if the Qt call raised.
        self.updating = False
802
+
803
def set_speaker(self, row, speaker):
    """Set the speaker for a specific row.

    Args:
        row: Table row whose column-3 dropdown should be changed.
        speaker: Speaker label to select. A non-empty label that is not
            yet in ``self.speakers`` is registered first.

    Nothing is selected when the row has no dropdown widget.
    """
    # Register an unknown speaker BEFORE selecting it: on a non-editable
    # combo box setCurrentText() only takes effect when the text is
    # already one of the entries, so selecting first would silently do
    # nothing for a new speaker.
    if speaker and speaker not in self.speakers:
        self.speakers.append(speaker)
        self.update_speaker_dropdowns()

    self.updating = True
    try:
        # Looked up after the refresh above in case the refresh replaced
        # the cell widget.
        speaker_dropdown = self.table.cellWidget(row, 3)
        if speaker_dropdown is not None:
            speaker_dropdown.setCurrentText(speaker)
    finally:
        self.updating = False
814
+
815
def on_waveform_clicked(self, event):
    """Seek to the position that was clicked on the waveform plot.

    Clicks are ignored while no audio is loaded or when they fall outside
    the plot's scene rectangle. Otherwise the clicked x coordinate,
    clamped to ``[0.0, self.duration]``, becomes ``self.current_time`` and
    the play-position line, view range, scrollbar and highlighted row are
    refreshed. If playback is running it is paused and started again.
    """
    if self.audio_segment is None:
        return

    scene_pos = event.scenePos()
    plot = self.waveform_plot
    if not plot.sceneBoundingRect().contains(scene_pos):
        return

    # Translate scene coordinates into plot (data) coordinates.
    view_point = plot.getPlotItem().vb.mapSceneToView(scene_pos)

    # Keep the seek target inside the audio: cap at the duration first,
    # then floor at zero.
    seek_time = min(view_point.x(), self.duration)
    seek_time = max(0.0, seek_time)
    self.current_time = seek_time

    self.playtime_line.setValue(round(self.current_time, 2))
    self.adjust_view_range()
    # The scrollbar value is the current time in milliseconds.
    self.waveform_scrollbar.setValue(int(self.current_time * 1000))
    self.highlight_current_row()

    if not self.is_playing:
        return
    self.pause_playback()
    self.start_playback()
845
+
846
def on_table_clicked(self, row, column):
    """Move the playback position to the start time of the clicked row.

    Only the row matters; ``column`` is not used. Rows without a start
    item, or whose start text is not a number, are ignored. Playback is
    not started automatically.
    """
    start_cell = self.table.item(row, 0)
    if not start_cell:
        return

    try:
        self.current_time = float(start_cell.text())
        self.playtime_line.setValue(round(self.current_time, 2))
        self.highlight_current_row()
        self.adjust_view_range()
        # The scrollbar value is the current time in milliseconds.
        self.waveform_scrollbar.setValue(int(self.current_time * 1000))
    except ValueError:
        # Non-numeric start time: leave the playback position untouched.
        pass
861
+
862
def on_selection_changed(self, selected, deselected):
    """React to a change of the table selection.

    Previously drawn word marker lines are removed from the waveform.
    The view and playback position then jump to the first selected row,
    and a green start and end line is drawn for every selected row. The
    ``selected`` / ``deselected`` arguments are not used; the selection
    is read from the table's selection model instead.
    """
    # Drop the markers of the previous selection (none on the first call).
    for stale_line in getattr(self, 'word_lines', []):
        self.waveform_plot.removeItem(stale_line)
    self.word_lines = []

    row_indices = sorted({idx.row() for idx in self.table.selectionModel().selectedRows()})
    if not row_indices:
        return

    # Centre the view on the first selected word and seek to its start.
    lead_row = row_indices[0]
    lead_start = self.table.item(lead_row, 0)
    lead_end = self.table.item(lead_row, 1)
    if lead_start and lead_end:
        try:
            begin = float(lead_start.text())
            finish = float(lead_end.text())
            self.current_time = begin
            self.adjust_view_range(begin, finish)
            self.playtime_line.setValue(round(self.current_time, 2))
            self.waveform_scrollbar.setValue(int(self.current_time * 1000))
        except ValueError:
            pass

    # Mark start and end of every selected word on the waveform.
    for row in row_indices:
        start_cell = self.table.item(row, 0)
        end_cell = self.table.item(row, 1)
        if not (start_cell and end_cell):
            continue
        try:
            begin = float(start_cell.text())
            finish = float(end_cell.text())
            markers = [
                self.waveform_plot.addLine(x=moment, pen=pg.mkPen('g', width=1))
                for moment in (begin, finish)
            ]
            self.word_lines.extend(markers)
        except ValueError:
            continue
900
+
901
def get_current_row(self):
    """Return the index of the row that contains ``self.current_time``.

    A row matches when ``start <= current_time < end``. Rows with a
    missing start/end item or non-numeric text are skipped. The first
    match in table order wins; -1 is returned when no row matches.
    """
    table = self.table
    for index in range(table.rowCount()):
        begin_cell = table.item(index, 0)
        finish_cell = table.item(index, 1)
        if not (begin_cell and finish_cell):
            continue
        try:
            begin = float(begin_cell.text())
            finish = float(finish_cell.text())
            if begin <= self.current_time < finish:
                return index
        except ValueError:
            continue
    return -1
915
+
916
def highlight_current_row(self):
    """Colour the row at the playback position and scroll it into view.

    Every existing item of the row returned by ``get_current_row`` gets a
    blue background, items of all other rows a black one. When a current
    row exists, the table is scrolled so that row is centred.
    """
    active_row = self.get_current_row()

    for row_idx in range(self.table.rowCount()):
        shade = "blue" if row_idx == active_row else "black"
        for col_idx in range(self.table.columnCount()):
            cell = self.table.item(row_idx, col_idx)
            if cell:
                cell.setBackground(QColor(shade))

    if active_row == -1:
        return

    # Scroll to the current row
    anchor = self.table.item(active_row, 0)
    self.table.scrollToItem(anchor, QAbstractItemView.PositionAtCenter)
931
+
932
def adjust_view_range(self, start=None, end=None):
    """Set the X-axis range shown by the waveform plot.

    When both ``start`` and ``end`` are given they are used as-is. If
    either one is missing, a 5-second window centred on
    ``self.current_time`` is used instead, clipped to
    ``[0.0, self.duration]``.
    """
    if start is None or end is None:
        half_span = 5.0 / 2.0  # half of the 5-second window
        start = max(0.0, self.current_time - half_span)
        end = min(self.duration, self.current_time + half_span)
    self.waveform_plot.setXRange(start, end, padding=0)
940
+
941
def on_scrollbar_moved(self, value):
    """Follow the waveform scrollbar.

    ``value`` is a position in milliseconds; it is converted to seconds
    and stored as ``self.current_time``, after which the play-position
    line, the view range and the highlighted row are refreshed.
    """
    seconds = value / 1000.0
    self.current_time = seconds
    self.playtime_line.setValue(round(self.current_time, 2))
    self.adjust_view_range()
    self.highlight_current_row()
947
+
948
def show_context_menu(self, position):
    """Open the transcript table's context menu at ``position``.

    The menu offers "Add Below", "Delete Selected" and
    "Bulk Edit Speaker", wired to ``add_below``, ``delete_selected`` and
    ``bulk_edit_speaker``. ``position`` is mapped from the table viewport
    to global coordinates before the menu is shown.
    """
    menu = QMenu()

    entries = (
        ("Add Below", self.add_below),
        ("Delete Selected", self.delete_selected),
        ("Bulk Edit Speaker", self.bulk_edit_speaker),
    )
    for label, handler in entries:
        action = QAction(label, self)
        action.triggered.connect(handler)
        menu.addAction(action)

    global_pos = self.table.viewport().mapToGlobal(position)
    menu.exec_(global_pos)
965
+
966
def validate_annotations(self):
    """Validate that annotations do not overlap and start times are less than end times.

    Every row must have a start item (column 0) and an end item
    (column 1) whose texts are numeric, with start strictly less than
    end. Rows are then ordered by start time and each annotation must
    end no later than the next one starts (touching is allowed).

    Returns:
        True when all rows pass. On the first problem found a warning
        dialog naming the 1-based row number(s) is shown and False is
        returned.
    """
    # Pass 1: parse every row up front. Doing this before sorting means a
    # missing item or non-numeric text is reported through the warning
    # dialog instead of raising while the sort key is computed.
    intervals = []
    for row in range(self.table.rowCount()):
        start_item = self.table.item(row, 0)
        end_item = self.table.item(row, 1)
        if start_item is None or end_item is None:
            QMessageBox.warning(self, "Invalid Annotation", f"Missing start or end time at row {row + 1}.")
            return False
        try:
            start_time = float(start_item.text())
            end_time = float(end_item.text())
        except ValueError:
            QMessageBox.warning(self, "Invalid Annotation", f"Non-numeric start or end time at row {row + 1}.")
            return False
        if start_time >= end_time:
            QMessageBox.warning(self, "Invalid Annotation", f"Start time must be less than end time at row {row + 1}.")
            return False
        intervals.append((start_time, end_time, row))

    # Pass 2: compare neighbours in start-time order. The sort is stable,
    # so rows with equal start times keep their table order.
    intervals.sort(key=lambda interval: interval[0])
    for current, following in zip(intervals, intervals[1:]):
        _, end_time, row = current
        next_start, _, next_row = following
        if end_time > next_start:
            QMessageBox.warning(
                self, "Invalid Annotation",
                f"Annotations at rows {row + 1} and {next_row + 1} overlap."
            )
            return False
    return True
995
+
996
+
997
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the annotation
    # window and hand the event loop's return value to the interpreter.
    application = QApplication(sys.argv)
    main_window = AnnotationTool()
    main_window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)