pelican-nlp 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pelican_nlp/Nils_backup/__init__.py +0 -0
- pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
- pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
- pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
- pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
- pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
- pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
- pelican_nlp/Nils_backup/fluency/config.py +231 -0
- pelican_nlp/Nils_backup/fluency/main.py +182 -0
- pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
- pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
- pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
- pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
- pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
- pelican_nlp/Nils_backup/fluency/utils.py +41 -0
- pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
- pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
- pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
- pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
- pelican_nlp/Nils_backup/transcription/test.json +1 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
- pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
- pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
- pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
- pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
- pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
- pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
- pelican_nlp/__init__.py +1 -1
- pelican_nlp/_version.py +1 -0
- pelican_nlp/configuration_files/config_audio.yml +150 -0
- pelican_nlp/configuration_files/config_discourse.yml +104 -0
- pelican_nlp/configuration_files/config_fluency.yml +108 -0
- pelican_nlp/configuration_files/config_general.yml +131 -0
- pelican_nlp/configuration_files/config_morteza.yml +103 -0
- pelican_nlp/praat/__init__.py +29 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/METADATA +4 -3
- pelican_nlp-0.1.2.dist-info/RECORD +75 -0
- pelican_nlp-0.1.1.dist-info/RECORD +0 -39
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/WHEEL +0 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1122 @@
|
|
1
|
+
import sys
|
2
|
+
import os
|
3
|
+
import json
|
4
|
+
import tempfile
|
5
|
+
import numpy as np
|
6
|
+
import librosa
|
7
|
+
import time
|
8
|
+
|
9
|
+
from PyQt5.QtWidgets import (
|
10
|
+
QApplication, QMainWindow, QWidget, QTableWidget, QTableWidgetItem,
|
11
|
+
QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QFileDialog, QMessageBox,
|
12
|
+
QInputDialog, QMenu, QAction, QAbstractItemView, QSplitter, QUndoStack, QUndoCommand, QScrollBar
|
13
|
+
)
|
14
|
+
from PyQt5.QtCore import Qt, QTimer, QObject, pyqtSignal, QThread
|
15
|
+
from PyQt5.QtGui import QColor
|
16
|
+
|
17
|
+
import pyqtgraph as pg
|
18
|
+
from pydub import AudioSegment
|
19
|
+
from pydub.playback import _play_with_simpleaudio as play_audio
|
20
|
+
|
21
|
+
# --- Undo/Redo Command Classes ---
|
22
|
+
|
23
|
+
class AddRowCommand(QUndoCommand):
    """Undoable insertion of a single row.

    Redo asks the main window to insert ``row_data`` at ``row_position``;
    undo asks it to remove the row at that same position.
    """

    def __init__(self, main_window, row_position, row_data, description="Add Row"):
        """Store the target window, the row index and the row payload."""
        super().__init__(description)
        self.main_window, self.row_position, self.row_data = (
            main_window,
            row_position,
            row_data,
        )

    def redo(self):
        """Insert the stored row data at the stored position."""
        window = self.main_window
        window.insert_row(self.row_position, self.row_data)

    def undo(self):
        """Remove the row at the stored position."""
        window = self.main_window
        window.remove_row(self.row_position)
|
35
|
+
|
36
|
+
|
37
|
+
class DeleteRowsCommand(QUndoCommand):
    """Undoable deletion of several rows.

    ``row_positions`` and ``rows_data`` are parallel sequences: the i-th data
    entry is re-inserted at the i-th position when the command is undone.
    """

    def __init__(self, main_window, rows_data, row_positions, description="Delete Rows"):
        """Store the target window plus the removed rows and their positions."""
        super().__init__(description)
        self.main_window = main_window
        self.rows_data = rows_data
        self.row_positions = row_positions

    def redo(self):
        """Remove the rows, highest index first."""
        # Working from the bottom up keeps the remaining indices valid while
        # rows are being removed.
        descending = sorted(self.row_positions)
        descending.reverse()
        for position in descending:
            self.main_window.remove_row(position)

    def undo(self):
        """Re-insert the rows in ascending position order."""
        restored = list(zip(self.row_positions, self.rows_data))
        restored.sort()
        for position, payload in restored:
            self.main_window.insert_row(position, payload)
|
51
|
+
|
52
|
+
|
53
|
+
class EditCellCommand(QUndoCommand):
    """Undoable change of one table cell from ``old_value`` to ``new_value``."""

    def __init__(self, main_window, row, column, old_value, new_value, description="Edit Cell"):
        """Store the cell coordinates and both the previous and the new value."""
        super().__init__(description)
        self.main_window = main_window
        self.row, self.column = row, column
        self.old_value, self.new_value = old_value, new_value

    def _write(self, value):
        """Ask the main window to put ``value`` into the tracked cell."""
        self.main_window.set_cell(self.row, self.column, value)

    def redo(self):
        """Apply the new value."""
        self._write(self.new_value)

    def undo(self):
        """Restore the previous value."""
        self._write(self.old_value)
|
67
|
+
|
68
|
+
|
69
|
+
class BulkEditSpeakerCommand(QUndoCommand):
    """Undoable assignment of one speaker to several rows.

    ``old_speakers`` is parallel to ``row_positions`` and holds the speaker
    each row is given back on undo.
    """

    def __init__(self, main_window, row_positions, old_speakers, new_speaker, description="Bulk Edit Speaker"):
        """Store the affected rows, their previous speakers and the new one."""
        super().__init__(description)
        self.main_window = main_window
        self.row_positions = row_positions
        self.old_speakers = old_speakers
        self.new_speaker = new_speaker

    def redo(self):
        """Assign the new speaker to every affected row."""
        assign = self.main_window.set_speaker
        replacement = self.new_speaker
        for position in self.row_positions:
            assign(position, replacement)

    def undo(self):
        """Give every affected row its previous speaker back."""
        assign = self.main_window.set_speaker
        for position, previous in zip(self.row_positions, self.old_speakers):
            assign(position, previous)
|
84
|
+
|
85
|
+
|
86
|
+
# --- Audio Loader for Asynchronous Loading ---
|
87
|
+
|
88
|
+
class AudioLoader(QObject):
    """Worker object that decodes an audio file into a display-ready waveform.

    ``run`` is meant to be invoked from a worker thread. On success it emits
    ``finished(samples, sr)`` with the peak-normalised (and possibly
    block-averaged) mono samples and the file's native sample rate; on any
    failure it emits ``error(message)``.

    After ``run`` has finished, ``downsample_factor`` holds the factor that was
    actually applied to the emitted samples (``1`` when no downsampling took
    place), so ``sr / downsample_factor`` is the rate of the emitted array.
    """

    finished = pyqtSignal(np.ndarray, int)
    error = pyqtSignal(str)

    # Waveforms with more samples than this are block-averaged before display.
    MAX_DISPLAY_SAMPLES = 1_000_000

    def __init__(self, file_path, downsample_factor=100):
        """Remember the file to load and the block size used for downsampling."""
        super().__init__()
        self.file_path = file_path
        self.downsample_factor = downsample_factor

    def run(self):
        """Load, normalise and (if long) downsample the audio, then emit it."""
        try:
            # sr=None keeps the file's native sampling rate; mono=True mixes
            # the channels down to a single one.
            samples, sr = librosa.load(self.file_path, sr=None, mono=True)

            # Scale to [-1, 1]; an all-zero signal is left untouched.
            max_abs_sample = np.max(np.abs(samples))
            if max_abs_sample != 0:
                samples = samples / max_abs_sample

            if len(samples) > self.MAX_DISPLAY_SAMPLES:
                samples = self.downsample_waveform(samples, self.downsample_factor)
            else:
                # The receiver converts sample counts to seconds with
                # sr / downsample_factor, so the factor must describe what was
                # really done to the samples; nothing was averaged here.
                self.downsample_factor = 1

            self.finished.emit(samples, sr)
        except Exception as e:
            # Worker boundary: report any failure through the error signal
            # instead of letting it escape into the thread.
            self.error.emit(str(e))

    def downsample_waveform(self, samples, factor):
        """Return the mean of each consecutive block of ``factor`` samples.

        Trailing samples that do not fill a whole block are dropped.
        """
        num_blocks = len(samples) // factor
        trimmed = np.asarray(samples)[:num_blocks * factor]
        # One row per block, averaged along the row in a single vectorised call.
        return trimmed.reshape(num_blocks, factor).mean(axis=1)
|
119
|
+
|
120
|
+
|
121
|
+
# --- Draggable Line Class for pyqtgraph ---
|
122
|
+
|
123
|
+
class DraggableLine(pg.InfiniteLine):
    """Vertical boundary marker that remembers which word boundary it belongs to.

    ``idx`` and ``boundary_type`` are stored on the instance so that handlers
    receiving the line can tell which word and which edge it stands for.
    """

    def __init__(self, pos, color, idx, boundary_type, pen=None, movable=True):
        """Create the line at ``pos``; a 2px pen in ``color`` is used if no pen is given."""
        if not pen:
            pen = pg.mkPen(color=color, width=2)
        super().__init__(pos=pos, angle=90, pen=pen, movable=movable)
        self.idx = idx
        self.boundary_type = boundary_type
        # Hover feedback: a lighter shade of the line's own colour.
        hover_color = pen.color().lighter()
        self.setHoverPen(hover_color)
        self.setCursor(Qt.SizeHorCursor)
|
131
|
+
|
132
|
+
|
133
|
+
# --- WaveformCanvas Class Using pyqtgraph ---
|
134
|
+
|
135
|
+
class WaveformCanvas(QWidget):
    """Waveform display with word-boundary lines, utterance overlays and a scrollbar.

    Signals:
        boundary_changed(int, str, float): word index, ``'start'``/``'end'``
            and the new boundary time. Also emitted as ``(-1, '', 0.0)`` after
            a new audio file has been plotted.
        waveform_clicked(float): x coordinate (time) of a click on the plot.
        audio_loaded(): emitted once the waveform has been plotted.
        loading_error(str): message forwarded from the audio loader.
    """

    boundary_changed = pyqtSignal(int, str, float)  # idx, 'start'/'end', new position
    waveform_clicked = pyqtSignal(float)
    audio_loaded = pyqtSignal()
    loading_error = pyqtSignal(str)

    # Boundary lines are drawn this far inside the word so that the end line of
    # one word and the start line of the next do not sit on top of each other.
    BOUNDARY_INSET = 0.005

    def __init__(self, parent=None):
        """Build the plot and scrollbar and initialise empty annotation state."""
        super().__init__(parent)
        # NOTE: this attribute shadows QWidget.layout(); the name is kept
        # because renaming it would change the widget's attribute surface.
        self.layout = QVBoxLayout(self)
        self.plot_widget = pg.PlotWidget()
        self.plot_widget.setYRange(-1, 1)
        self.plot_widget.showGrid(x=True, y=False)
        self.plot_widget.setLabel('bottom', 'Time', 's')
        self.layout.addWidget(self.plot_widget)

        self.scrollbar = QScrollBar(Qt.Horizontal)
        self.layout.addWidget(self.scrollbar)
        self.scrollbar.valueChanged.connect(self.on_scrollbar_value_changed)

        self.plot_widget.plotItem.vb.sigXRangeChanged.connect(self.on_x_range_changed)

        self.words = []
        self.lines = []
        self.connecting_lines = []

        self.dragging_line = None

        self.utterances = []
        self.utterance_items = []
        self.utterance_regions = []

        self.plot_widget.scene().sigMouseClicked.connect(self.on_waveform_click)

        self.audio_data = None
        self.sr = None
        self.duration = None  # seconds; stays None until audio has been loaded
        self.window_size = 5.0
        self.playtime_line = None  # created when audio is plotted

    # ------------------------------------------------------------------ helpers

    def _clamp_time(self, value):
        """Clamp ``value`` to ``[0, duration]``; only to ``>= 0`` while no audio is loaded."""
        value = max(0.0, value)
        if self.duration is not None:
            value = min(value, self.duration)
        return value

    @staticmethod
    def _connector_y(idx):
        """Return the y position of a word's connector; alternates between rows."""
        return 0.55 if idx % 2 == 0 else 0.45

    def _discard_item(self, item):
        """Remove ``item`` from the plot, or from the scene if it was only parented."""
        self.plot_widget.removeItem(item)
        scene = item.scene()
        if scene is not None:
            scene.removeItem(item)

    # ------------------------------------------------------------------- audio

    def load_audio(self, file_path):
        """Start loading ``file_path`` on a worker thread.

        The result arrives in ``on_audio_loaded``; failures arrive in
        ``on_loading_error``.
        """
        # NOTE: this attribute shadows QObject.thread(); name kept for
        # compatibility with existing attribute access.
        self.thread = QThread()
        self.loader = AudioLoader(file_path)
        self.loader.moveToThread(self.thread)
        self.thread.started.connect(self.loader.run)
        self.loader.finished.connect(self.on_audio_loaded)
        self.loader.finished.connect(self.thread.quit)
        self.loader.finished.connect(self.loader.deleteLater)
        self.loader.error.connect(self.on_loading_error)
        # A failed load must also stop the worker thread and release the
        # loader; otherwise the thread keeps running after every failure.
        self.loader.error.connect(self.thread.quit)
        self.loader.error.connect(self.loader.deleteLater)
        self.thread.finished.connect(self.thread.deleteLater)
        self.thread.start()

    def on_audio_loaded(self, samples, sr):
        """Plot the loaded waveform and reset the view, scrollbar and overlays."""
        self.audio_data = samples
        self.sr = sr
        # NOTE(review): this relies on loader.downsample_factor describing the
        # factor actually applied to `samples` - confirm against the loader.
        effective_sr = sr / self.loader.downsample_factor
        self.duration = len(samples) / effective_sr
        t = np.linspace(0, self.duration, num=len(samples))

        self.plot_widget.clear()
        self.plot_widget.plot(t, samples, pen='b')
        self.playtime_line = pg.InfiniteLine(pos=0, angle=90, pen=pg.mkPen('y', width=4))
        self.playtime_line.setZValue(1000)
        self.plot_widget.addItem(self.playtime_line)
        self.plot_widget.setLimits(xMin=0, xMax=self.duration)
        self.plot_widget.setXRange(0, min(self.window_size, self.duration))
        self.plot_widget.setLabel('bottom', 'Time', 's')
        # clear() above removed every overlay, so both kinds are redrawn from
        # the data already held.
        self.draw_lines()
        self.draw_utterances()
        self.boundary_changed.emit(-1, '', 0.0)  # Reset
        print("Audio loaded successfully.")
        print(f"Duration: {self.duration}, Window Size: {self.window_size}")
        # The scrollbar works in milliseconds.
        self.scrollbar.setMinimum(0)
        self.scrollbar.setMaximum(int(self.duration * 1000))
        self.scrollbar.setSingleStep(100)
        self.scrollbar.setPageStep(int(self.window_size * 1000))
        self.scrollbar.setValue(0)

        self.audio_loaded.emit()

    def on_loading_error(self, error_message):
        """Forward a loader failure through ``loading_error``."""
        self.loading_error.emit(error_message)

    # -------------------------------------------------------------- utterances

    def load_utterances(self, utterances):
        """Replace the utterance list and redraw the utterance overlays."""
        self.utterances = utterances
        self.draw_utterances()

    def draw_utterances(self):
        """Draw a shaded region, a metadata label and evenly spaced word labels per utterance."""
        self.clear_utterance_items()
        self.utterance_items = []
        self.utterance_regions = []

        speaker_colors = {
            "SPEAKER_00": QColor(255, 200, 200, 100),  # Light red
            "SPEAKER_01": QColor(200, 255, 200, 100),  # Light green
            "SPEAKER_02": QColor(200, 200, 255, 100),  # Light blue
            "UNKNOWN": QColor(200, 200, 200, 100),
            "": QColor(200, 200, 200, 100),  # Light gray
        }

        for idx, utterance in enumerate(self.utterances):
            start = float(utterance['start_time'])
            end = float(utterance['end_time'])
            speaker = utterance.get('speaker', '')
            confidence = utterance.get('confidence', '')
            color = speaker_colors.get(speaker, QColor(200, 200, 200, 100))

            # Background region for the utterance, drawn slightly inside its bounds.
            region = pg.LinearRegionItem(values=[start+0.01, end-0.01], brush=color)
            region.setMovable(False)
            self.plot_widget.addItem(region)
            self.utterance_regions.append(region)

            # Break utterance text into words; empty utterances get no labels.
            text = utterance.get('text', '')
            words = text.strip().split()
            num_words = len(words)
            if num_words == 0:
                continue

            # Label with utterance metadata (index, speaker, confidence, duration).
            label_text = f"Utterance: {idx + 1}, Speaker: {speaker}, Speaker Confidence: {confidence}, Duration: {round(end - start, 2)}s"
            meta_label = pg.TextItem(label_text, anchor=(0.5, 0), color='yellow')
            meta_label.setPos((start + end) / 2, -0.25)
            self.plot_widget.addItem(meta_label)
            self.utterance_items.append(meta_label)

            # num_words + 2 points; the two end points are skipped so no word
            # label sits exactly on an utterance boundary.
            word_times = np.linspace(start, end, num_words+2)

            for word, word_time in zip(words, word_times[1:-1]):
                label = pg.TextItem(word, anchor=(0.5, 0), color='white')
                label.setPos(word_time, -0.5)
                self.plot_widget.addItem(label)
                self.utterance_items.append(label)

    def clear_utterance_items(self):
        """Remove all utterance labels and regions from the plot."""
        for item in self.utterance_items:
            self.plot_widget.removeItem(item)
        self.utterance_items = []
        for region in self.utterance_regions:
            self.plot_widget.removeItem(region)
        self.utterance_regions = []

    # ------------------------------------------------------------------- words

    def load_words(self, words):
        """Replace the word list and redraw the boundary lines."""
        self.words = words
        self.draw_lines()

    def clear_lines(self):
        """Remove every boundary line, connector, arrow and word label from the plot."""
        for line_info in self.lines:
            self.plot_widget.removeItem(line_info['line'])
        self.lines = []

        for entry in self.connecting_lines:
            if isinstance(entry, dict):
                # draw_lines stores one dict of graphics items per word; each
                # item has to be removed individually.
                for item in entry.values():
                    self.plot_widget.removeItem(item)
            else:
                # add_word_lines stores a bare arrow item.
                self._discard_item(entry)
        self.connecting_lines = []

    def draw_lines(self):
        """Recreate boundary lines, connector, arrows and label for every word."""
        self.clear_lines()
        inset = self.BOUNDARY_INSET
        for idx, word in enumerate(self.words):
            y_pos_line = self._connector_y(idx)

            # The line positions are moved slightly inwards because otherwise
            # the start and end lines of consecutive words would overlap.
            left = word['start'] + inset
            right = word['end'] - inset

            start_line = DraggableLine(pos=left, color='green', idx=idx, boundary_type='start')
            end_line = DraggableLine(pos=right, color='red', idx=idx, boundary_type='end')
            self.plot_widget.addItem(start_line)
            self.plot_widget.addItem(end_line)
            self.lines.append({'line': start_line, 'idx': idx, 'type': 'start'})
            self.lines.append({'line': end_line, 'idx': idx, 'type': 'end'})

            # Horizontal connector between the two boundaries.
            connecting_line = pg.PlotCurveItem(
                [left, right],
                [y_pos_line, y_pos_line],
                pen=pg.mkPen('blue', width=2),
            )
            self.plot_widget.addItem(connecting_line)

            start_arrow = self.create_arrow(left, y_pos_line, 0)
            end_arrow = self.create_arrow(right, y_pos_line, 180)

            label = pg.TextItem(word['word'], anchor=(0.5, 0), color='white')
            self.plot_widget.addItem(label)

            # One entry per word, in word order, so that
            # connecting_lines[idx] belongs to words[idx].
            self.connecting_lines.append({
                "line": connecting_line,
                "start_arrow": start_arrow,
                "end_arrow": end_arrow,
                "label": label,
            })

            # Position the label.
            self.update_connecting_line(idx)

            # `line=...` binds the current line; a plain closure would see
            # only the last line created by the loop.
            start_line.sigPositionChangeFinished.connect(lambda _, line=start_line: self.on_line_moved(line))
            end_line.sigPositionChangeFinished.connect(lambda _, line=end_line: self.on_line_moved(line))

        self.plot_widget.update()

    def create_arrow(self, x, y, angle):
        """Create a blue arrow head at ``(x, y)``, add it to the plot and return it."""
        arrow = pg.ArrowItem(
            pos=(x, y),
            angle=angle,  # Direction of the arrow in degrees
            tipAngle=30,
            baseAngle=20,
            headLen=15,
            brush='blue',
        )
        self.plot_widget.addItem(arrow)
        return arrow

    def update_connecting_line(self, idx):
        """Move the connector, arrows and label of word ``idx`` to its current bounds."""
        word = self.words[idx]
        start = word['start']
        end = word['end']
        entry = self.connecting_lines[idx]

        if not isinstance(entry, dict):
            # Entry created by add_word_lines: a single arrow item. Keep it at
            # the position rule used when it was created.
            entry.setPos((start + end) / 2, 0)
            return

        y_pos_line = self._connector_y(idx)
        left = start + self.BOUNDARY_INSET
        right = end - self.BOUNDARY_INSET

        entry['line'].setData([left, right], [y_pos_line, y_pos_line])
        entry['start_arrow'].setPos(left, y_pos_line)
        entry['end_arrow'].setPos(right, y_pos_line)

        # The label height depends on the speaker.
        speaker = word["speaker"]
        if speaker in ("", "UNKNOWN"):
            mid_y = 0.7
        elif speaker == "SPEAKER_00":
            mid_y = 0.4
        elif speaker == "SPEAKER_01":
            mid_y = 0.6
        else:
            mid_y = 0.0

        entry['label'].setPos((start + end) / 2, mid_y)

    def on_line_moved(self, line):
        """Store the new position of a dragged boundary line and announce it."""
        idx = line.idx
        boundary_type = line.boundary_type
        new_pos = self._clamp_time(line.value())
        self.words[idx][boundary_type] = new_pos
        self.boundary_changed.emit(idx, boundary_type, new_pos)
        self.update_connecting_line(idx)

    def on_waveform_click(self, event):
        """Emit ``waveform_clicked`` with the clicked time if the click hit the plot."""
        pos = event.scenePos()
        if not self.plot_widget.sceneBoundingRect().contains(pos):
            return
        mouse_point = self.plot_widget.plotItem.vb.mapSceneToView(pos)
        self.waveform_clicked.emit(self._clamp_time(mouse_point.x()))

    def update_playtime_line(self, current_time):
        """Move the playback cursor to ``current_time`` and centre the view on it."""
        if self.playtime_line is not None:
            self.playtime_line.setPos(current_time)
        self.adjust_view_range(current_time)

    def add_word_lines(self, row, start_time, end_time):
        """Add draggable lines for a new word."""
        start_line = DraggableLine(pos=start_time, color='green', idx=row, boundary_type='start')
        end_line = DraggableLine(pos=end_time, color='red', idx=row, boundary_type='end')
        self.plot_widget.addItem(start_line)
        self.plot_widget.addItem(end_line)
        self.lines.append({'line': start_line, 'idx': row, 'type': 'start'})
        self.lines.append({'line': end_line, 'idx': row, 'type': 'end'})

        # NOTE(review): unlike draw_lines this stores a single arrow (not a
        # dict of items), parents it to the plot item instead of adding it to
        # the plot, and appends it at the end regardless of `row`. Behaviour
        # kept as-is; confirm the intended shape with the callers.
        connecting_line = pg.ArrowItem(
            pos=((start_time + end_time) / 2, 0),
            angle=0,
            tipAngle=30,
            baseAngle=20,
            headLen=15,
            tailLen=0,
            tailWidth=0,
            brush='blue'
        )
        connecting_line.setParentItem(self.plot_widget.plotItem)
        self.connecting_lines.append(connecting_line)

        start_line.sigPositionChangeFinished.connect(lambda _, line=start_line: self.on_line_moved(line))
        end_line.sigPositionChangeFinished.connect(lambda _, line=end_line: self.on_line_moved(line))

    # -------------------------------------------------------------------- view

    def adjust_view_range(self, current_time, window_size=None):
        """Centre a window of ``window_size`` seconds on ``current_time``.

        ``window_size`` defaults to ``self.window_size``. The scrollbar is
        moved to match without emitting its own change signal.
        """
        if window_size is None:
            window_size = self.window_size
        half_window = window_size / 2.0
        start = max(0.0, current_time - half_window)
        end = current_time + half_window
        if self.duration is not None:
            end = min(self.duration, end)
        self.plot_widget.setXRange(start, end, padding=0)
        self.scrollbar.blockSignals(True)
        self.scrollbar.setValue(int(start * 1000))
        self.scrollbar.blockSignals(False)

    def update_line_position(self, idx, boundary_type, new_pos):
        """Move the boundary line of word ``idx`` to ``new_pos`` and store the value."""
        for line_info in self.lines:
            if line_info['idx'] == idx and line_info['type'] == boundary_type:
                line_info['line'].setValue(new_pos)
                break
        self.words[idx][boundary_type] = new_pos
        self.update_connecting_line(idx)

    def on_scrollbar_value_changed(self, value):
        """Scroll the view so that it starts at ``value`` milliseconds."""
        start = value / 1000.0
        if self.duration is None:
            # No audio yet: nothing to clamp against.
            end = start + self.window_size
        else:
            # Keep the window inside the audio; the lower clamp matters when
            # the audio is shorter than one window.
            start = max(0.0, min(start, self.duration - self.window_size))
            end = min(start + self.window_size, self.duration)
        self.plot_widget.setXRange(start, end, padding=0)

    def on_x_range_changed(self, view_box, range):
        """Keep the scrollbar in sync when the plot's x range changes."""
        # Only the left edge drives the scrollbar position.
        start = max(0, range[0])
        self.scrollbar.blockSignals(True)
        self.scrollbar.setValue(int(start * 1000))
        self.scrollbar.blockSignals(False)
|
464
|
+
|
465
|
+
|
466
|
+
# --- MainWindow Class ---
|
467
|
+
|
468
|
+
class MainWindow(QMainWindow):
|
469
|
+
def __init__(self):
    """Create the main window, build the UI and start periodic autosaving."""
    super().__init__()
    self.setWindowTitle("PELICAn Transcription Tool")
    self.setGeometry(100, 100, 1600, 900)

    # Playback and editing state.
    self.audio_segment = None
    self.is_playing = False
    self.play_obj = None
    self.current_time = 0.0
    self.speakers = []
    self.undo_stack = QUndoStack(self)
    self.old_values = {}
    # Reserve a file path for autosaves. Only the name is needed, so the
    # handle is closed right away; delete=False keeps the file on disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.json') as autosave_file:
        self.temp_file_path = autosave_file.name
    self.previous_current_row = None

    # Widgets first, then the signal wiring that refers to them.
    self.setup_ui()
    self.setup_signals()

    # Autosave every 5 seconds.
    self.autosave_timer = QTimer(self)
    self.autosave_timer.timeout.connect(self.autosave)
    self.autosave_timer.start(5000)

    # Load autosave if exists
    self.load_autosave()
|
495
|
+
|
496
|
+
def setup_ui(self):
    """Build the window: waveform and controls on the left, transcript table on the right."""

    def make_button(caption, handler):
        # Create a push button whose click is wired to `handler`.
        button = QPushButton(caption)
        button.clicked.connect(handler)
        return button

    def make_row(buttons):
        # Lay the given buttons out left to right.
        row = QHBoxLayout()
        for button in buttons:
            row.addWidget(button)
        return row

    splitter = QSplitter(Qt.Horizontal)

    # Left panel: waveform canvas.
    self.waveform_widget = QWidget()
    waveform_layout = QVBoxLayout(self.waveform_widget)
    self.canvas = WaveformCanvas(parent=self.waveform_widget)
    waveform_layout.addWidget(self.canvas)

    # Playback controls (kept as attributes).
    self.play_button = make_button("Play", self.toggle_playback)
    self.stop_button = make_button("Stop", self.stop_playback)
    self.return_button = make_button("Return to Selection (X)", self.return_to_selection)
    waveform_layout.addLayout(
        make_row([self.play_button, self.stop_button, self.return_button])
    )

    # Load / save buttons.
    file_buttons = [
        make_button("Load Audio", self.load_audio),
        make_button("Load Transcript", self.load_transcript),
        make_button("Save Annotations", self.save_annotations),
    ]
    waveform_layout.addLayout(make_row(file_buttons))

    # Undo / redo buttons drive the undo stack directly.
    history_buttons = [
        make_button("Undo", self.undo_stack.undo),
        make_button("Redo", self.undo_stack.redo),
    ]
    waveform_layout.addLayout(make_row(history_buttons))

    splitter.addWidget(self.waveform_widget)

    # Right panel: transcript table with whole-row, multi-row selection and a
    # custom context menu.
    table = QTableWidget()
    self.table_widget = table
    table.setColumnCount(4)
    table.setHorizontalHeaderLabels(["Word", "Start Time", "End Time", "Speaker"])
    table.setSelectionBehavior(QAbstractItemView.SelectRows)
    table.setSelectionMode(QAbstractItemView.ExtendedSelection)
    table.setContextMenuPolicy(Qt.CustomContextMenu)
    table.customContextMenuRequested.connect(self.show_context_menu)
    splitter.addWidget(table)

    self.setCentralWidget(splitter)
|
554
|
+
|
555
|
+
def setup_signals(self):
    """Connect canvas and table signals to their handlers on this window."""
    canvas = self.canvas
    table = self.table_widget

    # Canvas -> window.
    canvas.boundary_changed.connect(self.on_boundary_changed)
    canvas.waveform_clicked.connect(self.on_waveform_clicked)
    canvas.audio_loaded.connect(self.on_audio_loaded)
    canvas.loading_error.connect(self.on_audio_load_error)

    # Table -> window.
    table.itemChanged.connect(self.on_item_changed)
    table.cellDoubleClicked.connect(self.on_cell_double_clicked)
    table.currentCellChanged.connect(self.on_current_cell_changed)
    table.selectionModel().selectionChanged.connect(self.on_selection_changed)
|
564
|
+
|
565
|
+
def load_audio(self):
    """Ask for an audio file, hand it to the canvas and load it for playback.

    The canvas loads the waveform through its own ``load_audio``; the
    playback copy is loaded here as a mono ``AudioSegment``. Cancelling the
    dialog does nothing.
    """
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self,
        "Open Audio File",
        "",
        "Audio Files (*.wav *.mp3 *.flac *.ogg);;All Files (*)",
        options=options,
    )
    if not file_path:
        return

    self.canvas.load_audio(file_path)

    # Drop the previous file's audio first so that a failed load cannot leave
    # playback pointing at a different file than the one being displayed.
    self.audio_segment = None
    try:
        self.audio_segment = AudioSegment.from_file(file_path).set_channels(1)
    except Exception as e:
        # UI boundary: report the failure instead of letting it propagate.
        QMessageBox.critical(self, "Audio Load Error", f"Failed to load audio for playback:\n{str(e)}")
|
581
|
+
|
582
|
+
def on_audio_loaded(self):
    """Show a status message and centre the waveform view on the current time."""
    status_bar = self.statusBar()
    status_bar.showMessage("Audio loaded successfully.", 5000)
    position = self.current_time
    self.canvas.adjust_view_range(position)
|
585
|
+
|
586
|
+
def on_audio_load_error(self, error_message):
    """Report a failed waveform load in a dialog and in the status bar."""
    details = f"Failed to load audio file:\n{error_message}"
    QMessageBox.critical(self, "Audio Load Error", details)
    status_bar = self.statusBar()
    status_bar.showMessage("Failed to load audio.", 5000)
|
589
|
+
|
590
|
+
def load_transcript(self):
    """Ask for a transcript JSON file and load its words and utterances.

    Words are read from the ``"combined_data"`` key and utterances from
    ``"utterance_data"``. Each word must have ``'word'``, ``'start_time'``
    and ``'end_time'``; numeric ``'start'``/``'end'`` and a ``'speaker'``
    entry (default ``''``) are added to it. Any failure is shown in an error
    dialog; the canvas, the speaker list and the table are only touched once
    every word has been validated.
    """
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self,
        "Open Transcript File",
        "",
        "JSON Files (*.json);;All Files (*)",
        options=options,
    )
    if not file_path:
        return

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            transcript = json.load(f)
        words = transcript.get("combined_data", [])
        utterances = transcript.get("utterance_data", [])

        # Validate and convert every word before changing any window state,
        # so a malformed file cannot leave a half-loaded transcript behind.
        new_speakers = []
        for word in words:
            if 'word' not in word or 'start_time' not in word or 'end_time' not in word:
                raise ValueError("Invalid transcript format. Each entry must have 'word', 'start_time', and 'end_time'.")
            word['start'] = float(word['start_time'])
            word['end'] = float(word['end_time'])
            speaker = word.setdefault('speaker', '')
            if speaker and speaker not in self.speakers and speaker not in new_speakers:
                new_speakers.append(speaker)

        self.canvas.load_utterances(utterances)
        self.speakers.extend(new_speakers)
        self.canvas.load_words(words)
        self.update_table()
        print(f"Loaded transcript file: {file_path}")
    except Exception as e:
        # UI boundary: unreadable files, bad JSON and invalid entries all end
        # up in the same dialog.
        QMessageBox.critical(self, "Error Loading Transcript", f"Failed to load transcript:\n{str(e)}")
|
624
|
+
|
625
|
+
def update_table(self):
    """Rebuild the transcript table from the canvas's word list.

    Each row gets the word text, start and end time (two decimals) and a
    speaker dropdown. Table signals are blocked while the rows are rebuilt.
    """
    table = self.table_widget
    table.blockSignals(True)

    words = self.canvas.words
    table.setRowCount(len(words))

    for row, word in enumerate(words):
        cell_texts = (
            word['word'],
            f"{word['start']:.2f}",
            f"{word['end']:.2f}",
        )

        # The dropdown offers every known speaker plus an empty choice. The
        # selection is set before the change handler is connected.
        dropdown = QComboBox()
        dropdown.addItems(self.speakers + [""])
        dropdown.setCurrentText(word.get('speaker', ''))
        dropdown.currentTextChanged.connect(self.on_speaker_changed)

        for column, text in enumerate(cell_texts):
            table.setItem(row, column, QTableWidgetItem(text))
        table.setCellWidget(row, 3, dropdown)

        # Set default cell colors on the three text cells.
        for column in range(len(cell_texts)):
            cell = table.item(row, column)
            if cell:
                cell.setBackground(QColor("black"))
                cell.setForeground(QColor("white"))

    table.blockSignals(False)
    self.statusBar().showMessage("Transcript loaded successfully.", 5000)
|
649
|
+
|
650
|
+
def on_boundary_changed(self, idx, boundary_type, new_pos):
    """Write a moved word boundary into the table and autosave.

    ``idx == -1`` is the reset notification and is ignored.
    """
    if idx == -1:
        return

    # 'start' lives in column 1, 'end' in column 2; anything else touches no cell.
    column = {'start': 1, 'end': 2}.get(boundary_type)

    table = self.table_widget
    table.blockSignals(True)
    if column is not None:
        cell = table.item(idx, column)
        if cell is not None:
            cell.setText(f"{new_pos:.2f}")
    table.blockSignals(False)

    self.autosave()
|
664
|
+
|
665
|
+
def on_waveform_clicked(self, time):
    """Move the playback position to the clicked time and highlight its row."""
    # NOTE: the parameter name shadows the `time` module inside this method.
    self.current_time = time
    canvas = self.canvas
    canvas.update_playtime_line(self.current_time)
    self.highlight_current_row()
|
669
|
+
|
670
|
+
def on_item_changed(self, item):
    """Handle an edited table cell.

    The previous text is taken from ``self.old_values`` (``""`` when nothing
    was recorded for the cell). A changed start/end cell must be numeric: a
    valid value moves the matching boundary line, an invalid one is reverted
    after a warning. Accepted changes are pushed onto the undo stack and
    autosaved.
    """
    if self.table_widget.signalsBlocked():
        return

    row, column = item.row(), item.column()
    key = (row, column)
    old_value = self.old_values.get(key, "")
    new_value = item.text()

    if new_value != old_value:
        if column in (1, 2):
            try:
                new_time = float(new_value)
                boundary_type = 'end' if column == 2 else 'start'
                self.canvas.update_line_position(row, boundary_type, new_time)
            except ValueError:
                QMessageBox.warning(self, "Invalid Input", "Start and End times must be numeric.")
                # Revert without re-triggering this handler. The recorded old
                # value is left in place on this path.
                self.table_widget.blockSignals(True)
                item.setText(old_value)
                self.table_widget.blockSignals(False)
                return

        self.undo_stack.push(EditCellCommand(self, row, column, old_value, new_value))
        self.autosave()

    # The recorded value has been consumed.
    self.old_values.pop(key, None)
|
698
|
+
|
699
|
+
def on_cell_double_clicked(self, row, column):
    """Remember a cell's current text, keyed by ``(row, column)``.

    The stored text is what on_item_changed later reads as the old value.
    """
    cell = self.table_widget.item(row, column)
    if not cell:
        return
    self.old_values[row, column] = cell.text()
|
703
|
+
|
704
|
+
def on_speaker_changed(self, new_speaker):
    """React to a speaker dropdown changing its text.

    A non-empty speaker that is not yet known is appended to
    ``self.speakers`` and every dropdown is refreshed. The new speaker is
    then mirrored into the ``canvas.words`` entry of the row that owns the
    sending dropdown, and an autosave is triggered, because ``autosave``
    serialises ``canvas.words`` rather than the table widgets.

    Args:
        new_speaker: The dropdown's new text (may be empty).
    """
    if new_speaker and new_speaker not in self.speakers:
        self.speakers.append(new_speaker)
        self.update_speaker_dropdowns()

    # The emitting widget tells us which row changed; without one (direct
    # call) there is nothing to synchronise.
    sender = self.sender()
    if sender is None:
        return

    words = self.canvas.words
    for row in range(self.table_widget.rowCount()):
        if self.table_widget.cellWidget(row, 3) is sender:
            # Rows and canvas.words are inserted/removed at the same index
            # (see insert_row/remove_row); the bound check is defensive.
            if row < len(words):
                words[row]['speaker'] = new_speaker
                self.autosave()
            break
|
709
|
+
|
710
|
+
def update_speaker_dropdowns(self):
    """Refill every speaker dropdown (column 3) from ``self.speakers``.

    Each dropdown gets the known speakers plus an empty entry, and its
    previous text is re-applied afterwards. The dropdown's own signals are
    blocked while it is rebuilt.
    """
    table = self.table_widget
    choices = self.speakers + [""]
    for row_index in range(table.rowCount()):
        combo = table.cellWidget(row_index, 3)
        if not combo:
            continue
        selected = combo.currentText()
        combo.blockSignals(True)
        combo.clear()
        combo.addItems(choices)
        combo.setCurrentText(selected)
        combo.blockSignals(False)
|
720
|
+
|
721
|
+
def toggle_playback(self):
    """Switch between playing and paused; warn if no audio is loaded."""
    if self.audio_segment is None:
        QMessageBox.warning(self, "No Audio", "Please load an audio file first.")
        return

    action = self.pause_playback if self.is_playing else self.start_playback
    action()
|
729
|
+
|
730
|
+
def start_playback(self):
    """Start playing the loaded audio from ``self.current_time``.

    Does nothing when no audio is loaded. On success the play button reads
    "Pause" and a 50 ms timer drives ``update_current_time``. Any exception
    raised while starting is reported in a message box.
    """
    if self.audio_segment is None:
        return

    try:
        # The segment is sliced by a millisecond offset.
        offset_ms = int(self.current_time * 1000)
        remaining_audio = self.audio_segment[offset_ms:]
        self.play_obj = play_audio(remaining_audio)
        self.is_playing = True
        self.play_button.setText("Pause")

        # Wall-clock anchor and starting position; the playhead is later
        # derived from the time elapsed since this anchor.
        self.playback_start_time = time.time()
        self.playback_start_position = self.current_time

        ticker = QTimer()
        self.playback_timer = ticker
        ticker.timeout.connect(self.update_current_time)
        ticker.start(50)  # Increased frequency for smoother updates
    except Exception as e:
        QMessageBox.critical(self, "Playback Error", f"Failed to play audio:\n{str(e)}")
|
746
|
+
|
747
|
+
def pause_playback(self):
    """Pause playback and keep the playhead at the position reached so far.

    Stops the active play object and the update timer, resets the button
    text to "Play", and advances ``self.current_time`` by the wall-clock
    time elapsed since playback started.

    The position is only recomputed when playback was actually running.
    Pausing while idle therefore leaves ``current_time`` untouched instead
    of adding unrelated wall-clock time, and does not fail when
    ``playback_start_time`` has not been set yet.
    """
    was_playing = self.is_playing

    if self.play_obj:
        self.play_obj.stop()
        self.play_obj = None
    self.is_playing = False
    self.play_button.setText("Play")

    timer = getattr(self, 'playback_timer', None)
    if timer is not None and timer.isActive():
        timer.stop()

    if was_playing:
        # Update current_time to the actual playback position
        elapsed_time = time.time() - self.playback_start_time
        self.current_time = self.playback_start_position + elapsed_time
|
758
|
+
|
759
|
+
def stop_playback(self):
    """Stop playback and rewind the playhead to 0.0.

    Stops the play object if there is one, resets the button text to
    "Play", redraws the playhead and row highlight at the start, and stops
    the update timer if it is running.
    """
    player = self.play_obj
    if player:
        player.stop()
        self.play_obj = None

    self.is_playing = False
    self.play_button.setText("Play")

    self.current_time = 0.0
    self.canvas.update_playtime_line(self.current_time)
    self.highlight_current_row()

    if hasattr(self, 'playback_timer'):
        if self.playback_timer.isActive():
            self.playback_timer.stop()
|
770
|
+
|
771
|
+
def update_current_time(self):
    """Advance the playhead from the wall clock; stop at the end of the audio.

    The position is the playback start position plus the time elapsed since
    playback started. Once it reaches ``self.canvas.duration`` playback is
    stopped; otherwise the playhead line and row highlight are refreshed.
    """
    position = self.playback_start_position + (time.time() - self.playback_start_time)
    self.current_time = position

    if position >= self.canvas.duration:
        self.stop_playback()
        return

    self.canvas.update_playtime_line(position)
    self.highlight_current_row()
|
779
|
+
|
780
|
+
def highlight_current_row(self):
    """Recolour the table so the row under the playhead is highlighted.

    Returns early when the row under the playhead has not changed. The
    previously highlighted row is repainted blue if it is selected and
    black otherwise; the new row is painted yellow and scrolled to the
    centre of the view. Only the first three columns are coloured.
    """
    active_row = self.get_current_row()
    selection = self.get_selected_rows()

    if active_row == self.previous_current_row:
        return  # No change, so no need to update

    table = self.table_widget

    def paint(row, background, foreground):
        # Colour the three text columns of one row, skipping missing items.
        for col in range(3):
            cell = table.item(row, col)
            if cell:
                cell.setBackground(QColor(background))
                cell.setForeground(QColor(foreground))

    # Reset previous current row background
    stale_row = self.previous_current_row
    if stale_row is not None:
        paint(stale_row, "blue" if stale_row in selection else "black", "white")

    # Set new current row background
    if active_row != -1:
        paint(active_row, "yellow", "black")
        table.scrollToItem(table.item(active_row, 0), QAbstractItemView.PositionAtCenter)

    self.previous_current_row = active_row
|
809
|
+
|
810
|
+
|
811
|
+
def get_current_row(self):
    """Return the first row whose [start, end) interval contains the playhead.

    Rows whose start/end cells are missing or non-numeric are skipped.

    Returns:
        The row index, or ``-1`` when no row contains ``self.current_time``.
    """
    table = self.table_widget
    for row_index in range(table.rowCount()):
        try:
            begins = float(table.item(row_index, 1).text())
            ends = float(table.item(row_index, 2).text())
            if begins <= self.current_time < ends:
                return row_index
        except (ValueError, AttributeError):
            continue
    return -1
|
821
|
+
|
822
|
+
def return_to_selection(self):
    """Move the playhead to the start time of the first selected row.

    Nothing happens when no row is selected or when that row's start cell
    is missing or non-numeric.
    """
    rows = self.get_selected_rows()
    if not rows:
        return

    try:
        self.current_time = float(self.table_widget.item(rows[0], 1).text())
        self.canvas.update_playtime_line(self.current_time)
        self.highlight_current_row()
    except (ValueError, AttributeError):
        pass
|
833
|
+
|
834
|
+
def get_selected_rows(self):
    """Return the indices of the selected rows, de-duplicated and ascending."""
    selected_indexes = self.table_widget.selectionModel().selectedRows()
    unique_rows = {index.row() for index in selected_indexes}
    return sorted(unique_rows)
|
836
|
+
|
837
|
+
def show_context_menu(self, position):
    """Show the table's context menu at ``position``.

    The menu offers adding a row, deleting the selected rows and
    bulk-editing the speaker.

    Args:
        position: Position in the table viewport's coordinates; it is mapped
            to global coordinates before the menu is shown.
    """
    menu = QMenu()
    entries = (
        ("Add Row", self.add_row),
        ("Delete Selected Rows", self.delete_selected_rows),
        ("Bulk Edit Speaker", self.bulk_edit_speaker),
    )
    actions = []
    for label, handler in entries:
        action = QAction(label, self)
        action.triggered.connect(handler)
        actions.append(action)
    for action in actions:
        menu.addAction(action)
    menu.exec_(self.table_widget.viewport().mapToGlobal(position))
|
849
|
+
|
850
|
+
def add_row(self):
    """Append an empty row at the end of the table as an undoable command."""
    blank_row = dict(word='', start_time=0.0, end_time=0.0, speaker='')
    append_at = self.table_widget.rowCount()
    self.undo_stack.push(AddRowCommand(self, append_at, blank_row))
    self.autosave()
|
856
|
+
|
857
|
+
def delete_selected_rows(self):
    """Delete the selected rows after confirmation, as an undoable command.

    Shows an information box when nothing is selected. The rows' current
    contents are captured before the confirmation dialog and handed to the
    delete command together with the row indices.
    """
    rows = self.get_selected_rows()
    if not rows:
        QMessageBox.information(self, "No Selection", "Please select at least one row to delete.")
        return

    # Gather data
    table = self.table_widget
    snapshot = [
        {
            'word': table.item(r, 0).text(),
            'start_time': table.item(r, 1).text(),
            'end_time': table.item(r, 2).text(),
            'speaker': table.cellWidget(r, 3).currentText(),
        }
        for r in rows
    ]

    answer = QMessageBox.question(
        self,
        "Confirm Deletion",
        f"Are you sure you want to delete {len(rows)} selected row(s)?",
        QMessageBox.Yes | QMessageBox.No
    )
    if answer != QMessageBox.Yes:
        return

    self.undo_stack.push(DeleteRowsCommand(self, snapshot, rows))
    self.autosave()
|
884
|
+
|
885
|
+
def bulk_edit_speaker(self):
    """Assign one speaker, chosen in a dialog, to all selected rows.

    Shows an information box when nothing is selected. The change is pushed
    as a single undoable command that carries the rows' previous speakers.
    """
    rows = self.get_selected_rows()
    if not rows:
        QMessageBox.information(self, "No Selection", "Please select at least one row to edit.")
        return

    chosen, accepted = QInputDialog.getItem(
        self,
        "Select Speaker",
        "Choose a speaker to assign to selected rows:",
        self.speakers + [""],
        0,
        False
    )
    if not accepted:
        return

    previous = []
    for r in rows:
        previous.append(self.table_widget.cellWidget(r, 3).currentText())

    self.undo_stack.push(BulkEditSpeakerCommand(self, rows, previous, chosen))

    if chosen and chosen not in self.speakers:
        self.speakers.append(chosen)
        self.update_speaker_dropdowns()
    self.autosave()
|
907
|
+
|
908
|
+
def autosave(self):
    """Write the current ``canvas.words`` to the autosave file as JSON.

    Each word is stored with the keys ``word``, ``start_time``, ``end_time``
    and ``speaker`` (empty string when the word has no speaker).

    The data is first written to a sibling ``<temp_file_path>.tmp`` file and
    then moved over the real autosave file with ``os.replace``. The autosave
    file exists for recovery, so it must never be left half-written if the
    process dies during the write.

    Failures are reported on stdout and otherwise ignored (best effort).
    """
    data_to_save = [
        {
            'word': word['word'],
            'start_time': word['start'],
            'end_time': word['end'],
            'speaker': word.get('speaker', ''),
        }
        for word in self.canvas.words
    ]

    partial_path = f"{self.temp_file_path}.tmp"
    try:
        with open(partial_path, 'w', encoding='utf-8') as f:
            json.dump(data_to_save, f, indent=4)
        # Atomic swap: readers see either the old or the new complete file.
        os.replace(partial_path, self.temp_file_path)
        print(f"Autosaved annotations to {self.temp_file_path}")
    except Exception as e:
        print(f"Autosave failed: {e}")
        # Do not leave a stray partial file behind after a failed write.
        try:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            pass
|
924
|
+
|
925
|
+
def load_autosave(self):
    """Restore annotations from the autosave file, if one exists.

    The JSON content is passed to ``canvas.load_words``, the table is
    rebuilt and the user is told about the recovery. Any failure is printed
    and otherwise ignored.
    """
    if not os.path.exists(self.temp_file_path):
        return

    try:
        with open(self.temp_file_path, "r", encoding='utf-8') as file:
            recovered = json.load(file)
            self.canvas.load_words(recovered)
            self.update_table()
            QMessageBox.information(self, "Recovery", "Recovered annotations from autosave.")
    except Exception as e:
        print(f"Failed to recover autosave: {e}")
|
935
|
+
|
936
|
+
def closeEvent(self, event):
    """Ask whether to save before closing and act on the answer.

    * Yes: validate and save. The window only closes, and the autosave file
      is only removed, when saving did not report failure.
    * No: remove the autosave file and close.
    * Cancel: keep the window open.

    A ``False`` return from ``save_annotations`` is treated as "not saved"
    (cancelled dialog or write error). In that case the close is aborted and
    the autosave file is kept, so the work is not lost. Any other return
    value (including ``None``) is treated as success.

    Args:
        event: The close event to accept or ignore.
    """
    reply = QMessageBox.question(
        self, 'Exit',
        "Do you want to save your annotations before exiting?",
        QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel,
        QMessageBox.Yes
    )

    if reply == QMessageBox.Yes:
        if not self.validate_annotations():
            event.ignore()
            return
        if self.save_annotations() is False:
            # Nothing was written: keep the window and the recovery file.
            event.ignore()
            return
        if os.path.exists(self.temp_file_path):
            os.remove(self.temp_file_path)
        event.accept()
    elif reply == QMessageBox.No:
        if os.path.exists(self.temp_file_path):
            os.remove(self.temp_file_path)
        event.accept()
    else:
        event.ignore()
|
958
|
+
|
959
|
+
def save_annotations(self):
    """Validate the table and save it as JSON to a user-chosen file.

    Start and end times are rounded to two decimals; an empty speaker is
    stored as ``None``.

    Returns:
        ``True`` when the file was written. ``False`` when validation
        failed, a time cell could not be read, the file dialog was
        cancelled, or writing failed. Earlier versions returned ``None`` on
        every path; callers that ignore the result are unaffected.
    """
    if not self.validate_annotations():
        return False

    annotations = []
    for row_idx in range(self.table_widget.rowCount()):
        try:
            start = round(float(self.table_widget.item(row_idx, 1).text()), 2)
            end = round(float(self.table_widget.item(row_idx, 2).text()), 2)
        except (ValueError, AttributeError):
            QMessageBox.warning(self, "Invalid Input", f"Invalid start or end time at row {row_idx + 1}.")
            return False

        word = self.table_widget.item(row_idx, 0).text()
        speaker = self.table_widget.cellWidget(row_idx, 3).currentText()
        annotations.append({
            "word": word,
            "start_time": start,
            "end_time": end,
            "speaker": speaker if speaker else None
        })

    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getSaveFileName(
        self,
        "Save Annotations",
        "",
        "JSON Files (*.json);;All Files (*)",
        options=options,
    )
    if not file_path:
        # Dialog cancelled: nothing was saved.
        return False

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(annotations, f, indent=4)
        QMessageBox.information(self, "Save Successful", f"Annotations saved to {file_path}")
    except Exception as e:
        QMessageBox.critical(self, "Save Error", f"Failed to save annotations:\n{str(e)}")
        return False
    return True
|
998
|
+
|
999
|
+
def on_current_cell_changed(self, current_row, current_column, previous_row, previous_column):
    """Move the playhead to the start time of the newly current row.

    Negative rows are ignored, as are rows whose start cell is missing or
    non-numeric. Only ``current_row`` is used.
    """
    if current_row < 0:
        return

    try:
        self.current_time = float(self.table_widget.item(current_row, 1).text())
        self.canvas.update_playtime_line(self.current_time)
        self.highlight_current_row()
    except (ValueError, AttributeError):
        pass
|
1008
|
+
|
1009
|
+
def on_selection_changed(self, selected, deselected):
    """Force the row highlight to be recomputed after a selection change.

    Clearing ``previous_current_row`` defeats the "row unchanged" early
    return in ``highlight_current_row``. Both arguments are unused.
    """
    self.previous_current_row = None
    self.highlight_current_row()
|
1012
|
+
|
1013
|
+
def keyPressEvent(self, event):
    """Handle the X key by returning to the selection; defer all other keys."""
    if event.key() != Qt.Key_X:
        super().keyPressEvent(event)
        return
    self.return_to_selection()
|
1018
|
+
|
1019
|
+
def insert_row(self, row_position, row_data):
    """Insert one annotation row into the table and into ``canvas.words``.

    Args:
        row_position: Index at which the row is inserted (table and words).
        row_data: Mapping with ``word``, ``start_time`` and ``end_time``
            (anything ``float()`` accepts) and an optional ``speaker``.

    Raises:
        KeyError: A required key is missing from ``row_data``.
        ValueError: A time value cannot be converted to ``float``.

    All values are read and converted before the table is touched, so bad
    input cannot leave a half-built row behind. Signals are restored in a
    ``finally`` clause even if building the row fails. A ``None`` speaker
    (which ``save_annotations`` writes for empty speakers) is treated as an
    empty string.
    """
    word = row_data['word']
    start = float(row_data['start_time'])
    end = float(row_data['end_time'])
    speaker = row_data.get('speaker') or ''

    table = self.table_widget
    table.blockSignals(True)
    try:
        table.insertRow(row_position)

        speaker_dropdown = QComboBox()
        speaker_dropdown.addItems(self.speakers + [""])
        speaker_dropdown.setCurrentText(speaker)
        # Connected after the initial text is set, so populating the
        # dropdown does not invoke the handler.
        speaker_dropdown.currentTextChanged.connect(self.on_speaker_changed)

        table.setItem(row_position, 0, QTableWidgetItem(word))
        table.setItem(row_position, 1, QTableWidgetItem(f"{start:.2f}"))
        table.setItem(row_position, 2, QTableWidgetItem(f"{end:.2f}"))
        table.setCellWidget(row_position, 3, speaker_dropdown)

        # Set default cell colors
        for j in range(3):
            item = table.item(row_position, j)
            if item:
                item.setBackground(QColor("black"))
                item.setForeground(QColor("white"))
    finally:
        table.blockSignals(False)

    # Update waveform
    self.canvas.words.insert(row_position, {
        'word': word,
        'start': start,
        'end': end,
        'speaker': speaker
    })
    self.canvas.draw_lines()
|
1048
|
+
|
1049
|
+
def remove_row(self, row_position):
    """Remove one row from the table and the matching entry in ``canvas.words``.

    Args:
        row_position: Index of the row (and word) to remove.
    """
    table = self.table_widget
    table.blockSignals(True)
    table.removeRow(row_position)
    table.blockSignals(False)

    # Update waveform
    self.canvas.words.pop(row_position)
    self.canvas.draw_lines()
|
1056
|
+
|
1057
|
+
def set_cell(self, row, column, value):
    """Set a table cell's text programmatically and keep the canvas data in sync.

    Args:
        row: Table row.
        column: 0 = word, 1 = start time, 2 = end time.
        value: New cell text.

    For the time columns the matching boundary line is moved and the state
    is autosaved; a non-numeric value is ignored for the canvas (the cell
    text is still set). For the word column the word is mirrored into
    ``canvas.words`` and the state is autosaved, because ``autosave``
    serialises ``canvas.words`` and would otherwise keep the stale word.
    """
    table = self.table_widget
    table.blockSignals(True)
    try:
        item = table.item(row, column)
        if item is not None:
            item.setText(value)
    finally:
        # Always restore signals, even if setting the text fails.
        table.blockSignals(False)

    if column in (1, 2):
        try:
            new_time = float(value)
            boundary_type = 'start' if column == 1 else 'end'
            self.canvas.update_line_position(row, boundary_type, new_time)
            self.autosave()
        except ValueError:
            pass
    elif column == 0:
        words = self.canvas.words
        if 0 <= row < len(words):
            words[row]['word'] = value
            self.autosave()
|
1071
|
+
|
1072
|
+
def set_speaker(self, row, speaker):
    """Set the speaker of one row programmatically and autosave.

    Args:
        row: Table row whose speaker dropdown (column 3) is updated.
        speaker: Speaker name; may be empty.

    An unknown, non-empty speaker is added to ``self.speakers`` and to all
    dropdowns BEFORE the dropdown text is set. The dropdowns built by
    ``insert_row`` are plain (non-editable) combo boxes, and
    ``setCurrentText`` on such a box only selects entries that already
    exist. The speaker is also mirrored into ``canvas.words`` so the
    autosave file reflects it.
    """
    if speaker and speaker not in self.speakers:
        self.speakers.append(speaker)
        self.update_speaker_dropdowns()

    table = self.table_widget
    table.blockSignals(True)
    try:
        speaker_dropdown = table.cellWidget(row, 3)
        if speaker_dropdown is not None:
            speaker_dropdown.setCurrentText(speaker)
    finally:
        table.blockSignals(False)

    words = self.canvas.words
    if 0 <= row < len(words):
        words[row]['speaker'] = speaker
    self.autosave()
|
1082
|
+
|
1083
|
+
def validate_annotations(self):
    """Check that every row has numeric, ordered, non-overlapping times.

    Every row is parsed first. A missing cell, a non-numeric value or a
    start time greater than its end time is reported in a warning box that
    names the row. The rows are then ordered by start time, and adjacent
    rows are rejected when one ends after the next begins.

    Parsing happens before sorting, so a missing or non-numeric start time
    produces the intended warning instead of raising from the sort key.

    Returns:
        ``True`` when all rows are valid, ``False`` after the first problem
        has been reported.
    """
    table = self.table_widget
    parsed_rows = []  # (start_time, end_time, row)

    for row in range(table.rowCount()):
        start_item = table.item(row, 1)
        end_item = table.item(row, 2)
        if start_item is None or end_item is None:
            QMessageBox.warning(self, "Invalid Annotation", f"Missing start or end time at row {row + 1}.")
            return False
        try:
            start_time = float(start_item.text())
            end_time = float(end_item.text())
        except ValueError:
            QMessageBox.warning(self, "Invalid Annotation", f"Non-numeric start or end time at row {row + 1}.")
            return False
        # Equal start and end times are accepted (e.g. freshly added rows).
        if start_time > end_time:
            QMessageBox.warning(self, "Invalid Annotation", f"Start time must be less than end time at row {row + 1}.")
            return False
        parsed_rows.append((start_time, end_time, row))

    # Stable sort by start time only, so equal starts keep table order.
    parsed_rows.sort(key=lambda entry: entry[0])

    for (_, end_time, row), (next_start, _, next_row) in zip(parsed_rows, parsed_rows[1:]):
        if end_time > next_start:
            QMessageBox.warning(
                self, "Invalid Annotation",
                f"Annotations at rows {row + 1} and {next_row + 1} overlap."
            )
            return False
    return True
|
1111
|
+
|
1112
|
+
# --- Main Execution ---
|
1113
|
+
|
1114
|
+
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the event loop's return value.
    """
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|