pelican-nlp 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. pelican_nlp/Nils_backup/__init__.py +0 -0
  2. pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
  3. pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
  4. pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
  5. pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
  6. pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
  7. pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
  8. pelican_nlp/Nils_backup/fluency/config.py +231 -0
  9. pelican_nlp/Nils_backup/fluency/main.py +182 -0
  10. pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
  11. pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
  12. pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
  13. pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
  14. pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
  15. pelican_nlp/Nils_backup/fluency/utils.py +41 -0
  16. pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
  17. pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
  18. pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
  19. pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
  20. pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
  21. pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
  22. pelican_nlp/Nils_backup/transcription/test.json +1 -0
  23. pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
  24. pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
  25. pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
  26. pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
  27. pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
  28. pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
  29. pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
  30. pelican_nlp/__init__.py +1 -1
  31. pelican_nlp/_version.py +1 -0
  32. pelican_nlp/configuration_files/config_audio.yml +150 -0
  33. pelican_nlp/configuration_files/config_discourse.yml +104 -0
  34. pelican_nlp/configuration_files/config_fluency.yml +108 -0
  35. pelican_nlp/configuration_files/config_general.yml +131 -0
  36. pelican_nlp/configuration_files/config_morteza.yml +103 -0
  37. pelican_nlp/praat/__init__.py +29 -0
  38. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/METADATA +15 -14
  39. pelican_nlp-0.1.3.dist-info/RECORD +75 -0
  40. pelican_nlp-0.1.1.dist-info/RECORD +0 -39
  41. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/WHEEL +0 -0
  42. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/licenses/LICENSE +0 -0
  43. {pelican_nlp-0.1.1.dist-info → pelican_nlp-0.1.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,955 @@
1
+ import sys
2
+ import os
3
+ import json
4
+ import tempfile
5
+ import numpy as np
6
+ import librosa
7
+ import time
8
+ import re
9
+
10
+ from PyQt5.QtWidgets import (
11
+ QApplication, QMainWindow, QWidget,
12
+ QVBoxLayout, QHBoxLayout, QPushButton, QComboBox, QFileDialog, QMessageBox,
13
+ QInputDialog, QMenu, QAction, QUndoStack, QUndoCommand, QScrollBar, QLineEdit
14
+ )
15
+ from PyQt5.QtCore import Qt, QTimer, QObject, pyqtSignal, QThread
16
+ from PyQt5.QtGui import QColor, QCursor
17
+
18
+ import pyqtgraph as pg
19
+ from pydub import AudioSegment
20
+ from pydub.playback import _play_with_simpleaudio as play_audio
21
+
22
+ # Assume Transcript class is imported from another file
23
+ from transcription import Transcript # Replace 'transcription' with the actual module name
24
+
25
+
26
+ # --- Undo/Redo Command Classes ---
27
+
28
class EditWordCommand(QUndoCommand):
    """Undoable replacement of the text of a single transcript word."""

    def __init__(self, main_window, idx, old_word, new_word, description="Edit Word"):
        """Remember which word is edited together with its old and new text."""
        super().__init__(description)
        self.main_window = main_window
        self.idx = idx
        self.old_word = old_word
        self.new_word = new_word

    def _store(self, text):
        """Write *text* into the transcript and the canvas, then refresh the label."""
        window = self.main_window
        position = self.idx
        window.transcript.combined_data[position]['word'] = text
        window.canvas.words[position]['word'] = text
        window.canvas.update_connecting_line(position)

    def redo(self):
        """Apply the edited text."""
        self._store(self.new_word)

    def undo(self):
        """Put back the text the word had before the edit."""
        self._store(self.old_word)
45
+
46
+
47
class EditSpeakerCommand(QUndoCommand):
    """Undoable reassignment of the speaker label of a single transcript word."""

    def __init__(self, main_window, idx, old_speaker, new_speaker, description="Edit Speaker"):
        """Remember which word is edited together with its old and new speaker."""
        super().__init__(description)
        self.main_window = main_window
        self.idx = idx
        self.old_speaker = old_speaker
        self.new_speaker = new_speaker

    def _assign(self, speaker):
        """Store *speaker* for the word and refresh every canvas item showing it."""
        canvas = self.main_window.canvas
        self.main_window.transcript.combined_data[self.idx]['speaker'] = speaker
        canvas.words[self.idx]['speaker'] = speaker
        canvas.update_connecting_line(self.idx)
        # The speaker is only visible through the colour of the word segment,
        # so that region has to be repainted as well; otherwise the old
        # speaker colour stays on screen after redo/undo.
        canvas.update_word_segment(self.idx)

    def redo(self):
        """Apply the new speaker."""
        self._assign(self.new_speaker)

    def undo(self):
        """Restore the previous speaker."""
        self._assign(self.old_speaker)
64
+
65
+
66
class MoveBoundaryCommand(QUndoCommand):
    """Undoable move of one word boundary ('start' or 'end') to a new time."""

    def __init__(self, main_window, idx, boundary_type, old_pos, new_pos, description="Move Boundary"):
        """Remember the word index, which boundary moved, and both positions."""
        super().__init__(description)
        self.main_window = main_window
        self.idx = idx
        self.boundary_type = boundary_type
        self.old_pos = old_pos
        self.new_pos = new_pos

    def _place(self, position):
        """Write *position* into the transcript and move the matching canvas line."""
        window = self.main_window
        time_key = f'{self.boundary_type}_time'
        window.transcript.combined_data[self.idx][time_key] = position
        window.canvas.update_line_position(self.idx, self.boundary_type, position)

    def redo(self):
        """Move the boundary to its new position."""
        self._place(self.new_pos)

    def undo(self):
        """Move the boundary back to where it was."""
        self._place(self.old_pos)
82
+
83
+
84
+ # --- Audio Loader for Asynchronous Loading ---
85
+
86
class AudioLoader(QObject):
    """
    Worker object that loads an audio file for waveform display.

    Emits ``finished(samples, sr)`` on success and ``error(message)`` on
    failure. ``sr`` is the sample rate reported by librosa for the file;
    ``downsample_factor`` tells consumers by how much ``samples`` was
    thinned out relative to that rate.
    """
    finished = pyqtSignal(np.ndarray, int)
    error = pyqtSignal(str)

    def __init__(self, file_path, downsample_factor=100):
        """Store the file to load and the factor used for long recordings."""
        super().__init__()
        self.file_path = file_path
        self.downsample_factor = downsample_factor

    def run(self):
        """Load, normalise and (for long files) downsample the audio, then emit it."""
        try:
            # sr=None keeps the file's native sampling rate.
            samples, sr = librosa.load(self.file_path, sr=None, mono=True)

            # Scale to [-1, 1]; a silent file is left untouched to avoid 0/0.
            peak = np.max(np.abs(samples))
            if peak != 0:
                samples = samples / peak

            if len(samples) > 1_000_000:
                samples = self.downsample_waveform(samples, self.downsample_factor)
            else:
                # Nothing was thinned out. Consumers derive the effective
                # sample rate as sr / downsample_factor, so the factor has to
                # describe what was actually applied; leaving it at the
                # requested value would stretch short files by that factor.
                self.downsample_factor = 1

            self.finished.emit(samples, sr)
        except Exception as e:
            # Any failure is reported through the error signal as text.
            self.error.emit(str(e))

    def downsample_waveform(self, samples, factor):
        """
        Return the mean of each consecutive block of ``factor`` samples.

        A trailing partial block is dropped; fewer than ``factor`` samples
        yield an empty array.
        """
        num_blocks = len(samples) // factor
        trimmed = np.asarray(samples)[:num_blocks * factor]
        return trimmed.reshape(num_blocks, factor).mean(axis=1)
117
+
118
+
119
+ # --- Draggable Line Class for pyqtgraph ---
120
+
121
class DraggableLine(pg.InfiniteLine):
    """Vertical boundary marker that remembers which word edge it represents."""

    def __init__(self, pos, color, idx, boundary_type, span = (0,1), pen=None, movable=True):
        """
        Create the line at x = ``pos``.

        ``idx`` and ``boundary_type`` identify the word boundary the line
        stands for, ``span`` is handed to ``setSpan`` as its two bounds, and
        ``pen`` defaults to a 2px pen in ``color``.
        """
        if not pen:
            pen = pg.mkPen(color=color, width=2)
        super().__init__(pos=pos, angle=90, pen=pen, movable=movable)
        self.idx = idx
        self.boundary_type = boundary_type
        lower, upper = span[0], span[1]
        self.setSpan(lower, upper)
        # Hovering highlights the line with a lighter shade of its own colour.
        hover_colour = pen.color().lighter()
        self.setHoverPen(hover_colour)
        self.setCursor(Qt.SizeHorCursor)
        # Position before the current drag, kept for undo bookkeeping.
        self.old_pos = pos
131
+
132
class WaveformCanvas(QWidget):
    """
    A widget for displaying and interacting with waveform plots using pyqtgraph.

    It shows the waveform, one pair of draggable boundary lines per word,
    the word labels, speaker-coloured word segments and utterance regions,
    and keeps a horizontal scrollbar in sync with the visible time window.
    """
    boundary_changed = pyqtSignal(int, str, float, float)  # idx, 'start'/'end', new position, old position
    waveform_clicked = pyqtSignal(float)
    word_double_clicked = pyqtSignal(float)
    word_right_clicked = pyqtSignal(float)
    audio_loaded = pyqtSignal()
    loading_error = pyqtSignal(str)

    class CustomViewBox(pg.ViewBox):
        """
        Custom ViewBox for handling mouse events and fixing Y-axis behavior.
        """

        def __init__(self, canvas, y_limits=(-1, 1), *args, **kwargs):
            """Keep a reference to the owning canvas and the fixed Y range."""
            super().__init__(*args, **kwargs)
            self.canvas = canvas
            self.y_limits = y_limits
            if self.canvas is None:
                raise ValueError("CustomViewBox requires a valid 'canvas' reference.")

        def mouseClickEvent(self, event):
            """
            Handles mouse click events. Triggers the context menu for right-clicks.
            """
            if event.button() == Qt.RightButton:
                self.raiseContextMenu(event)
                event.accept()
            else:
                super().mouseClickEvent(event)

        def mouseDoubleClickEvent(self, event):
            """
            Handles mouse double-click events. Notifies the canvas of the double-click.
            """
            # mapSceneToView expects scene coordinates; the item-local pos()
            # would be shifted by the view box's offset inside the plot.
            mouse_point = self.mapSceneToView(event.scenePos())
            clicked_time = mouse_point.x()
            if hasattr(self.canvas, "on_waveform_double_clicked"):
                self.canvas.on_waveform_double_clicked(clicked_time)
            event.accept()

        def raiseContextMenu(self, event):
            """
            Displays the context menu for right-click events.
            """
            mouse_point = self.mapSceneToView(event.scenePos())
            clicked_time = mouse_point.x()
            if hasattr(self.canvas, "on_waveform_right_clicked"):
                self.canvas.on_waveform_right_clicked(clicked_time)
            event.accept()

        def scaleBy(self, s=None, center=None):
            """
            Restricts scaling to the X-axis only.
            """
            if s is not None:
                s = [s[0], 1]  # Only scale X-axis
            super().scaleBy(s, center)

        def mouseDragEvent(self, ev, axis=None):
            """
            Ignores drags that are not restricted to the X-axis.

            NOTE(review): drags with ``axis is None`` are ignored as well, not
            only Y-axis drags -- confirm this is intended.
            """
            if axis is None or axis == 1:  # Y-axis
                ev.ignore()
            else:
                super().mouseDragEvent(ev, axis=axis)

        def updateLimits(self):
            """
            Ensures the view remains within the fixed Y-axis range.
            """
            self.setRange(yRange=self.y_limits, padding=0, update=False)

    def __init__(self, parent=None, main_window=None):
        """Build the plot, the scrollbar and the empty bookkeeping lists."""
        super().__init__(parent)
        self.main_window = main_window  # Store the reference to MainWindow
        # NOTE(review): this attribute shadows QWidget.layout(); kept because
        # code outside this class may rely on the name.
        self.layout = QVBoxLayout(self)

        # Initialize PlotWidget with CustomViewBox
        self.plot_widget = pg.PlotWidget(viewBox=self.CustomViewBox(canvas=self, y_limits=(-1, 1)))
        self.plot_widget.setYRange(-1, 1)
        self.plot_widget.showGrid(x=True, y=False)
        self.plot_widget.setLabel('bottom', 'Time', 's')
        self.layout.addWidget(self.plot_widget)

        self.editing_line = None

        # Add horizontal scrollbar
        self.scrollbar = QScrollBar(Qt.Horizontal)
        self.layout.addWidget(self.scrollbar)
        self.scrollbar.valueChanged.connect(self.on_scrollbar_value_changed)

        # Connect plot's X range change to update scrollbar
        self.plot_widget.plotItem.vb.sigXRangeChanged.connect(self.on_x_range_changed)

        self.words = []
        self.lines = []             # dicts: {'line', 'idx', 'type'}
        self.connecting_lines = []  # dicts: {'line', 'start_arrow', 'end_arrow', 'label'}
        self.word_segments = []     # LinearRegionItem per word, indexed like self.words

        self.dragging_line = None

        self.utterances = []
        self.utterance_items = []
        self.utterance_regions = []

        self.audio_data = None
        self.sr = None
        self.duration = None        # stays None until audio has been loaded
        self.playtime_line = None   # created once audio has been loaded
        self.window_size = 5.0  # Default window size of 5 seconds

        self.speaker_colors = {
            "SPEAKER_00": QColor(255, 200, 200, 100),  # Light red
            "SPEAKER_01": QColor(200, 255, 200, 100),  # Light green
            "SPEAKER_02": QColor(200, 200, 255, 100),  # Light blue
            "UNKNOWN": QColor(200, 200, 200, 100),
            "": QColor(200, 200, 200, 100),  # Light gray
        }

    def load_audio(self, file_path):
        """Load ``file_path`` on a worker thread; results arrive via signals."""
        # NOTE(review): this attribute shadows QObject.thread(); kept because
        # code outside this class may rely on the name.
        self.thread = QThread()
        self.loader = AudioLoader(file_path)
        self.loader.moveToThread(self.thread)
        self.thread.started.connect(self.loader.run)
        self.loader.finished.connect(self.on_audio_loaded)
        self.loader.finished.connect(self.thread.quit)
        self.loader.finished.connect(self.loader.deleteLater)
        self.thread.finished.connect(self.thread.deleteLater)
        self.loader.error.connect(self.on_loading_error)
        # A failed load must shut the worker down too, otherwise its thread
        # keeps running after the error was reported.
        self.loader.error.connect(self.thread.quit)
        self.loader.error.connect(self.loader.deleteLater)
        self.thread.start()

    def on_audio_loaded(self, samples, sr):
        """Plot the loaded samples and reset view, overlays and scrollbar."""
        self.audio_data = samples
        self.sr = sr
        # NOTE(review): assumes loader.downsample_factor is the factor that
        # was actually applied to `samples` -- confirm against AudioLoader.
        effective_sr = sr / self.loader.downsample_factor
        self.duration = len(samples) / effective_sr
        t = np.linspace(0, self.duration, num=len(samples))

        # clear() removes every item from the plot, overlays included.
        self.plot_widget.clear()
        self.plot_widget.plot(t, samples, pen='b')
        self.playtime_line = pg.InfiniteLine(pos=0, angle=90, pen=pg.mkPen('y', width=4))
        self.playtime_line.setZValue(1000)
        self.plot_widget.addItem(self.playtime_line)
        self.plot_widget.setLimits(xMin=0, xMax=self.duration)
        self.plot_widget.setXRange(0, min(self.window_size, self.duration))
        self.plot_widget.setLabel('bottom', 'Time', 's')
        # Re-create the overlays wiped by clear() above.
        self.draw_lines()
        self.draw_utterances()
        self.boundary_changed.emit(-1, '', 0.0, 0.0)  # Reset

        # Configure scrollbar (values are milliseconds)
        self.scrollbar.setMinimum(0)
        self.scrollbar.setMaximum(int(self.duration * 1000))
        self.scrollbar.setSingleStep(100)  # 100 ms steps
        self.scrollbar.setPageStep(int(self.window_size * 1000))  # one window per page
        self.scrollbar.setValue(0)

        self.audio_loaded.emit()

    def load_utterances(self, utterances):
        """Replace the utterance list and redraw the utterance overlays."""
        self.utterances = utterances
        self.draw_utterances()

    def draw_utterances(self):
        """Draw a coloured region plus metadata and text labels per utterance."""
        self.clear_utterance_items()

        for idx, utterance in enumerate(self.utterances):
            start = float(utterance['start_time'])
            end = float(utterance['end_time'])
            speaker = utterance.get('speaker', '')
            confidence = utterance.get('confidence', '')
            color = self.speaker_colors.get(speaker, QColor(200, 200, 200, 100))

            # Background region, shrunk by 5 ms on each side
            region = pg.LinearRegionItem(values=[start + 0.005, end - 0.005], brush=color, span = (0.1, 0.4))
            region.setMovable(False)
            self.plot_widget.addItem(region)
            self.utterance_regions.append(region)

            text = utterance.get('text', '')

            # Label with utterance metadata (index, speaker, confidence, duration)
            label_text = f"Utterance: {idx + 1}, Speaker: {speaker}, Confidence: {confidence}, Duration: {round(end - start, 2)}s"
            meta_label = pg.TextItem(label_text, anchor=(0.5, 0), color='yellow')
            meta_label.setPos((start + end) / 2, -0.95)
            self.plot_widget.addItem(meta_label)
            self.utterance_items.append(meta_label)

            label = pg.TextItem(text, anchor=(0.5, 0), color='white')
            label.setPos((start + end) / 2, -0.5)
            self.plot_widget.addItem(label)
            self.utterance_items.append(label)

    def clear_utterance_items(self):
        """Remove all utterance labels and regions from the plot."""
        for item in self.utterance_items:
            self.plot_widget.removeItem(item)
        self.utterance_items = []
        for region in self.utterance_regions:
            self.plot_widget.removeItem(region)
        self.utterance_regions = []

    def on_loading_error(self, error_message):
        """Forward a loader error through ``loading_error``."""
        self.loading_error.emit(error_message)

    def load_words(self, words):
        """Replace the word list (kept by reference) and redraw all word items."""
        self.words = words
        self.draw_lines()

    def clear_lines(self):
        """Remove every boundary line, connector, arrow, label and word segment."""
        for line in self.lines:
            self.plot_widget.removeItem(line['line'])
        self.lines = []
        for cline in self.connecting_lines:
            self.plot_widget.removeItem(cline['line'])
            self.plot_widget.removeItem(cline['start_arrow'])
            self.plot_widget.removeItem(cline['end_arrow'])
            self.plot_widget.removeItem(cline['label'])
        self.connecting_lines = []
        # word_segments holds the region items themselves (see draw_lines),
        # so they are removed directly rather than looked up by key.
        for segment in self.word_segments:
            self.plot_widget.removeItem(segment)
        self.word_segments = []

    @staticmethod
    def _connector_height(idx):
        """Return the y position of a word's connector; neighbours alternate."""
        return 0.55 if idx % 2 == 0 else 0.45

    def draw_lines(self):
        """Rebuild boundary lines, connector, arrows, label and segment per word."""
        self.clear_lines()
        for idx, word in enumerate(self.words):
            y_pos_line = self._connector_height(idx)

            # Boundaries are drawn 5 ms inside the word
            start = (float(word['start_time']) + 0.005)
            end = (float(word['end_time']) - 0.005)
            start_line = DraggableLine(pos=start, color='green', idx=idx, boundary_type='start', span = (0.6,0.9))
            end_line = DraggableLine(pos=end, color='red', idx=idx, boundary_type='end', span = (0.6,0.9))
            self.plot_widget.addItem(start_line)
            self.plot_widget.addItem(end_line)
            self.lines.append({'line': start_line, 'idx': idx, 'type': 'start'})
            self.lines.append({'line': end_line, 'idx': idx, 'type': 'end'})

            # Horizontal connector between the two boundaries
            connecting_line = pg.PlotCurveItem(
                [start, end],
                [y_pos_line, y_pos_line],
                pen=pg.mkPen('blue', width=2),
            )
            self.plot_widget.addItem(connecting_line)

            # Arrowheads at both ends (create_arrow adds them to the plot)
            start_arrow = self.create_arrow(start, y_pos_line, 0)
            end_arrow = self.create_arrow(end, y_pos_line, 180)

            # Clickable word label; its position is set by
            # update_connecting_line below.
            label = pg.TextItem(word['word'], anchor=(0.5, 0), color='white')
            label.mouseClickEvent = lambda ev, idx=idx: self.main_window.on_word_clicked(idx)
            self.plot_widget.addItem(label)

            self.connecting_lines.append({
                "line": connecting_line,
                "start_arrow": start_arrow,
                "end_arrow": end_arrow,
                "label": label,
            })

            # Words without a speaker entry fall back to the default colour,
            # matching update_word_segment.
            color = self.speaker_colors.get(word.get('speaker', ''), QColor(200, 200, 200, 100))

            # Background region for the word segment
            word_segment = pg.LinearRegionItem(values=[start, end], brush=color, span = (0.6, 0.9))
            word_segment.setMovable(False)
            self.plot_widget.addItem(word_segment)
            self.word_segments.append(word_segment)

            # Position connector, arrows and label
            self.update_connecting_line(idx)

            # React once a drag of either boundary has finished
            start_line.sigPositionChangeFinished.connect(lambda _, line=start_line: self.on_line_moved(line))
            end_line.sigPositionChangeFinished.connect(lambda _, line=end_line: self.on_line_moved(line))

        self.plot_widget.update()

    def create_arrow(self, x, y, angle):
        """Create a blue arrow item at (x, y), add it to the plot and return it."""
        arrow = pg.ArrowItem(
            pos=(x, y),
            angle=angle,  # Direction of the arrow in degrees
            tipAngle=30,
            baseAngle=20,
            headLen=15,
            brush='blue',
        )
        self.plot_widget.addItem(arrow)
        return arrow

    def update_connecting_line(self, idx):
        """Re-position connector, arrows and label of word ``idx`` and refresh its text."""
        word = self.words[idx]
        start = float(word['start_time']) + 0.005
        end = float(word['end_time']) - 0.005
        y_pos_line = self._connector_height(idx)

        items = self.connecting_lines[idx]
        items['line'].setData([start, end], [y_pos_line, y_pos_line])
        items['start_arrow'].setPos(start, y_pos_line)
        items['end_arrow'].setPos(end, y_pos_line)

        # Label sits at the horizontal middle of the word, at y = 0.5
        mid_x = (start + end) / 2
        items['label'].setPos(mid_x, 0.5)
        items['label'].setText(word['word'])

    def update_word_segment(self, idx):
        """Sync the coloured region of word ``idx`` with its times and speaker."""
        word = self.words[idx]
        start_time = float(word['start_time'])
        end_time = float(word['end_time'])
        speaker = word.get('speaker', '')

        segment = self.word_segments[idx]
        segment.setRegion([start_time, end_time])

        color = self.speaker_colors.get(speaker, QColor(200, 200, 200, 100))
        segment.setBrush(color)

    def on_line_moved(self, line):
        """Store a finished boundary drag in the word data and announce it."""
        idx = line.idx
        boundary_type = line.boundary_type
        new_pos = max(0.0, line.value())
        # The upper bound is only known once audio has been loaded; words can
        # be shown (and dragged) before that.
        if self.duration is not None:
            new_pos = min(new_pos, self.duration)
        old_pos = line.old_pos
        line.old_pos = new_pos  # Update old_pos for next time
        if boundary_type == 'start':
            self.words[idx]['start_time'] = new_pos
        elif boundary_type == 'end':
            self.words[idx]['end_time'] = new_pos
        self.boundary_changed.emit(idx, boundary_type, new_pos, old_pos)
        self.update_connecting_line(idx)
        self.update_word_segment(idx)

    def on_waveform_double_clicked(self, clicked_time):
        """Re-emit a double-click at ``clicked_time`` as ``word_double_clicked``."""
        self.word_double_clicked.emit(clicked_time)

    def on_waveform_right_clicked(self, clicked_time):
        """Re-emit a right-click at ``clicked_time`` as ``word_right_clicked``."""
        self.word_right_clicked.emit(clicked_time)

    def on_scrollbar_value_changed(self, value):
        """Scroll the plot so the window starts at ``value`` milliseconds."""
        if self.duration is None:
            # The scrollbar can be operated before any audio is loaded.
            return
        # Keep a full window visible near the end, but never start before 0
        # (clips shorter than the window would otherwise go negative).
        start = max(0.0, min(value / 1000.0, self.duration - self.window_size))
        end = min(start + self.window_size, self.duration)
        self.plot_widget.setXRange(start, end, padding=0)

    def on_x_range_changed(self, view_box, range):
        """Mirror the plot's visible X start in the scrollbar without feedback."""
        if self.duration is None:
            # Range changes also occur before audio exists.
            return
        start = max(0, range[0])
        self.scrollbar.blockSignals(True)
        self.scrollbar.setValue(int(start * 1000))
        self.scrollbar.blockSignals(False)

    def update_playtime_line(self, current_time):
        """Move the playback cursor to ``current_time`` and centre the view on it."""
        if self.playtime_line is None:
            # The cursor is created when audio is loaded.
            return
        self.playtime_line.setPos(current_time)
        # Adjust view range and scrollbar
        self.adjust_view_range(current_time)

    def adjust_view_range(self, current_time, window_size=None):
        """Centre a window of ``window_size`` seconds on ``current_time``."""
        if self.duration is None:
            return
        if window_size is None:
            window_size = self.window_size
        half_window = window_size / 2.0
        start = max(0.0, current_time - half_window)
        end = min(self.duration, current_time + half_window)
        self.plot_widget.setXRange(start, end, padding=0)
        self.scrollbar.blockSignals(True)
        self.scrollbar.setValue(int(start * 1000))
        self.scrollbar.blockSignals(False)

    def update_line_position(self, idx, boundary_type, new_pos):
        """Move a boundary line programmatically (used by undo/redo)."""
        # Find the line and update its position
        for line_info in self.lines:
            if line_info['idx'] == idx and line_info['type'] == boundary_type:
                line_info['line'].setValue(new_pos)
                line_info['line'].old_pos = new_pos
                break
        # Update word data
        if boundary_type == 'start':
            self.words[idx]['start_time'] = new_pos
        elif boundary_type == 'end':
            self.words[idx]['end_time'] = new_pos
        self.update_connecting_line(idx)
        # Keep the coloured region in step with the boundary, as
        # on_line_moved does for interactive drags.
        self.update_word_segment(idx)
539
+
540
+
541
+ # --- MainWindow Class ---
542
+
543
+ class MainWindow(QMainWindow):
544
def __init__(self):
    """Create the main window, its state, the UI and the autosave machinery."""
    super().__init__()
    self.setWindowTitle("PELICAn Transcription Tool")
    self.setGeometry(100, 100, 1600, 900)

    # Initialize variables
    self.audio_segment = None
    self.is_playing = False
    self.play_obj = None
    self.current_time = 0.0
    self.speakers = []
    self.undo_stack = QUndoStack(self)
    self.transcript = None  # Initialize the Transcript object

    # Setup UI components
    self.setup_ui()
    self.setup_signals()

    # Reserve the autosave file before the timer exists so the path is set
    # whenever autosave runs. The handle is closed right away; only the path
    # is kept, and the file stays on disk (delete=False).
    with tempfile.NamedTemporaryFile(delete=False, suffix='.json') as autosave_file:
        self.temp_file_path = autosave_file.name

    # Start the autosave timer
    self.autosave_timer = QTimer(self)
    self.autosave_timer.timeout.connect(self.autosave)
    self.autosave_timer.start(5000)  # Trigger autosave every 5 seconds

    # Load autosave if exists
    self.load_autosave()
570
+
571
def setup_ui(self):
    """Build the central widget: waveform canvas plus three rows of buttons."""
    self.waveform_widget = QWidget()
    waveform_layout = QVBoxLayout(self.waveform_widget)
    self.canvas = WaveformCanvas(parent=self.waveform_widget, main_window=self)  # Pass self here
    waveform_layout.addWidget(self.canvas)

    # Playback Controls
    playback_layout = QHBoxLayout()
    self.play_button = QPushButton("Play")
    self.play_button.clicked.connect(self.toggle_playback)
    self.stop_button = QPushButton("Stop")
    self.stop_button.clicked.connect(self.stop_playback)
    playback_layout.addWidget(self.play_button)
    playback_layout.addWidget(self.stop_button)
    waveform_layout.addLayout(playback_layout)

    # Load and Save Buttons
    buttons_layout = QHBoxLayout()
    load_audio_button = QPushButton("Load Audio")
    load_audio_button.clicked.connect(self.load_audio)
    load_transcript_button = QPushButton("Load Transcript")
    load_transcript_button.clicked.connect(self.load_transcript)
    save_button = QPushButton("Save Annotations")
    save_button.clicked.connect(self.save_annotations)
    recalc_utterances_button = QPushButton("Recalculate Utterances")
    recalc_utterances_button.clicked.connect(self.recalculate_utterances)
    buttons_layout.addWidget(load_audio_button)
    buttons_layout.addWidget(load_transcript_button)
    buttons_layout.addWidget(save_button)
    buttons_layout.addWidget(recalc_utterances_button)
    waveform_layout.addLayout(buttons_layout)

    # Undo/Redo Buttons
    undo_redo_layout = QHBoxLayout()
    undo_button = QPushButton("Undo")
    undo_button.clicked.connect(self.undo_stack.undo)
    redo_button = QPushButton("Redo")
    redo_button.clicked.connect(self.undo_stack.redo)
    undo_redo_layout.addWidget(undo_button)
    undo_redo_layout.addWidget(redo_button)
    waveform_layout.addLayout(undo_redo_layout)

    # Install the finished container once; a second call with the same
    # widget would change nothing.
    self.setCentralWidget(self.waveform_widget)
615
+
616
def setup_signals(self):
    """Wire every canvas signal to the window slot that handles it."""
    canvas = self.canvas
    wiring = (
        (canvas.boundary_changed, self.on_boundary_changed),
        (canvas.waveform_clicked, self.on_waveform_clicked),
        (canvas.word_double_clicked, self.on_word_double_clicked),
        (canvas.word_right_clicked, self.on_word_right_clicked),
        (canvas.audio_loaded, self.on_audio_loaded),
        (canvas.loading_error, self.on_audio_load_error),
    )
    for signal, slot in wiring:
        signal.connect(slot)
623
+
624
def load_audio(self):
    """Ask for an audio file, show it on the canvas and prepare it for playback."""
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self,
        "Open Audio File",
        "",
        "Audio Files (*.wav *.mp3 *.flac *.ogg);;All Files (*)",
        options=options,
    )
    if not file_path:
        # Dialog was cancelled.
        return

    # The canvas loads the waveform on its own worker thread.
    self.canvas.load_audio(file_path)
    # Load audio segment for playback
    try:
        self.audio_segment = AudioSegment.from_file(file_path).set_channels(1)
    except Exception as e:
        # Drop any segment left over from a previously loaded file so that
        # playback cannot run audio that does not match the canvas.
        self.audio_segment = None
        QMessageBox.critical(self, "Audio Load Error", f"Failed to load audio for playback:\n{str(e)}")
640
+
641
def on_audio_loaded(self):
    """Report the successful load and centre the view on the current time."""
    status_bar = self.statusBar()
    status_bar.showMessage("Audio loaded successfully.", 5000)
    position = self.current_time
    self.canvas.adjust_view_range(position)
644
+
645
def on_audio_load_error(self, error_message):
    """Show the loader's error in a dialog and in the status bar."""
    details = f"Failed to load audio file:\n{error_message}"
    QMessageBox.critical(self, "Audio Load Error", details)
    status_bar = self.statusBar()
    status_bar.showMessage("Failed to load audio.", 5000)
648
+
649
def load_transcript(self):
    """Ask for a transcript JSON file and show its words and utterances."""
    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getOpenFileName(
        self,
        "Open Transcript File",
        "",
        "JSON Files (*.json);;All Files (*)",
        options=options,
    )
    if not file_path:
        # Dialog was cancelled.
        return

    try:
        self.transcript = Transcript.from_json_file(file_path)
        # Unique, non-empty speakers in order of first appearance, so the
        # speaker menu has the same order on every run (a plain set does
        # not guarantee any order).
        named_speakers = (word.get('speaker', '') for word in self.transcript.combined_data)
        self.speakers = list(dict.fromkeys(name for name in named_speakers if name))
        self.canvas.load_words(self.transcript.combined_data)
        self.canvas.load_utterances(self.transcript.combined_utterances)
        print(f"Loaded transcript file: {file_path}")
    except Exception as e:
        QMessageBox.critical(self, "Error Loading Transcript", f"Failed to load transcript:\n{str(e)}")
670
+
671
def on_boundary_changed(self, idx, boundary_type, new_pos, old_pos):
    """Record a boundary move on the undo stack and autosave.

    An ``idx`` of -1 is ignored.
    """
    if idx != -1:
        move = MoveBoundaryCommand(self, idx, boundary_type, old_pos, new_pos)
        self.undo_stack.push(move)
        # Utterances are deliberately not recalculated here.
        self.autosave()
678
+
679
def on_waveform_clicked(self, time):
    """Make ``time`` the current position and move the playback cursor there."""
    self.current_time = time
    canvas = self.canvas
    canvas.update_playtime_line(self.current_time)
682
+
683
def find_word_at_time(self, time):
    """
    Return ``(index, word)`` of the first word whose span contains ``time``.

    A word matches when ``start_time <= time < end_time``. ``(None, None)``
    is returned when no word matches or when no transcript is loaded.
    """
    if self.transcript is None:
        # The waveform can be clicked before any transcript exists.
        return None, None
    for idx, word in enumerate(self.transcript.combined_data):
        if float(word['start_time']) <= time < float(word['end_time']):
            return idx, word
    return None, None
691
+
692
def on_word_clicked(self, idx):
    """
    Triggered when a word label is clicked. Allows inline editing of the word.

    The label is replaced by a QLineEdit placed over it; the edit is
    finalized by ``finish_editing`` once the line edit reports that
    editing has finished.
    """
    if getattr(self, 'editing_line', None) is not None:
        # Another inline edit is still open. Starting a second one would
        # orphan the first editor and the label it replaced.
        return

    word_data = self.transcript.combined_data[idx]
    word_label = self.canvas.connecting_lines[idx]['label']

    # Remove the current label from the canvas
    plot_widget = self.canvas.plot_widget
    plot_widget.removeItem(word_label)

    # Create a QLineEdit widget for inline editing
    editor = QLineEdit(self.canvas)
    editor.setText(word_data['word'])
    editor.setFixedWidth(150)  # Set appropriate width
    editor.setAlignment(Qt.AlignCenter)

    # The editor is a child of the canvas, so its position must be in canvas
    # coordinates: data -> scene -> viewport pixels -> canvas.
    scene_pos = plot_widget.plotItem.vb.mapViewToScene(word_label.pos())
    viewport_pos = plot_widget.mapFromScene(scene_pos)
    editor.move(plot_widget.viewport().mapTo(self.canvas, viewport_pos))

    self.editing_line = editor
    editor.show()
    editor.setFocus()

    # Connect editing finished signal to finalize changes
    editor.editingFinished.connect(lambda: self.finish_editing(idx))
720
+
721
def finish_editing(self, idx):
    """
    Finalizes word editing, updates the label, and restores functionality.

    Does nothing when no inline editor is open.
    """
    editor = getattr(self, 'editing_line', None)
    if editor is None:
        # editingFinished can be delivered more than once for one edit
        # (e.g. Return followed by the focus change); only the first counts.
        return
    # Forget the editor first so a repeated signal is ignored.
    self.editing_line = None

    # Get the new word from QLineEdit
    new_word = editor.text()

    # Update the transcript data through the undo stack
    old_word = self.transcript.combined_data[idx]['word']
    if new_word != old_word:
        command = EditWordCommand(self, idx, old_word, new_word)
        self.undo_stack.push(command)

    # Remove the QLineEdit
    editor.deleteLater()

    # Create and add the updated label
    word_label = pg.TextItem(new_word, anchor=(0.5, 0), color='white')
    mid_x = (float(self.transcript.combined_data[idx]['start_time']) +
             float(self.transcript.combined_data[idx]['end_time'])) / 2
    word_label.setPos(mid_x, 0.6)  # Adjust label position
    word_label.mouseClickEvent = lambda ev, idx=idx: self.on_word_clicked(idx)
    self.canvas.plot_widget.addItem(word_label)

    # Update the canvas label reference
    self.canvas.connecting_lines[idx]['label'] = word_label

    # Trigger autosave or other required updates
    self.autosave()
751
+
752
def on_word_double_clicked(self, time):
    """Prompt for new text for the word at ``time`` and record the edit."""
    idx, word = self.find_word_at_time(time)
    if word is None:
        QMessageBox.information(self, "No Word", "No word found at this position.")
        return

    new_word, ok = QInputDialog.getText(self, "Edit Word", "New word:", text=word['word'])
    # Nothing to do when the dialog was cancelled or the text is unchanged.
    if not ok or new_word == word['word']:
        return

    previous = word['word']
    self.undo_stack.push(EditWordCommand(self, idx, previous, new_word))
    self.autosave()
763
+
764
def on_word_right_clicked(self, time):
    """
    Show a context menu for assigning a speaker to the word at ``time``.

    The menu lists every entry of ``self.speakers`` plus a "(No Speaker)"
    entry that maps to the empty string. Choosing an entry calls
    ``self.set_word_speaker`` for the word. An information box is shown
    when no word is found at the position.

    Args:
        time: Position passed on to ``self.find_word_at_time``.
    """
    idx, word = self.find_word_at_time(time)
    if word is None:
        QMessageBox.information(self, "No Word", "No word found at this position.")
        return

    menu = QMenu(self)
    speakers = self.speakers + [""]  # Add empty speaker option
    for speaker in speakers:
        display_text = speaker if speaker else "(No Speaker)"
        # Parent the action to the menu (not the window) so it is destroyed
        # together with the menu instead of piling up on the main window.
        action = QAction(display_text, menu)
        # s=speaker freezes the loop value for this particular action
        action.triggered.connect(lambda checked, s=speaker: self.set_word_speaker(idx, s))
        menu.addAction(action)
    menu.exec_(QCursor.pos())
    # exec_() has returned, so any triggered handler already ran; release the
    # one-shot menu rather than keeping it as a child of the window forever.
    menu.deleteLater()
777
+
778
def set_word_speaker(self, idx, speaker):
    """
    Assign ``speaker`` to the word at ``idx`` through the undo stack.

    Nothing happens when the word already has that speaker (a missing
    ``'speaker'`` key counts as the empty string). Otherwise an
    ``EditSpeakerCommand`` is pushed, a non-empty speaker not yet in
    ``self.speakers`` is appended to it, and the change is autosaved.

    Args:
        idx: Index into ``self.transcript.combined_data``.
        speaker: New speaker label; the empty string means no speaker.
    """
    entry = self.transcript.combined_data[idx]
    previous = entry.get('speaker', '')
    if speaker == previous:
        return

    self.undo_stack.push(EditSpeakerCommand(self, idx, previous, speaker))
    is_new_speaker = bool(speaker) and speaker not in self.speakers
    if is_new_speaker:
        self.speakers.append(speaker)
    self.autosave()
787
+
788
def toggle_playback(self):
    """
    Switch between playing and paused.

    Warns and returns when no audio is loaded; otherwise pauses if
    ``self.is_playing`` is set and starts playback if it is not.
    """
    if self.audio_segment is None:
        QMessageBox.warning(self, "No Audio", "Please load an audio file first.")
        return

    handler = self.pause_playback if self.is_playing else self.start_playback
    handler()
796
+
797
def start_playback(self):
    """
    Start audio playback from ``self.current_time``.

    Does nothing when no audio is loaded. Otherwise plays the audio from
    the current position (in milliseconds) onward, switches the button to
    "Pause", records the wall-clock start time and start position, and
    starts a 50 ms timer that calls ``self.update_current_time``. Any
    failure is reported in a critical message box.
    """
    if self.audio_segment is None:
        return

    try:
        offset_ms = int(self.current_time * 1000)
        remaining_audio = self.audio_segment[offset_ms:]
        self.play_obj = play_audio(remaining_audio)
        self.is_playing = True
        self.play_button.setText("Pause")

        # Reference point used to derive the position from the wall clock
        self.playback_start_time = time.time()
        self.playback_start_position = self.current_time

        self.playback_timer = timer = QTimer()
        timer.timeout.connect(self.update_current_time)
        timer.start(50)  # Increased frequency for smoother updates
    except Exception as e:
        QMessageBox.critical(self, "Playback Error", f"Failed to play audio:\n{str(e)}")
813
+
814
def pause_playback(self):
    """
    Pause playback and remember the position that was reached.

    Stops the active play object (if any), resets the button to "Play" and
    stops the update timer. ``self.current_time`` is advanced by the
    wall-clock time elapsed since playback started, but only if playback
    was actually running when this method was called.
    """
    # Captured before the flag is cleared: the elapsed-time computation below
    # is only meaningful (and its start markers only guaranteed to exist)
    # when playback was in progress.
    was_playing = self.is_playing

    if self.play_obj:
        self.play_obj.stop()
        self.play_obj = None
    self.is_playing = False
    self.play_button.setText("Play")
    if hasattr(self, 'playback_timer') and self.playback_timer.isActive():
        self.playback_timer.stop()

    if was_playing:
        # Update current_time to the actual playback position
        elapsed_time = time.time() - self.playback_start_time
        self.current_time = self.playback_start_position + elapsed_time
825
+
826
def stop_playback(self):
    """
    Stop playback and rewind to the beginning.

    Stops the active play object (if any), resets the button to "Play",
    sets ``self.current_time`` to 0.0, moves the canvas playtime line there
    and stops the update timer if it is running.
    """
    player = self.play_obj
    if player:
        player.stop()
        self.play_obj = None

    self.is_playing = False
    self.play_button.setText("Play")

    # Rewind and reflect the new position on the canvas
    self.current_time = 0.0
    self.canvas.update_playtime_line(self.current_time)

    timer_running = hasattr(self, 'playback_timer') and self.playback_timer.isActive()
    if timer_running:
        self.playback_timer.stop()
836
+
837
def update_current_time(self):
    """
    Timer callback that advances the playback position.

    Derives ``self.current_time`` from the wall-clock time elapsed since
    playback started. Stops playback once the position reaches
    ``self.canvas.duration``; otherwise moves the canvas playtime line.
    """
    seconds_played = time.time() - self.playback_start_time
    self.current_time = self.playback_start_position + seconds_played

    reached_end = self.current_time >= self.canvas.duration
    if reached_end:
        self.stop_playback()
        return

    self.canvas.update_playtime_line(self.current_time)
844
+
845
def recalculate_utterances(self):
    """
    Re-aggregate the transcript's words into utterances and redraw them.

    Warns and returns when no transcript is loaded. On success the canvas
    is reloaded with the new utterances, a status-bar message is shown for
    5000 ms and the result is autosaved. Any failure is reported in a
    critical message box.
    """
    transcript = self.transcript
    if not transcript:
        QMessageBox.warning(self, "No Transcript", "Please load a transcript first.")
        return

    try:
        transcript.aggregate_to_utterances()
        utterances = transcript.combined_utterances
        self.canvas.load_utterances(utterances)
        status_bar = self.statusBar()
        status_bar.showMessage("Utterances recalculated successfully.", 5000)
        self.autosave()
    except Exception as e:
        QMessageBox.critical(self, "Aggregation Error", f"Failed to recalculate utterances:\n{str(e)}")
856
+
857
def autosave(self):
    """
    Write the current transcript to the temporary autosave file.

    Does nothing when no transcript is loaded. This is best effort: the
    outcome is printed and a failure is never propagated to the caller.
    """
    if not self.transcript:
        return

    try:
        self.transcript.save_as_json(self.temp_file_path)
    except Exception as e:
        print(f"Autosave failed: {e}")
    else:
        print(f"Autosaved annotations to {self.temp_file_path}")
864
+
865
def load_autosave(self):
    """
    Restore annotations from the temporary autosave file, if one exists.

    Loads the transcript from ``self.temp_file_path``, rebuilds
    ``self.speakers`` from the non-empty speaker labels found in it,
    reloads words and utterances into the canvas and informs the user.
    A failure is printed and otherwise ignored; nothing happens when the
    autosave file does not exist.
    """
    if not os.path.exists(self.temp_file_path):
        return

    try:
        self.transcript = Transcript.from_json_file(self.temp_file_path)
        # Extract unique speakers. dict.fromkeys de-duplicates while keeping
        # first-appearance order, so the speaker list is the same on every
        # run (iterating a set of strings is not stable between runs).
        labels = (word.get('speaker', '') for word in self.transcript.combined_data)
        self.speakers = list(dict.fromkeys(label for label in labels if label))
        self.canvas.load_words(self.transcript.combined_data)
        self.canvas.load_utterances(self.transcript.combined_utterances)
        QMessageBox.information(self, "Recovery", "Recovered annotations from autosave.")
    except Exception as e:
        print(f"Failed to recover autosave: {e}")
878
+
879
def closeEvent(self, event):
    """
    Ask whether to save before the window closes.

    "Yes" saves the annotations and closes only if the save actually
    succeeded (or no transcript is loaded, so there is nothing to save).
    "No" closes without saving. Any other answer keeps the window open.
    The autosave file is removed only when the window really closes.

    Args:
        event: The close event; accepted to close, ignored to stay open.
    """
    reply = QMessageBox.question(
        self, 'Exit',
        "Do you want to save your annotations before exiting?",
        QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel,
        QMessageBox.Yes
    )

    if reply == QMessageBox.Yes:
        # save_annotations validates first and reports its own errors. If it
        # did not write a file (invalid data, cancelled dialog, I/O error),
        # keep the window and the autosave file so no work is lost.
        if self.transcript and not self.save_annotations():
            event.ignore()
            return
    elif reply != QMessageBox.No:
        event.ignore()
        return

    self._remove_autosave_file()
    event.accept()

def _remove_autosave_file(self):
    """Delete the temporary autosave file; a missing file is not an error."""
    try:
        os.remove(self.temp_file_path)
    except FileNotFoundError:
        pass

def save_annotations(self):
    """
    Validate the annotations and save them to a user-chosen JSON file.

    Returns:
        True if the annotations were written to disk. False if no
        transcript is loaded, validation failed, the file dialog was
        cancelled or writing failed.
    """
    if not self.transcript:
        QMessageBox.warning(self, "No Transcript", "Please load a transcript first.")
        return False
    if not self.validate_annotations():
        return False

    options = QFileDialog.Options()
    file_path, _ = QFileDialog.getSaveFileName(
        self,
        "Save Annotations",
        "",
        "JSON Files (*.json);;All Files (*)",
        options=options,
    )
    if not file_path:
        # Dialog cancelled: nothing was saved
        return False

    try:
        self.transcript.save_as_json(file_path)
    except Exception as e:
        QMessageBox.critical(self, "Save Error", f"Failed to save annotations:\n{str(e)}")
        return False

    QMessageBox.information(self, "Save Successful", f"Annotations saved to {file_path}")
    return True
919
+
920
def validate_annotations(self):
    """
    Check that every word has numeric, ordered, non-overlapping times.

    Words are examined in order of start time. A warning box describes the
    first problem found: a missing or non-numeric start/end time, a start
    time after the end time, or a word that ends after the next one starts.

    Returns:
        True if all annotations are valid (or no transcript is loaded, so
        there is nothing to validate), False otherwise.
    """
    if not self.transcript:
        return True

    # Parse every timestamp before sorting: the sort key needs the numeric
    # start time, so a bad value must be caught here to produce the warning
    # rather than an uncaught exception from inside the sort.
    timed_words = []
    for word in self.transcript.combined_data:
        try:
            start_time = float(word['start_time'])
            end_time = float(word['end_time'])
        except (KeyError, TypeError, ValueError):
            QMessageBox.warning(self, "Invalid Annotation", f"Non-numeric start or end time for word '{word['word']}'.")
            return False
        timed_words.append((start_time, end_time, word))

    timed_words.sort(key=lambda item: item[0])

    last_index = len(timed_words) - 1
    for i, (start_time, end_time, word) in enumerate(timed_words):
        if start_time > end_time:
            QMessageBox.warning(self, "Invalid Annotation", f"Start time must be less than end time for word '{word['word']}'.")
            return False
        if i < last_index:
            next_start, _, next_word = timed_words[i + 1]
            if end_time > next_start:
                QMessageBox.warning(
                    self, "Invalid Annotation",
                    f"Annotations for words '{word['word']}' and '{next_word['word']}' overlap."
                )
                return False
    return True
943
+
944
+
945
+ # --- Main Execution ---
946
+
947
def main():
    """Create the Qt application, show the main window and run the event loop."""
    application = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    # Propagate the event loop's return code as the process exit status
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()