anchor_annotator-0.8.2-py3-none-any.whl → anchor_annotator-0.9.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anchor/_version.py +2 -2
- anchor/command_line.py +1 -0
- anchor/main.py +113 -6
- anchor/models.py +402 -23
- anchor/plot.py +835 -104
- anchor/settings.py +6 -10
- anchor/ui_main_window.py +14 -8
- anchor/undo.py +682 -11
- anchor/widgets.py +303 -39
- anchor/workers.py +632 -351
- {anchor_annotator-0.8.2.dist-info → anchor_annotator-0.9.1.dist-info}/METADATA +1 -1
- anchor_annotator-0.9.1.dist-info/RECORD +22 -0
- anchor_annotator-0.8.2.dist-info/RECORD +0 -22
- {anchor_annotator-0.8.2.dist-info → anchor_annotator-0.9.1.dist-info}/WHEEL +0 -0
- {anchor_annotator-0.8.2.dist-info → anchor_annotator-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {anchor_annotator-0.8.2.dist-info → anchor_annotator-0.9.1.dist-info}/top_level.txt +0 -0
anchor/workers.py
CHANGED
```diff
@@ -39,11 +39,11 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
 )
 from montreal_forced_aligner.corpus.classes import FileData
 from montreal_forced_aligner.data import (
-    CtmInterval,
     DatasetType,
     DistanceMetric,
     Language,
     ManifoldAlgorithm,
+    PhoneType,
     TextFileType,
     WordType,
     WorkflowType,
@@ -57,6 +57,8 @@ from montreal_forced_aligner.db import (
     Phone,
     PhoneInterval,
     Pronunciation,
+    ReferencePhoneInterval,
+    ReferenceWordInterval,
     SoundFile,
     Speaker,
     SpeakerOrdering,
@@ -94,7 +96,7 @@ from montreal_forced_aligner.vad.models import FOUND_SPEECHBRAIN, MfaVAD
 from montreal_forced_aligner.vad.segmenter import TranscriptionSegmenter, VadSegmenter
 from montreal_forced_aligner.validation.corpus_validator import PretrainedValidator
 from PySide6 import QtCore
-from sklearn import discriminant_analysis
+from sklearn import discriminant_analysis
 from sqlalchemy.orm import joinedload, selectinload, subqueryload

 import anchor.db
@@ -106,26 +108,6 @@ if typing.TYPE_CHECKING:
 logger = logging.getLogger("anchor")


-@dataclassy.dataclass(slots=True)
-class UtteranceData:
-    id: int
-    begin: float
-    end: float
-    channel: int
-    text: str
-    normalized_text: str
-    transcription_text: str
-    speaker_id: int
-    file_id: int
-    reference_phone_intervals: typing.List[CtmInterval]
-    aligned_word_intervals: typing.List[CtmInterval]
-    aligned_phone_intervals: typing.List[CtmInterval]
-    transcribed_word_intervals: typing.List[CtmInterval]
-    transcribed_phone_intervals: typing.List[CtmInterval]
-    per_speaker_transcribed_word_intervals: typing.List[CtmInterval]
-    per_speaker_transcribed_phone_intervals: typing.List[CtmInterval]
-
-
 @dataclassy.dataclass
 class SpeakerPlda:
     test_ivectors: typing.List[DoubleVector]
@@ -664,6 +646,9 @@ class ExportFilesWorker(Worker):
             if self.progress_callback is not None:
                 self.progress_callback.update_total(files.count())
             for f in files:
+                if not f.utterances:
+                    logger.debug(f"Skipping {f.name} for no utterances")
+                    continue
                 if self.stopped.is_set():
                     session.rollback()
                     break
@@ -784,8 +769,8 @@ class ChangeSpeakerWorker(Worker):
         per_utterance = isinstance(self.utterance_ids[0], list)
         with self.session() as session:
             try:
-                if (not per_utterance and self.new_speaker_id <= 0) or any(
-                    x[-1] <= 0 for x in self.utterance_ids
+                if (not per_utterance and self.new_speaker_id <= 0) or (
+                    per_utterance and any(x[-1] <= 0 for x in self.utterance_ids)
                 ):
                     new_speaker_id = session.query(sqlalchemy.func.max(Speaker.id)).scalar() + 1
                     speaker = session.query(Speaker).get(self.old_speaker_id)
@@ -805,6 +790,8 @@ class ChangeSpeakerWorker(Worker):
                         )
                     )
                     session.flush()
+                else:
+                    new_speaker_id = self.new_speaker_id
                 if not per_utterance:
                     utterance_ids = self.utterance_ids
                     if not utterance_ids:
@@ -1064,32 +1051,32 @@ class QueryUtterancesWorker(Worker):
             c = session.query(Corpus).first()
             count_only = self.kwargs.get("count", False)
             has_ivectors = self.kwargs.get("has_ivectors", False)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            filter_nulls = self.kwargs.get("filter_nulls", [])
+            columns = [
+                Utterance.id,
+                Utterance.file_id,
+                Utterance.speaker_id,
+                Utterance.oovs,
+                File.name,
+                Speaker.name,
+                Utterance.begin,
+                Utterance.end,
+                Utterance.duration,
+                Utterance.text,
+                Utterance.alignment_log_likelihood,
+                Utterance.speech_log_likelihood,
+                Utterance.duration_deviation,
+                Utterance.snr,
+                Utterance.phone_error_rate,
+                Utterance.alignment_score,
+                Utterance.transcription_text,
+                Utterance.word_error_rate,
+            ]
+            if has_ivectors and c.utterance_ivector_column is not None:
+                columns.append(
+                    c.utterance_ivector_column.cosine_distance(c.speaker_ivector_column)
+                )
+            columns.append(Utterance.diarization_variance)
             speaker_filter = self.kwargs.get("speaker_filter", None)
             file_filter = self.kwargs.get("file_filter", None)
             text_filter: TextFilterQuery = self.kwargs.get("text_filter", None)
@@ -1115,6 +1102,10 @@ class QueryUtterancesWorker(Worker):
                 text_column = Utterance.text
                 filter_regex = text_filter.generate_expression(posix=True)
                 utterances = utterances.filter(text_column.op("~")(filter_regex))
+            for i, null_check in enumerate(filter_nulls):
+                if null_check:
+                    column = columns[i + 3]
+                    utterances = utterances.filter(column != None)  # noqa
             if count_only:
                 try:
                     return utterances.count()
@@ -1181,9 +1172,9 @@ class QuerySpeakersWorker(Worker):
                 Speaker.num_utterances,
                 Speaker.dictionary_id,
             ]
-            if speaker_filter
+            if not speaker_filter:
                 columns.append(
-                    sqlalchemy.func.
+                    sqlalchemy.func.max(
                         c.utterance_ivector_column.cosine_distance(c.speaker_ivector_column)
                     )
                 )
@@ -1257,18 +1248,29 @@ class ClusterSpeakerUtterancesWorker(Worker):
             c = session.query(Corpus).first()
             speaker_name, ivector, utt_count = (
                 session.query(Speaker.name, c.speaker_ivector_column, Speaker.num_utterances)
-                .filter(
+                .filter(
+                    Speaker.id == self.speaker_ids[0], c.utterance_ivector_column != None  # noqa
+                )
                 .first()
             )
             if utt_count < 1:
                 return None
-            query =
-
+            query = (
+                session.query(
+                    Utterance.speaker_id,
+                    c.utterance_ivector_column.cosine_distance(c.speaker_ivector_column),
+                )
+                .join(Utterance.speaker)
+                .filter(c.utterance_ivector_column != None)  # noqa
             )
             query = query.filter(Utterance.speaker_id.in_(self.speaker_ids))
             query = query.order_by(Utterance.id)
             additional_data = (
-                session.query(
+                session.query(
+                    Utterance.speaker_id,
+                    c.utterance_ivector_column.cosine_distance(c.speaker_ivector_column),
+                )
+                .join(Utterance.speaker)
                 .filter(
                     c.utterance_ivector_column != None,  # noqa
                 )
@@ -1281,8 +1283,18 @@ class ClusterSpeakerUtterancesWorker(Worker):
             additional_data = additional_data.order_by(
                 c.utterance_ivector_column.cosine_distance(ivector)
             ).limit(min(query.count(), self.limit))
-            cluster_ids =
-
+            cluster_ids = []
+            distances = []
+            for speaker_id, distance in query:
+                cluster_ids.append(speaker_id)
+                distances.append(distance)
+            for speaker_id, distance in additional_data:
+                cluster_ids.append(speaker_id)
+                distances.append(distance)
+            cluster_ids = np.array(cluster_ids)
+            distances = np.array(distances)
+            distances = (distances - distances.min()) / distances.max()
+            return self.speaker_ids, cluster_ids, distances


 class CalculateSpeakerIvectorsWorker(Worker):
@@ -1308,7 +1320,9 @@ class CalculateSpeakerIvectorsWorker(Worker):
             c = session.query(Corpus).first()
             speaker_name, ivector, utt_count = (
                 session.query(Speaker.name, c.speaker_ivector_column, Speaker.num_utterances)
-                .filter(
+                .filter(
+                    Speaker.id == self.speaker_ids[0], c.utterance_ivector_column != None  # noqa
+                )
                 .first()
             )
             if utt_count < 1:
@@ -1389,7 +1403,9 @@ class SpeakerMdsWorker(Worker):
             dim = IVECTOR_DIMENSION
             speaker_name, ivector, utt_count = (
                 session.query(Speaker.name, c.speaker_ivector_column, Speaker.num_utterances)
-                .filter(
+                .filter(
+                    Speaker.id == self.speaker_ids[0], c.utterance_ivector_column != None  # noqa
+                )
                 .first()
             )
             query = (
@@ -1442,17 +1458,17 @@ class SpeakerMdsWorker(Worker):
                 (num_utterances + additional_data.count() + self.limit,), dtype="int32"
             )
             ivectors = np.array(self.plda.transform_ivectors(ivectors, counts))
-            metric_type = DistanceMetric.cosine
+            self.metric_type = DistanceMetric.cosine
             if ivectors.shape[0] <= self.perplexity:
-                perplexity = ivectors.shape[0] - 1
+                self.perplexity = ivectors.shape[0] - 1
             if self.speaker_space is not None:
                 points = self.speaker_space.transform(ivectors)
             else:
                 points = visualize_clusters(
                     ivectors,
                     ManifoldAlgorithm.tsne,
-                    metric_type,
-                    perplexity,
+                    self.metric_type,
+                    self.perplexity,
                     self.plda,
                     quick=False,
                 )
@@ -1460,37 +1476,42 @@ class SpeakerMdsWorker(Worker):
             return self.speaker_ids, points


-class
+class AlignmentAnalysisWorker(Worker):
     def __init__(
         self,
         session,
         use_mp=False,
-
-
-
-
-
-
+        speaker_id: int = None,
+        phone_id: int = None,
+        word_filter: TextFilterQuery = None,
+        less_than: float = None,
+        greater_than: float = None,
+        measure: str = "duration",
+        exclude_manual: bool = False,
+        word_mode: bool = False,
+        relative_duration: bool = False,
         limit: int = 100,
+        current_offset: int = 0,
+        sort_index: int = None,
+        sort_desc: bool = False,
         **kwargs,
     ):
         super().__init__(use_mp=use_mp, **kwargs)
         self.session = session
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
-        self.
+        self.speaker_id = speaker_id
+        self.phone_id = phone_id
+        self.less_than = less_than
+        self.greater_than = greater_than
+        self.measure = measure
+        self.word_filter = word_filter
+        self.exclude_manual = exclude_manual
+        self.word_mode = word_mode
+        self.relative_duration = relative_duration

-
-
-
-
-        if self.metric is DistanceMetric.plda:
-            if self.plda is None:
-                self.metric = DistanceMetric.cosine
+        self.limit = limit
+        self.current_offset = current_offset
+        self.sort_index = sort_index
+        self.sort_desc = sort_desc

     def _run(self):
         count_only = self.kwargs.get("count", False)
@@ -1498,137 +1519,164 @@ class SpeakerDiarizationWorker(Worker):
             self.progress_callback.update_total(self.limit)

         with self.session() as session:
-
-
+            indices = []
+            file_indices = []
             speaker_indices = []
             utterance_ids = []
+            reversed_indices = {}
             data = []
-
-
-
-
-
-
-
-
+            if not self.word_mode:
+                if not count_only and self.relative_duration:
+                    duration_column = sqlalchemy.sql.label(
+                        "duration",
+                        (PhoneInterval.duration - Phone.mean_duration) / Phone.sd_duration,
+                    )
+                else:
+                    duration_column = PhoneInterval.duration
+                goodness_column = PhoneInterval.phone_goodness
+                columns = [
+                    PhoneInterval.id,
+                    PhoneInterval.utterance_id,
+                    Utterance.file_id,
+                    Utterance.speaker_id,
+                    Utterance.begin,
+                    Utterance.end,
+                    File.name,
+                    Speaker.name,
+                    Phone.phone,
+                    duration_column,
+                    goodness_column,
+                    Word.word,
+                ]
+                query = (
+                    session.query(*columns)
+                    .join(PhoneInterval.utterance)
+                    .join(PhoneInterval.phone)
+                    .join(Utterance.speaker)
+                    .join(Utterance.file)
+                    .join(PhoneInterval.word_interval)
+                    .join(WordInterval.word)
+                )
             else:
-
-
-
-
-
-                if self.threshold is None:
-                    query = query.limit(self.limit).offset(self.kwargs.get("current_offset", 0))
-                found = set()
-                for speaker_id, ivector, speaker_name, num_utterances in query:
-                    if self.stopped is not None and self.stopped.is_set():
-                        break
-                    if self.metric is DistanceMetric.plda:
-                        kaldi_ivector = DoubleVector()
-                        kaldi_ivector.from_numpy(ivector)
-                        ivector_normalize_length(kaldi_ivector)
-                        kaldi_ivector = self.plda.transform_ivector(kaldi_ivector, num_utterances)
-                        index, distance = self.plda.classify_utterance(
-                            kaldi_ivector, self.speaker_plda.test_ivectors, self.speaker_plda.counts
+                if not count_only and self.relative_duration:
+                    duration_column = sqlalchemy.sql.label(
+                        "duration",
+                        sqlalchemy.func.sum(PhoneInterval.duration)
+                        / sqlalchemy.func.sum(Phone.mean_duration),
                     )
-                        suggested_name = self.speaker_plda.suggested_names[index]
-                        suggested_count = self.speaker_plda.counts[index]
-                        suggested_id = self.speaker_plda.suggested_ids[index]
-                        if suggested_id == speaker_id:
-                            continue
-                        if self.threshold is not None and distance < self.threshold:
-                            continue
                 else:
-
-
+                    duration_column = sqlalchemy.func.avg(PhoneInterval.duration)
+                goodness_column = sqlalchemy.func.min(PhoneInterval.phone_goodness)
+                columns = [
+                    WordInterval.id,
+                    WordInterval.utterance_id,
+                    Utterance.file_id,
+                    Utterance.speaker_id,
+                    Utterance.begin,
+                    Utterance.end,
+                    File.name,
+                    Speaker.name,
+                    sqlalchemy.func.string_agg(
+                        Phone.phone,
+                        sqlalchemy.dialects.postgresql.aggregate_order_by(
+                            sqlalchemy.literal_column("' '"), PhoneInterval.begin
+                        ),
+                    ),
+                    duration_column,
+                    goodness_column,
+                    Word.word,
+                ]
+                query = (
+                    session.query(*columns)
+                    .join(PhoneInterval.utterance)
+                    .join(PhoneInterval.phone)
+                    .join(Utterance.speaker)
+                    .join(Utterance.file)
+                    .join(PhoneInterval.word_interval)
+                    .join(WordInterval.word)
+                    .group_by(
+                        WordInterval.id,
+                        Utterance.id,
+                        Utterance.file_id,
+                        Utterance.speaker_id,
+                        Utterance.begin,
+                        Utterance.end,
+                        File.name,
                         Speaker.name,
-
-                        c.speaker_ivector_column.cosine_distance(ivector),
-                    ).filter(
-                        Speaker.id != speaker_id,
-                        # Speaker.num_utterances <= 200
+                        Word.word,
                     )
-
-                    suggested_speaker_query = suggested_speaker_query.filter(
-                        Speaker.num_utterances > 1
-                    )
-                    suggested_speaker_query = suggested_speaker_query.order_by(
-                        c.speaker_ivector_column.cosine_distance(ivector)
-                    ).limit(1)
-                    r = suggested_speaker_query.first()
-                    if r is None:
-                        continue
-                    suggested_id, suggested_name, suggested_count, distance = r
-                    if (suggested_id, speaker_id) in found or (speaker_id, suggested_id) in found:
-                        continue
-                    if self.use_silhouette:
-                        utterance_query = (
-                            session.query(Utterance.speaker_id, c.utterance_ivector_column)
-                            .filter(Utterance.speaker_id.in_([speaker_id, suggested_id]))
-                            .filter(c.utterance_ivector_column != None)  # noqa
-                        )
-                        ivectors = []
-                        labels = []
-                        for speaker_id, utterance_ivector in utterance_query:
-                            labels.append(speaker_id)
-                            ivectors.append(utterance_ivector)
-                        ivectors = np.array(ivectors)
-                        if self.metric is DistanceMetric.cosine:
-                            ivectors = preprocessing.normalize(ivectors, norm="l2")
-                            self.metric = "euclidean"
-                        distance = metrics.silhouette_score(ivectors, labels, metric=self.metric)
-                    if self.threshold is not None:
-                        if distance is not None and distance > self.threshold:
-                            continue
-                    if distance is None:
-                        continue
-                    if self.progress_callback is not None:
-                        self.progress_callback.increment_progress(1)
+                )

-
-
-
-                    found.add((suggested_id, speaker_id))
-                    suggested_indices.append(suggested_id)
-                    speaker_indices.append(speaker_id)
-                    data.append(
-                        [
-                            utterance_name,
-                            suggested_name,
-                            suggested_count,
-                            speaker_name,
-                            num_utterances,
-                            distance,
-                        ]
-                    )
+            if self.speaker_id is not None:
+                if isinstance(self.speaker_id, int):
+                    query = query.filter(Utterance.speaker_id == self.speaker_id)
                 else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    query = query.filter(Speaker.name == self.speaker_id)
+            if self.phone_id is not None:
+                if isinstance(self.phone_id, int):
+                    query = query.filter(PhoneInterval.phone_id == self.phone_id)
+                else:
+                    query = query.filter(Phone.phone == self.phone_id)
+            else:
+                query = query.filter(Phone.phone_type.in_([PhoneType.non_silence]))
+            if self.exclude_manual:
+                query = query.filter(Utterance.manual_alignments == False)  # noqa
+            if self.measure == "duration":
+                measure_column = duration_column
+            else:
+                measure_column = goodness_column
+            if self.less_than is not None or self.greater_than is not None:
+                if self.less_than is not None:
+                    query = query.filter(measure_column < self.less_than)
+                if self.greater_than is not None:
+                    query = query.filter(measure_column > self.greater_than)
+            if self.word_filter is not None and self.word_filter.text:
+                filter_regex = self.word_filter.generate_expression(posix=True)
+                query = query.filter(Word.word.op("~")(filter_regex))
+            if count_only:
+                try:
+                    return query.count()
+                except psycopg2.errors.InvalidRegularExpression:
+                    return 0
+            if self.sort_index is not None and self.sort_index + 6 <= len(columns) - 1:
+                sort_column = columns[self.sort_index + 6]
+                if self.sort_desc:
+                    sort_column = sort_column.desc()
+                query = query.order_by(sort_column, Utterance.id, PhoneInterval.begin)
+            else:
+                if self.word_mode:
+                    query = query.order_by(duration_column, Utterance.id, WordInterval.id)
+                else:
+                    query = query.order_by(duration_column, Utterance.id, PhoneInterval.begin)
+            query = query.limit(self.limit).offset(self.current_offset)
+            try:
+                for i, u in enumerate(query):
+                    if self.stopped is not None and self.stopped.is_set():
+                        return
+                    phone_interval_id = u[0]
+                    utterance_id = u[1]
+                    file_id = u[2]
+                    speaker_id = u[3]
+                    begin = u[4]
+                    end = u[5]
+                    file_name = u[6]
+                    indices.append(phone_interval_id)
+                    reversed_indices[phone_interval_id] = i
+                    file_indices.append(file_id)
+                    speaker_indices.append(speaker_id)

+                    utterance_ids.append(utterance_id)
+                    utterance_name = f"{file_name} ({begin:.3f}-{end:.3f})"
+                    data.append([utterance_name, *u[7:]])
+                    if self.progress_callback is not None:
+                        self.progress_callback.increment_progress(1)
+
+            except psycopg2.errors.InvalidRegularExpression:
+                pass
+            return data, indices, utterance_ids, file_indices, speaker_indices, reversed_indices

-
+
+class SpeakerDiarizationWorker(Worker):
     def __init__(
         self,
         session,
@@ -1642,8 +1690,8 @@ class SpeakerUtterancesWorker(Worker):
         speaker_plda: SpeakerPlda = None,
         limit: int = 100,
         inverted: bool = False,
+        utterance_based: bool = False,
         text_filter: TextFilterQuery = None,
-        in_speakers: bool = False,
         **kwargs,
     ):
         super().__init__(use_mp=use_mp, **kwargs)
@@ -1657,8 +1705,8 @@ class SpeakerUtterancesWorker(Worker):
         self.speaker_plda = speaker_plda
         self.limit = limit
         self.inverted = inverted
+        self.utterance_based = utterance_based
         self.text_filter = text_filter
-        self.in_speakers = in_speakers

         if isinstance(self.metric, str):
             self.metric = DistanceMetric[self.metric]
@@ -1684,6 +1732,8 @@ class SpeakerUtterancesWorker(Worker):
                     and self.speaker_plda is None
                 ):
                     speaker_plda = load_speaker_plda(session, self.plda, minimum_count=2)
+                elif self.speaker_plda is not None:
+                    speaker_plda = self.speaker_plda

             if self.reference_utterance_id is not None:
                 utterance_query = (
@@ -2098,7 +2148,6 @@ class SpeakerUtterancesWorker(Worker):
                 query = query.filter(
                     c.utterance_ivector_column.cosine_distance(ivector) <= self.threshold
                 )
-
                 if count_only:
                     return query.count()
                 if self.text_filter is None or not self.text_filter.text:
@@ -2147,33 +2196,52 @@ class SpeakerUtterancesWorker(Worker):
                             distance,
                         ]
                     )
-
+            else:
                 query = (
                     session.query(
-
+                        Utterance.id,
+                        File.id,
+                        File.name,
+                        Utterance.begin,
+                        Utterance.end,
+                        c.utterance_ivector_column,
+                        Speaker.name,
+                        Speaker.id,
+                        Speaker.num_utterances,
                     )
-                    .
-                    .
+                    .join(Utterance.file)
+                    .join(Utterance.speaker)
+                    .filter(c.utterance_ivector_column != None)  # noqa
+                    .filter(Speaker.num_utterances == 1)
                 )
                 if self.text_filter is not None and self.text_filter.text:
                     filter_regex = self.text_filter.generate_expression(posix=True)
-                    query = query.
-                    query = query.filter(Utterance.text.op("~")(filter_regex)).distinct()
+                    query = query.filter(Utterance.text.op("~")(filter_regex))
                 if count_only:
                     return query.count()
-
-                # query = query.order_by(c.utterance_ivector_column.cosine_distance(c.speaker_ivector_column).desc())
                 query = query.order_by(sqlalchemy.func.random())
-                # query = query.order_by(Utterance.duration.desc())
+                query = query.order_by(sqlalchemy.func.random())

                 if self.threshold is None:
                     query = query.limit(self.limit).offset(self.kwargs.get("current_offset", 0))
-
+                # else:
+                #     query = query.limit(limit*100)
+                for (
+                    utt_id,
+                    file_id,
+                    file_name,
+                    begin,
+                    end,
+                    ivector,
+                    speaker_name,
+                    speaker_id,
+                    speaker_num_utterances,
+                ) in query:
                     if self.stopped is not None and self.stopped.is_set():
                         break
                     if self.metric is DistanceMetric.plda:
                         kaldi_ivector = DoubleVector()
                         kaldi_ivector.from_numpy(ivector)
+                        ivector_normalize_length(kaldi_ivector)
                         kaldi_ivector = self.plda.transform_ivector(kaldi_ivector, 1)
                         index, distance = self.plda.classify_utterance(
                             kaldi_ivector, speaker_plda.test_ivectors, speaker_plda.counts
@@ -2186,67 +2254,257 @@ class SpeakerUtterancesWorker(Worker):
                         if self.threshold is not None and distance < self.threshold:
                             continue
                     else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        if self.utterance_based:
+                            sub_query = (
+                                session.query(
+                                    Speaker.id,
+                                    Speaker.name,
+                                    Speaker.num_utterances,
+                                    c.speaker_ivector_column.cosine_distance(ivector).label(
+                                        "distance"
+                                    ),
+                                )
+                                .join(Speaker.utterances)
+                                .filter(
+                                    Speaker.id != speaker_id,
+                                )
+                                .order_by(c.utterance_ivector_column.cosine_distance(ivector))
+                                .limit(100)
+                                .subquery()
+                            )
+
+                            suggested_speaker_query = (
+                                session.query(
+                                    sub_query.c.id,
+                                    sub_query.c.name,
+                                    sub_query.c.num_utterances,
+                                    sub_query.c.distance,
+                                )
+                                .group_by(
+                                    sub_query.c.id,
+                                    sub_query.c.name,
+                                    sub_query.c.num_utterances,
+                                    sub_query.c.distance,
+                                )
+                                .order_by(sqlalchemy.func.count(sub_query.c.id).desc())
+                            )
+
+                        else:
+                            suggested_speaker_query = session.query(
+                                Speaker.id,
+                                Speaker.name,
+                                Speaker.num_utterances,
+                                c.speaker_ivector_column.cosine_distance(ivector),
+                            ).filter(
+                                Speaker.id != speaker_id,
+                            )
+                            suggested_speaker_query = suggested_speaker_query.order_by(
+                                c.speaker_ivector_column.cosine_distance(ivector)
+                            ).limit(5)
+                        r = suggested_speaker_query.all()
+                        if not r:
                             continue
-                        suggested_id
+                        suggested_id = []
+                        suggested_name = []
+                        suggested_count = []
+                        distance = []
+                        for s_id, s_name, s_count, d in r:
+                            suggested_id.append(s_id)
+                            suggested_name.append(s_name)
+                            suggested_count.append(s_count)
+                            distance.append(d)
+                        if len(suggested_id) == 1:
+                            suggested_id = suggested_id[0]
+                            suggested_name = suggested_name[0]
+                            suggested_count = suggested_count[0]
+                            distance = distance[0]
                     if self.threshold is not None:
-                        if distance
+                        if isinstance(distance, list) and distance[0] > self.threshold:
+                            continue
+                        elif (
+                            not isinstance(distance, list)
+                            and distance is not None
+                            and distance > self.threshold
+                        ):
                             continue
                     if distance is None:
                         continue
                     if self.progress_callback is not None:
                         self.progress_callback.increment_progress(1)

-                    utterance_ids.append(
+                    utterance_ids.append(utt_id)
                     suggested_indices.append(suggested_id)
                     speaker_indices.append(speaker_id)
-                    utterance_name = ""
+                    utterance_name = f"{file_name} ({begin:.3f}-{end:.3f})"
                     data.append(
                         [
                             utterance_name,
                             suggested_name,
                             suggested_count,
                             speaker_name,
-
+                            speaker_num_utterances,
                             distance,
                         ]
                     )
                     if len(data) >= self.limit:
                         break
+            d = np.array([x[-1] if not isinstance(x[-1], list) else x[-1][0] for x in data])
+            if self.metric is DistanceMetric.plda:
+                d *= -1
+            indices = np.argsort(d)
+            utterance_ids = [utterance_ids[x] for x in indices]
+            suggested_indices = [suggested_indices[x] for x in indices]
+            speaker_indices = [speaker_indices[x] for x in indices]
+            data = [data[x] for x in indices]
+            return data, utterance_ids, suggested_indices, speaker_indices
+
+
+class SpeakerComparisonWorker(Worker):
+    def __init__(
+        self,
+        session,
+        use_mp=False,
+        speaker_id: int = None,
+        alternate_speaker_id: int = None,
+        reference_utterance_id: int = None,
+        threshold: float = None,
+        metric: typing.Union[str, DistanceMetric] = DistanceMetric.cosine,
+        plda: Plda = None,
+        speaker_plda: SpeakerPlda = None,
+        limit: int = 100,
+        inverted: bool = False,
+        text_filter: TextFilterQuery = None,
+        **kwargs,
+    ):
+        super().__init__(use_mp=use_mp, **kwargs)
+        self.session = session
+        self.speaker_id = speaker_id
+        self.alternate_speaker_id = alternate_speaker_id
+        self.reference_utterance_id = reference_utterance_id
+        self.threshold = threshold
+        self.metric = metric
+        self.plda = plda
+        self.speaker_plda = speaker_plda
+        self.limit = limit
+        self.inverted = inverted
+        self.text_filter = text_filter
+
+        if isinstance(self.metric, str):
+            self.metric = DistanceMetric[self.metric]
+        if self.metric is DistanceMetric.plda:
+            if self.plda is None:
+                self.metric = DistanceMetric.cosine
+
+    def _run(self):
+        count_only = self.kwargs.get("count", False)
+        if not count_only and self.progress_callback is not None:
+            self.progress_callback.update_total(self.limit)
+
+        with self.session() as session:
+            c = session.query(Corpus).first()
+            suggested_indices = []
+            speaker_indices = []
+            utterance_ids = []
+            data = []
+            if self.inverted or self.speaker_id is None:
+                if (
+                    self.metric is DistanceMetric.plda
+                    and not count_only
+                    and self.speaker_plda is None
+                ):
+                    speaker_plda = load_speaker_plda(session, self.plda, minimum_count=2)
+                elif self.speaker_plda is not None:
+                    speaker_plda = self.speaker_plda
+            found_set = set()
+            if self.speaker_id is not None:
+                query = session.query(
+                    Speaker.name, c.speaker_ivector_column, Speaker.num_utterances
+                )
+                if isinstance(self.speaker_id, int):
+                    query = query.filter(Speaker.id == self.speaker_id)
+                else:
+                    query = query.filter(Speaker.name == self.speaker_id)
+                r = query.first()
+                if r is None:
+                    return data, utterance_ids, suggested_indices
+                suggested_name, ivector, utt_count = r
+
+                if self.metric is DistanceMetric.plda:
+                    kaldi_speaker_ivector = DoubleVector()
+                    kaldi_speaker_ivector.from_numpy(ivector)
+                    kaldi_speaker_ivector = self.plda.transform_ivector(
+                        kaldi_speaker_ivector, utt_count
+                    )
+                query = session.query(
+                    Speaker.id,
+                    Speaker.name,
+                    Speaker.num_utterances,
+                    c.speaker_ivector_column,
+                    c.speaker_ivector_column.cosine_distance(ivector),
+                ).filter(Speaker.id != self.speaker_id)
+                if self.alternate_speaker_id is not None:
+                    query = query.filter(Speaker.id == self.alternate_speaker_id)
+                if self.threshold is not None:
+                    query = query.filter(
+                        c.speaker_ivector_column.cosine_distance(ivector) <= self.threshold
+                    )
+
+                if count_only:
+                    return query.count()
+                query = query.limit(self.limit).offset(self.kwargs.get("current_offset", 0))
+                for (
+                    original_id,
+                    speaker_name,
+                    original_count,
+                    original_ivector,
+                    distance,
+                ) in query:
+                    if self.stopped is not None and self.stopped.is_set():
+                        session.rollback()
+                        return
+                    if distance is None:
+                        continue
+                    if (self.speaker_id, original_id) in found_set:
+                        continue
+                    if self.progress_callback is not None:
+                        self.progress_callback.increment_progress(1)
+                    if self.metric is DistanceMetric.plda:
+                        kaldi_utterance_ivector = DoubleVector()
+                        kaldi_utterance_ivector.from_numpy(original_ivector)
+                        ivector_normalize_length(kaldi_utterance_ivector)
+                        kaldi_utterance_ivector = self.plda.transform_ivector(
+                            kaldi_utterance_ivector, original_count
+                        )
+                        distance = self.plda.LogLikelihoodRatio(
+                            kaldi_speaker_ivector, utt_count, kaldi_utterance_ivector
+                        )
+                    utterance_ids.append(None)
+                    suggested_indices.append(self.speaker_id)
+                    speaker_indices.append(original_id)
+                    found_set.add((self.speaker_id, original_id))
+                    utterance_name = ""
+                    data.append(
+                        [
+                            utterance_name,
+                            suggested_name,
+                            utt_count,
+                            speaker_name,
+                            original_count,
+                            distance,
+                        ]
+                    )
             else:
                 query = (
                     session.query(
-
-                        File.id,
-                        File.name,
-                        Utterance.begin,
-                        Utterance.end,
-                        c.utterance_ivector_column,
-                        Speaker.name,
-                        Speaker.id,
-                        Speaker.num_utterances,
+                        Speaker.id, c.speaker_ivector_column, Speaker.name, Speaker.num_utterances
                     )
-                    .
-                    .
-                    .filter(c.utterance_ivector_column != None)  # noqa
-                    .filter(Speaker.num_utterances == 1)
+                    .filter(c.speaker_ivector_column != None)  # noqa
+                    .filter(Speaker.num_utterances > 0)
                 )
                 if self.text_filter is not None and self.text_filter.text:
                     filter_regex = self.text_filter.generate_expression(posix=True)
-                    query = query.
+                    query = query.join(Speaker.utterances)
+                    query = query.filter(Utterance.text.op("~")(filter_regex)).distinct()
                 if count_only:
                     return query.count()
                 if self.text_filter is None or not self.text_filter.text:
@@ -2256,25 +2514,12 @@ class SpeakerUtterancesWorker(Worker):

                 if self.threshold is None:
                     query = query.limit(self.limit).offset(self.kwargs.get("current_offset", 0))
-
-                # query = query.limit(limit*100)
-                for (
-                    utt_id,
-                    file_id,
-                    file_name,
-                    begin,
-                    end,
-                    ivector,
-                    speaker_name,
-                    speaker_id,
-                    speaker_num_utterances,
-                ) in query:
+                for speaker_id, ivector, speaker_name, num_utterances in query:
                     if self.stopped is not None and self.stopped.is_set():
                         break
                     if self.metric is DistanceMetric.plda:
                         kaldi_ivector = DoubleVector()
                         kaldi_ivector.from_numpy(ivector)
-                        ivector_normalize_length(kaldi_ivector)
                         kaldi_ivector = self.plda.transform_ivector(kaldi_ivector, 1)
                         index, distance = self.plda.classify_utterance(
                             kaldi_ivector, speaker_plda.test_ivectors, speaker_plda.counts
@@ -2310,18 +2555,26 @@ class SpeakerUtterancesWorker(Worker):
                             continue
                     if self.progress_callback is not None:
                         self.progress_callback.increment_progress(1)
-
-
+                    if suggested_count < num_utterances:
+                        speaker_id, suggested_id = suggested_id, speaker_id
+                        speaker_name, suggested_name = suggested_name, speaker_name
+                        num_utterances, suggested_count = suggested_count, num_utterances
+                    if (speaker_id, suggested_id) in found_set:
+                        continue
+                    if (suggested_id, speaker_id) in found_set:
+                        continue
+                    found_set.add((speaker_id, suggested_id))
+                    utterance_ids.append(None)
                     suggested_indices.append(suggested_id)
                     speaker_indices.append(speaker_id)
-                    utterance_name =
+                    utterance_name = ""
                     data.append(
                         [
                             utterance_name,
                             suggested_name,
                             suggested_count,
                             speaker_name,
-
+                            num_utterances,
                             distance,
                         ]
                     )
@@ -2846,11 +3099,16 @@ class FileUtterancesWorker(Worker):
             .options(
                 selectinload(Utterance.phone_intervals).options(
                     joinedload(PhoneInterval.phone, innerjoin=True),
-
+                ),
+                selectinload(Utterance.reference_phone_intervals).options(
+                    joinedload(ReferencePhoneInterval.phone, innerjoin=True),
                 ),
                 selectinload(Utterance.word_intervals).options(
                     joinedload(WordInterval.word, innerjoin=True),
-                    joinedload(WordInterval.
+                    joinedload(WordInterval.pronunciation, innerjoin=True),
+                ),
+                selectinload(Utterance.reference_word_intervals).options(
+                    joinedload(ReferenceWordInterval.word, innerjoin=True),
                 ),
                 joinedload(Utterance.speaker, innerjoin=True),
             )
@@ -3146,45 +3404,82 @@ class WaveformWorker(Worker):  # pragma: no cover


 class SpeakerTierWorker(Worker):  # pragma: no cover
-    def __init__(
+    def __init__(
+        self,
+        session,
+        file_id,
+        *args,
+        query_alignment=False,
+        utterance_id=None,
+        begin=None,
+        end=None,
+    ):
         super().__init__("Generating speaker tier", *args)
         self.query_alignment = query_alignment
         self.session = session
         self.file_id = file_id
+        self.utterance_id = utterance_id
+        self.begin = begin
+        self.end = end
         self.settings = AnchorSettings()

     def run(self):
         if self.session is None:
             return
         with self.session() as session:
-
-
-
-
-            )
-            show_words = self.settings.value(
-                self.settings.TIER_ALIGNED_WORDS_VISIBLE
-            ) or self.settings.value(self.settings.TIER_TRANSCRIBED_WORDS_VISIBLE)
+            file = session.get(File, self.file_id)
+            show_phones = self.settings.value(
+                self.settings.TIER_ALIGNED_PHONES_VISIBLE
+            ) or self.settings.value(self.settings.TIER_REFERENCE_PHONES_VISIBLE)
+            show_words = self.settings.value(self.settings.TIER_ALIGNED_WORDS_VISIBLE)
            utterances = session.query(Utterance)
            if self.query_alignment:
                if show_phones:
                    utterances = utterances.options(
                        selectinload(Utterance.phone_intervals).options(
                            joinedload(PhoneInterval.phone, innerjoin=True),
-
-                        )
+                        ),
+                        selectinload(Utterance.reference_phone_intervals).options(
+                            joinedload(ReferencePhoneInterval.phone, innerjoin=True),
+                        ),
                    )
                if show_words:
                    utterances = utterances.options(
                        selectinload(Utterance.word_intervals).options(
                            joinedload(WordInterval.word, innerjoin=True),
-                            joinedload(WordInterval.workflow, innerjoin=True),
                        ),
                    )
-
-
-
-
+            utterances = utterances.options(
+                selectinload(Utterance.reference_word_intervals).options(
+                    joinedload(ReferenceWordInterval.word, innerjoin=True),
+                ),
+            )
+            utterances = utterances.filter(
+                Utterance.file_id == self.file_id,
+            ).order_by(Utterance.begin)
+
+            if self.utterance_id is not None:
+                utterances = utterances.filter(Utterance.id == self.utterance_id)
+            if file.duration > 500 and self.begin is not None and self.end is not None:
+                cached_begin = self.begin - 30
+                cached_end = self.end + 30
+                utterances = utterances.filter(
+                    Utterance.end >= cached_begin,
+                    Utterance.begin <= cached_end,
+                )
+            else:
+                cached_begin = None
+                cached_end = None
+            utterances = utterances.all()
+            if (
+                file.duration > 500
+                and self.begin is not None
+                and self.end is not None
+                and utterances
+            ):
+                cached_begin = min(cached_begin, utterances[0].begin)
+                cached_end = max(cached_end, utterances[-1].end)
+            self.signals.result.emit((utterances, self.file_id, cached_begin, cached_end))


 class SpectrogramWorker(Worker):  # pragma: no cover
@@ -3444,7 +3739,9 @@ class ImportCorpusWorker(FunctionWorker):  # pragma: no cover
         ).execution_options(logging_token="inspect_dataset_engine")
         with sqlalchemy.orm.Session(engine) as session:
             dictionary = (
-                session.query(Dictionary.path)
+                session.query(Dictionary.path)
+                .filter(Dictionary.path != "", Dictionary.path != ".")
+                .first()
             )
             if dictionary is not None:
                 self.dictionary_path = dictionary[0]
@@ -3452,7 +3749,11 @@ class ImportCorpusWorker(FunctionWorker):  # pragma: no cover
             pass
         try:
             if dataset_type is DatasetType.NONE:
-                if
+                if (
+                    self.dictionary_path
+                    and os.path.exists(self.dictionary_path)
+                    and str(self.dictionary_path) != "."
+                ):
                     self.corpus = AcousticCorpusWithPronunciations(
                         corpus_directory=self.corpus_path, dictionary_path=self.dictionary_path
                     )
@@ -3460,13 +3761,16 @@ class ImportCorpusWorker(FunctionWorker):  # pragma: no cover
                     self.corpus.dictionary_setup()
                     self.corpus.write_lexicon_information(write_disambiguation=False)
                 else:
-                    self.corpus =
+                    self.corpus = AcousticCorpusWithPronunciations(
+                        corpus_directory=self.corpus_path
+                    )
                     self.corpus.initialize_database()
-                    self.corpus.
+                    self.corpus.create_default_dictionary()

             elif (
                 dataset_type is DatasetType.ACOUSTIC_CORPUS_WITH_DICTIONARY
                 and self.dictionary_path
+                and str(self.dictionary_path) != "."
                 and os.path.exists(self.dictionary_path)
             ):
                 self.corpus = AcousticCorpusWithPronunciations(
@@ -3474,8 +3778,9 @@ class ImportCorpusWorker(FunctionWorker):  # pragma: no cover
                 )
                 self.corpus.inspect_database()
             else:
-                self.corpus =
+                self.corpus = AcousticCorpusWithPronunciations(corpus_directory=self.corpus_path)
                 self.corpus.inspect_database()
+                self.corpus.create_default_dictionary()
             self.corpus._load_corpus()
             if self.dictionary_path and os.path.exists(self.dictionary_path):
                 self.corpus.initialize_jobs()
@@ -3592,10 +3897,7 @@ class LoadReferenceWorker(FunctionWorker):  # pragma: no cover
         self.settings.sync()
         try:
             with self.corpus.session() as session:
-                session.query(PhoneInterval).filter(
-                    PhoneInterval.workflow_id == CorpusWorkflow.id,
-                    CorpusWorkflow.workflow_type == WorkflowType.reference,
-                ).delete(synchronize_session=False)
+                session.query(ReferencePhoneInterval).delete(synchronize_session=False)
                 session.query(CorpusWorkflow).filter(
                     CorpusWorkflow.workflow_type == WorkflowType.reference
                 ).delete(synchronize_session=False)
@@ -3794,8 +4096,8 @@ class ImportIvectorExtractorWorker(FunctionWorker):  # pragma: no cover
         if not self.model_path:
             return
         try:
-            if str(self.model_path)
-                model =
+            if str(self.model_path) in {"speechbrain", "pyannote"}:
+                model = str(self.model_path)
             else:
                 model = IvectorExtractorModel(self.model_path)
         except Exception:
@@ -3832,36 +4134,20 @@ class AlignUtteranceWorker(FunctionWorker):  # pragma: no cover
                 )
                 .get(self.utterance_id)
             )
-            workflow = self.corpus_model.corpus.get_latest_workflow_run(
-                WorkflowType.online_alignment, session
-            )
-
-            alignment_workflows = [
-                x
-                for x, in session.query(CorpusWorkflow.id).filter(
-                    CorpusWorkflow.workflow_type.in_(
-                        [WorkflowType.online_alignment, WorkflowType.alignment]
-                    )
-                )
-            ]
             session.query(PhoneInterval).filter(
                 PhoneInterval.utterance_id == utterance.id
-            ).delete(
-                synchronize_session=False
-            )
+            ).delete(synchronize_session=False)
             session.flush()
             session.query(WordInterval).filter(
                 WordInterval.utterance_id == utterance.id
-            ).delete(
-                synchronize_session=False
-            )
+            ).delete(synchronize_session=False)
             session.flush()
             ctm = align_utterance_online(
                 self.corpus_model.acoustic_model,
                 utterance.to_kalpy(),
                 self.corpus_model.align_lexicon_compiler,
             )
-            update_utterance_intervals(session, utterance,
+            update_utterance_intervals(session, utterance, ctm)
         except Exception:
             exctype, value = sys.exc_info()[:2]
             self.signals.error.emit((exctype, value, traceback.format_exc()))
@@ -4081,12 +4367,8 @@ class AlignmentWorker(FunctionWorker):  # pragma: no cover
                 )
             ]

-            session.query(PhoneInterval).filter(
-                PhoneInterval.workflow_id.in_(alignment_workflows)
-            ).delete(synchronize_session=False)
-            session.query(WordInterval).filter(
-                WordInterval.workflow_id.in_(alignment_workflows)
-            ).delete(synchronize_session=False)
+            session.query(PhoneInterval).delete(synchronize_session=False)
+            session.query(WordInterval).delete(synchronize_session=False)
             session.query(CorpusWorkflow).filter(
                 CorpusWorkflow.id.in_(alignment_workflows)
             ).delete(synchronize_session=False)
@@ -4163,7 +4445,7 @@ class ComputeIvectorWorker(FunctionWorker):  # pragma: no cover
         self.settings.sync()
         diarizer = SpeakerDiarizer(
             ivector_extractor_path=self.corpus_model.ivector_extractor.source
-            if self.corpus_model.ivector_extractor
+            if self.corpus_model.ivector_extractor not in {"speechbrain", "pyannote"}
             else self.corpus_model.ivector_extractor,
             corpus_directory=self.corpus_model.corpus.corpus_directory,
             cuda=self.settings.value(self.settings.CUDA),
@@ -4174,6 +4456,25 @@ class ComputeIvectorWorker(FunctionWorker):  # pragma: no cover
         if self.reset:
             logger.info("Resetting ivectors...")
             self.corpus_model.corpus.reset_features()
+        else:
+            time.sleep(1.0)
+            with self.corpus_model.corpus.session() as session:
+                logger.debug("Dropping indexes...")
+                session.execute(
+                    sqlalchemy.text("DROP INDEX IF EXISTS utterance_xvector_index;")
+                )
+                session.execute(sqlalchemy.text("DROP INDEX IF EXISTS speaker_xvector_index;"))
+                session.execute(
+                    sqlalchemy.text("DROP INDEX IF EXISTS utterance_ivector_index;")
+                )
+                session.execute(sqlalchemy.text("DROP INDEX IF EXISTS speaker_ivector_index;"))
+                session.execute(
+                    sqlalchemy.text("DROP INDEX IF EXISTS utterance_plda_vector_index;")
+                )
+                session.execute(
+                    sqlalchemy.text("DROP INDEX IF EXISTS speaker_plda_vector_index;")
+                )
+                session.commit()
         diarizer.inspect_database()
         diarizer.initialize_jobs()
         diarizer.corpus_output_directory = self.corpus_model.corpus.corpus_output_directory
@@ -4217,7 +4518,7 @@ class ComputePldaWorker(FunctionWorker):  # pragma: no cover
         self.settings.sync()
         diarizer = SpeakerDiarizer(
             ivector_extractor_path=self.ivector_extractor.source
-            if self.ivector_extractor
+            if self.ivector_extractor not in {"speechbrain", "pyannote"}
             else self.ivector_extractor,
             corpus_directory=self.corpus.corpus_directory,
             cuda=self.settings.value(self.settings.CUDA),
@@ -4266,7 +4567,7 @@ class ClusterUtterancesWorker(FunctionWorker):  # pragma: no cover
         self.parameters["expected_num_speakers"] = self.corpus.num_speakers
         diarizer = SpeakerDiarizer(
             ivector_extractor_path=self.ivector_extractor.source
-            if self.ivector_extractor
+            if self.ivector_extractor not in {"speechbrain", "pyannote"}
             else self.ivector_extractor,
             corpus_directory=self.corpus.corpus_directory,
             cuda=self.settings.value(self.settings.CUDA),
@@ -4319,7 +4620,7 @@ class ClassifySpeakersWorker(FunctionWorker):  # pragma: no cover
         self.settings.sync()
         diarizer = SpeakerDiarizer(
             ivector_extractor_path=self.ivector_extractor.source
-            if self.ivector_extractor
+            if self.ivector_extractor not in {"speechbrain", "pyannote"}
             else self.ivector_extractor,
             corpus_directory=self.corpus.corpus_directory,  # score_threshold = 0.5,
             cluster=False,
@@ -4424,16 +4725,8 @@ class TranscriptionWorker(FunctionWorker):  # pragma: no cover
         )
         try:
             with self.corpus.session() as session:
-                session.query(PhoneInterval).filter(
-                    PhoneInterval.workflow_id == CorpusWorkflow.id
-                ).filter(CorpusWorkflow.workflow_type == WorkflowType.transcription).delete(
-                    synchronize_session="fetch"
-                )
-                session.query(WordInterval).filter(
-                    WordInterval.workflow_id == CorpusWorkflow.id
-                ).filter(CorpusWorkflow.workflow_type == WorkflowType.transcription).delete(
-                    synchronize_session="fetch"
-                )
+                session.query(PhoneInterval).delete(synchronize_session="fetch")
+                session.query(WordInterval).delete(synchronize_session="fetch")
                 session.query(CorpusWorkflow).filter(
                     CorpusWorkflow.workflow_type == WorkflowType.transcription
                 ).delete()
@@ -4487,20 +4780,8 @@ class ValidationWorker(FunctionWorker):  # pragma: no cover
         )
         try:
             with self.corpus.session() as session:
-                session.query(PhoneInterval).filter(
-                    PhoneInterval.workflow_id == CorpusWorkflow.id
-                ).filter(
-                    CorpusWorkflow.workflow_type == WorkflowType.per_speaker_transcription
-                ).delete(
-                    synchronize_session="fetch"
-                )
-                session.query(WordInterval).filter(
-                    WordInterval.workflow_id == CorpusWorkflow.id
-                ).filter(
-                    CorpusWorkflow.workflow_type == WorkflowType.per_speaker_transcription
-                ).delete(
-                    synchronize_session="fetch"
-                )
+                session.query(PhoneInterval).delete(synchronize_session="fetch")
+                session.query(WordInterval).delete(synchronize_session="fetch")
                 session.query(CorpusWorkflow).filter(
                     CorpusWorkflow.workflow_type == WorkflowType.per_speaker_transcription
                 ).delete()
```