lattifai 1.3.1__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/alignment/lattice1_aligner.py +67 -13
- lattifai/cli/__init__.py +2 -0
- lattifai/config/alignment.py +8 -1
- {lattifai-1.3.1.dist-info → lattifai-1.3.2.dist-info}/METADATA +3 -3
- {lattifai-1.3.1.dist-info → lattifai-1.3.2.dist-info}/RECORD +9 -9
- {lattifai-1.3.1.dist-info → lattifai-1.3.2.dist-info}/WHEEL +0 -0
- {lattifai-1.3.1.dist-info → lattifai-1.3.2.dist-info}/entry_points.txt +0 -0
- {lattifai-1.3.1.dist-info → lattifai-1.3.2.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.3.1.dist-info → lattifai-1.3.2.dist-info}/top_level.txt +0 -0
|
@@ -165,10 +165,15 @@ class Lattice1Aligner(object):
|
|
|
165
165
|
return_details=return_details,
|
|
166
166
|
start_margin=self.config.start_margin,
|
|
167
167
|
end_margin=self.config.end_margin,
|
|
168
|
-
check_sanity=
|
|
168
|
+
check_sanity=self.config.check_sanity,
|
|
169
169
|
)
|
|
170
170
|
if verbose:
|
|
171
171
|
safe_print(colorful.green(f" ✓ Successfully aligned {len(alignments)} segments"))
|
|
172
|
+
if not self.config.check_sanity:
|
|
173
|
+
# Find and report low-score segments
|
|
174
|
+
low_score_segments = _find_low_score_segments(alignments)
|
|
175
|
+
if low_score_segments:
|
|
176
|
+
safe_print(colorful.yellow(_format_low_score_warning(low_score_segments)))
|
|
172
177
|
except LatticeDecodingError as e:
|
|
173
178
|
safe_print(colorful.red(" x Failed to decode lattice alignment results"))
|
|
174
179
|
_alignments = self.tokenizer.detokenize(
|
|
@@ -180,18 +185,17 @@ class Lattice1Aligner(object):
|
|
|
180
185
|
end_margin=self.config.end_margin,
|
|
181
186
|
check_sanity=False,
|
|
182
187
|
)
|
|
183
|
-
#
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
+
# Find low-score segments to provide helpful error context
|
|
189
|
+
low_score_segments = _find_low_score_segments(_alignments)
|
|
190
|
+
del _alignments
|
|
191
|
+
if low_score_segments:
|
|
192
|
+
warning_str = _format_low_score_warning(low_score_segments)
|
|
188
193
|
raise LatticeDecodingError(
|
|
189
194
|
lattice_id,
|
|
190
|
-
message=colorful.yellow("
|
|
191
|
-
skip_help=True,
|
|
195
|
+
message=colorful.yellow("Media-text mismatch detected:\n" + warning_str),
|
|
196
|
+
skip_help=True,
|
|
192
197
|
)
|
|
193
198
|
else:
|
|
194
|
-
del _alignments
|
|
195
199
|
raise e
|
|
196
200
|
except Exception as e:
|
|
197
201
|
safe_print(colorful.red(" x Failed to decode lattice alignment results"))
|
|
@@ -204,6 +208,12 @@ class Lattice1Aligner(object):
|
|
|
204
208
|
self.worker.profile()
|
|
205
209
|
|
|
206
210
|
|
|
211
|
+
def _is_event_segment(text: str) -> bool:
|
|
212
|
+
"""Check if text is an event marker like [MUSIC], [Applause], [Writes equation]."""
|
|
213
|
+
text = text.strip()
|
|
214
|
+
return text.startswith("[") and text.endswith("]")
|
|
215
|
+
|
|
216
|
+
|
|
207
217
|
def _detect_score_anomalies(
|
|
208
218
|
alignments: List[Supervision],
|
|
209
219
|
drop_threshold: float = 0.08,
|
|
@@ -215,6 +225,9 @@ def _detect_score_anomalies(
|
|
|
215
225
|
When the drop is significant, it indicates the audio doesn't match
|
|
216
226
|
the text starting at that position.
|
|
217
227
|
|
|
228
|
+
Event segments like [MUSIC], [Applause] are excluded from scoring as they
|
|
229
|
+
naturally have low alignment scores.
|
|
230
|
+
|
|
218
231
|
Args:
|
|
219
232
|
alignments: List of aligned supervisions with scores
|
|
220
233
|
drop_threshold: Minimum drop between before/after averages to trigger
|
|
@@ -223,10 +236,16 @@ def _detect_score_anomalies(
|
|
|
223
236
|
Returns:
|
|
224
237
|
Dict with anomaly info if found, None otherwise
|
|
225
238
|
"""
|
|
226
|
-
|
|
227
|
-
|
|
239
|
+
# Build (original_index, score) pairs, excluding events and None scores
|
|
240
|
+
indexed_scores = [
|
|
241
|
+
(i, s.score) for i, s in enumerate(alignments) if s.score is not None and not _is_event_segment(s.text)
|
|
242
|
+
]
|
|
243
|
+
if len(indexed_scores) < window_size * 2:
|
|
228
244
|
return None
|
|
229
245
|
|
|
246
|
+
scores = [score for _, score in indexed_scores]
|
|
247
|
+
orig_indices = [idx for idx, _ in indexed_scores]
|
|
248
|
+
|
|
230
249
|
for i in range(window_size, len(scores) - window_size):
|
|
231
250
|
before_avg = np.mean(scores[i - window_size : i])
|
|
232
251
|
after_avg = np.mean(scores[i : i + window_size])
|
|
@@ -236,12 +255,15 @@ def _detect_score_anomalies(
|
|
|
236
255
|
if drop > drop_threshold:
|
|
237
256
|
# Find the exact mutation point (largest single-step drop)
|
|
238
257
|
max_drop = 0
|
|
239
|
-
|
|
258
|
+
filtered_mutation_idx = i
|
|
240
259
|
for j in range(i - 1, min(i + window_size, len(scores) - 1)):
|
|
241
260
|
single_drop = scores[j] - scores[j + 1]
|
|
242
261
|
if single_drop > max_drop:
|
|
243
262
|
max_drop = single_drop
|
|
244
|
-
|
|
263
|
+
filtered_mutation_idx = j + 1
|
|
264
|
+
|
|
265
|
+
# Map back to original alignments index
|
|
266
|
+
mutation_idx = orig_indices[filtered_mutation_idx]
|
|
245
267
|
|
|
246
268
|
# Segments: last normal + anomaly segments
|
|
247
269
|
last_normal = alignments[mutation_idx - 1] if mutation_idx > 0 else None
|
|
@@ -290,3 +312,35 @@ def _format_anomaly_warning(anomaly: Dict[str, Any]) -> str:
|
|
|
290
312
|
lines.append("")
|
|
291
313
|
lines.append(" Possible causes: Transcription error, missing content, or wrong audio region")
|
|
292
314
|
return "\n".join(lines)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _find_low_score_segments(
    alignments: List[Supervision],
    threshold: float = 0.7,
) -> List[Tuple[int, Supervision]]:
    """Collect scored, non-event segments whose score is under ``threshold``.

    Args:
        alignments: Aligned supervisions; each is read for ``score`` and ``text``.
        threshold: Exclusive upper bound; a segment is reported only when its
            score is strictly below this value.

    Returns:
        ``(index, supervision)`` pairs in input order, where ``index`` is the
        segment's position in ``alignments``.
    """
    flagged = []
    for position, segment in enumerate(alignments):
        # Segments without a score cannot be judged.
        if segment.score is None:
            continue
        # Keep the ``<`` comparison itself (rather than ``>=``) so that
        # unordered values such as NaN are skipped exactly as before.
        if not segment.score < threshold:
            continue
        # Event markers are not reported.
        if _is_event_segment(segment.text):
            continue
        flagged.append((position, segment))
    return flagged
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _format_low_score_warning(low_score_segments: List[Tuple[int, Supervision]]) -> str:
|
|
338
|
+
"""Format low-score segments as warning message."""
|
|
339
|
+
lines = [
|
|
340
|
+
f"⚠️ Found {len(low_score_segments)} low-score segments (potential mismatches):",
|
|
341
|
+
"",
|
|
342
|
+
]
|
|
343
|
+
for idx, seg in low_score_segments:
|
|
344
|
+
text_preview = seg.text[:50] + "..." if len(seg.text) > 50 else seg.text
|
|
345
|
+
lines.append(f' #{idx} [{seg.start:.2f}s-{seg.end:.2f}s] score={seg.score:.4f} "{text_preview}"')
|
|
346
|
+
return "\n".join(lines)
|
lattifai/cli/__init__.py
CHANGED
lattifai/config/alignment.py
CHANGED
|
@@ -21,7 +21,7 @@ class AlignmentConfig:
|
|
|
21
21
|
model_name: str = "LattifAI/Lattice-1"
|
|
22
22
|
"""Model identifier or path to local model directory (e.g., 'LattifAI/Lattice-1')."""
|
|
23
23
|
|
|
24
|
-
model_hub: Literal["huggingface", "modelscope"] = "huggingface"
|
|
24
|
+
model_hub: Literal["huggingface", "modelscope"] = "modelscope"
|
|
25
25
|
"""Which model hub to use when resolving remote model names: 'huggingface' or 'modelscope'."""
|
|
26
26
|
|
|
27
27
|
device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
|
|
@@ -107,6 +107,13 @@ class AlignmentConfig:
|
|
|
107
107
|
Default: 0.0 (no penalty). Typical range: -1.0 to 0.0 (e.g., -0.5).
|
|
108
108
|
"""
|
|
109
109
|
|
|
110
|
+
check_sanity: bool = True
|
|
111
|
+
"""Whether to perform sanity checks on alignment results.
|
|
112
|
+
When True, raises an error if media and caption content don't match.
|
|
113
|
+
When False, outputs a warning instead of raising an error on mismatch.
|
|
114
|
+
Default: True.
|
|
115
|
+
"""
|
|
116
|
+
|
|
110
117
|
client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
|
|
111
118
|
"""Reference to the SyncAPIClient instance. Auto-set during client initialization."""
|
|
112
119
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lattifai
|
|
3
|
-
Version: 1.3.1
|
|
3
|
+
Version: 1.3.2
|
|
4
4
|
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
5
|
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
6
|
Maintainer-email: Lattice <tech@lattifai.com>
|
|
@@ -53,8 +53,8 @@ License-File: LICENSE
|
|
|
53
53
|
Requires-Dist: python-dotenv
|
|
54
54
|
Requires-Dist: colorful>=0.5.6
|
|
55
55
|
Requires-Dist: lattifai-run>=1.0.1
|
|
56
|
-
Requires-Dist: lattifai-core>=0.6.
|
|
57
|
-
Requires-Dist: lattifai-captions[splitting]>=0.1.
|
|
56
|
+
Requires-Dist: lattifai-core>=0.6.5
|
|
57
|
+
Requires-Dist: lattifai-captions[splitting]>=0.1.7
|
|
58
58
|
Requires-Dist: g2p-phonemizer>=0.4.0
|
|
59
59
|
Requires-Dist: error-align-fix>=0.1.4
|
|
60
60
|
Requires-Dist: lhotse>=1.26.0
|
|
@@ -7,21 +7,21 @@ lattifai/mixin.py,sha256=_d-kzu2w5Y4mZTFMEumh30baWzKsQ9n_IS432woi5vQ,26295
|
|
|
7
7
|
lattifai/types.py,sha256=JK7KVaZhX89BiKPm4okY0DWLHY1S8aj-YiZXoVH1akw,667
|
|
8
8
|
lattifai/utils.py,sha256=5LeunAN0OQ1jWoKMIThpXSEOxFYD2dCRTdsglosodUU,7963
|
|
9
9
|
lattifai/alignment/__init__.py,sha256=aOyC1P5DqESNLpDh6Gu6LyUZAVMba-IKI7Ugz7v9G4w,344
|
|
10
|
-
lattifai/alignment/lattice1_aligner.py,sha256=
|
|
10
|
+
lattifai/alignment/lattice1_aligner.py,sha256=2tLk215a3E8mvmQWNW-mgjEd2zE0myYRXXtAF_Epb9c,13255
|
|
11
11
|
lattifai/alignment/lattice1_worker.py,sha256=Z7hxaS-nucNsUmrphbD8tgBBYPkJOgQb-85nFON94_I,13041
|
|
12
12
|
lattifai/alignment/phonemizer.py,sha256=fbhN2DOl39lW4nQWKzyUUTMUabg7v61lB1kj8SKK-Sw,1761
|
|
13
13
|
lattifai/alignment/punctuation.py,sha256=qLcvuXhBzoEa6bznWZiAB5TAxR6eLr_ZV-PnnCY90UA,1218
|
|
14
14
|
lattifai/alignment/segmenter.py,sha256=JTbBYEXn8hkFwy0tITORy7nKoUPiNYLfi3w1DJNeHZ0,6303
|
|
15
15
|
lattifai/alignment/text_align.py,sha256=sF-6Tsf863BhJcii3joeNa6Auv-7l3SiOhh9j8oPGME,14935
|
|
16
16
|
lattifai/alignment/tokenizer.py,sha256=OIpMGHg1rJ7n97zncDMPpXy32uGOSt1yXiNO4sO6eP0,18839
|
|
17
|
-
lattifai/cli/__init__.py,sha256=
|
|
17
|
+
lattifai/cli/__init__.py,sha256=oNIgyYFGdG0gsVTrPULMwJxTchee-5h08tYiAPDqh_k,619
|
|
18
18
|
lattifai/cli/alignment.py,sha256=rqg6wU2vf6RJ058yWVoXft_UJfOCrEpmE-ye5fhTphg,6129
|
|
19
19
|
lattifai/cli/caption.py,sha256=jkMme73sJ16dkVpRh7O6qjbr14SUeBif00vCTBn7ed0,10339
|
|
20
20
|
lattifai/cli/diarization.py,sha256=GTd2vnTm6cJN6Q3mFP-ShY9bZBl1_zKzWFu-4HHcMzk,4075
|
|
21
21
|
lattifai/cli/transcribe.py,sha256=vZIV0TCbZG_IL2F_Mg49cCGSCBinOOFAtROajVTpNWE,7853
|
|
22
22
|
lattifai/cli/youtube.py,sha256=FJwDl48-cuacP1sdPvX19vdszXdT7EoOZgGYzJpoLeM,6360
|
|
23
23
|
lattifai/config/__init__.py,sha256=nJUVk03JRj4rujoEmkCkQ8akZF7kqIj7ci3XphU9uVA,1249
|
|
24
|
-
lattifai/config/alignment.py,sha256=
|
|
24
|
+
lattifai/config/alignment.py,sha256=Bc7_5Sp-5pKVaQ90BRKva2xc9e3gck61JFtwtEplCWc,5659
|
|
25
25
|
lattifai/config/caption.py,sha256=OMLsW8QKDWM6A3G5V3Gf-9bgB3D1PC5gO8LiiNNeOwM,7195
|
|
26
26
|
lattifai/config/client.py,sha256=qqHKFPV4iEjVHCDOuGx7kj-tYFtgZZAszOQRFsNFbO8,2359
|
|
27
27
|
lattifai/config/diarization.py,sha256=cIkwCfsYqfMns3i6tKWcwBBBkdnhhmB_Eo0TuOPCw9o,2484
|
|
@@ -49,9 +49,9 @@ lattifai/workflow/file_manager.py,sha256=yc29Vb7JNUMJ9rwM_YjkAHfDInl8HMVAl9A7z7X
|
|
|
49
49
|
lattifai/youtube/__init__.py,sha256=_uO3KCx-t6I-JaYFpcYLYpvkbmEOOni3xBqGEbExg68,1587
|
|
50
50
|
lattifai/youtube/client.py,sha256=VU8FC1N7YYpbc4LeJNAsahNAI1R7e3_7Yjmb1rz7tyI,52878
|
|
51
51
|
lattifai/youtube/types.py,sha256=80RgBmvM4tRbxqyNv9GU6hr9vPp_yhKrK0RJ_vG2h4E,472
|
|
52
|
-
lattifai-1.3.
|
|
53
|
-
lattifai-1.3.
|
|
54
|
-
lattifai-1.3.
|
|
55
|
-
lattifai-1.3.
|
|
56
|
-
lattifai-1.3.
|
|
57
|
-
lattifai-1.3.
|
|
52
|
+
lattifai-1.3.2.dist-info/licenses/LICENSE,sha256=xGMLmdFJy6Jkz3Hd0znyQLmcxC93FSZB5isKnEDMoQQ,1066
|
|
53
|
+
lattifai-1.3.2.dist-info/METADATA,sha256=kVJb9QV958OrxRytDeIDIb0vyu8xHKUjda4HH-cxlqU,23564
|
|
54
|
+
lattifai-1.3.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
55
|
+
lattifai-1.3.2.dist-info/entry_points.txt,sha256=MfoqXNjXrhD7VMApHgaHmAECTcGVUMUiR0uqnTg7Ads,502
|
|
56
|
+
lattifai-1.3.2.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
|
|
57
|
+
lattifai-1.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|