lattifai 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +0 -25
- lattifai/alignment/lattice1_aligner.py +12 -9
- lattifai/alignment/lattice1_worker.py +124 -155
- lattifai/alignment/segmenter.py +1 -1
- lattifai/alignment/sentence_splitter.py +219 -0
- lattifai/alignment/tokenizer.py +23 -179
- lattifai/audio2.py +1 -1
- lattifai/caption/caption.py +0 -2
- lattifai/caption/gemini_reader.py +151 -60
- lattifai/cli/diarization.py +3 -1
- lattifai/cli/transcribe.py +3 -8
- lattifai/cli/youtube.py +11 -0
- lattifai/client.py +96 -47
- lattifai/config/alignment.py +2 -2
- lattifai/config/client.py +5 -0
- lattifai/mixin.py +17 -8
- lattifai/utils.py +40 -4
- lattifai/workflow/youtube.py +55 -57
- {lattifai-1.1.0.dist-info → lattifai-1.2.1.dist-info}/METADATA +331 -48
- {lattifai-1.1.0.dist-info → lattifai-1.2.1.dist-info}/RECORD +24 -23
- {lattifai-1.1.0.dist-info → lattifai-1.2.1.dist-info}/WHEEL +0 -0
- {lattifai-1.1.0.dist-info → lattifai-1.2.1.dist-info}/entry_points.txt +0 -0
- {lattifai-1.1.0.dist-info → lattifai-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.1.0.dist-info → lattifai-1.2.1.dist-info}/top_level.txt +0 -0
lattifai/mixin.py
CHANGED
@@ -290,12 +290,12 @@ class LattifAIClientMixin:
         diarization_file = Path(str(input_caption)).with_suffix(".SpkDiar")
         if diarization_file.exists():
             if verbose:
-                safe_print(colorful.cyan(f"📖
+                safe_print(colorful.cyan(f"📖 Step1b: Reading speaker diarization from {diarization_file}"))
             caption.read_speaker_diarization(diarization_file)
         events_file = Path(str(input_caption)).with_suffix(".AED")
         if events_file.exists():
             if verbose:
-                safe_print(colorful.cyan(f"📖
+                safe_print(colorful.cyan(f"📖 Step1c: Reading audio events from {events_file}"))
             from tgt import read_textgrid

             caption.audio_events = read_textgrid(events_file)
@@ -404,6 +404,14 @@ class LattifAIClientMixin:
             # Transcription mode: use Transcriber to transcribe
             self._validate_transcription_setup()

+            if output_dir:
+                # Generate transcript file path
+                transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
+                if transcript_file.exists():
+                    safe_print(colorful.cyan(f" Using existing transcript file: {transcript_file}"))
+                    transcription = self._read_caption(transcript_file, normalize_text=False)
+                    return transcription
+
             safe_print(colorful.cyan(f"🎤 Transcribing({self.transcriber.name}) media: {str(media_file)} ..."))
             transcription = await self.transcriber.transcribe_file(media_file, language=source_lang)
             safe_print(colorful.green(" ✓ Transcription completed."))
@@ -442,8 +450,6 @@ class LattifAIClientMixin:
                 safe_print(colorful.yellow(f"First segment: {transcription.transcription[0].text}"))

             if output_dir:
-                # Generate transcript file path
-                transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"
                 await asyncio.to_thread(self.transcriber.write, transcription, transcript_file, encoding="utf-8")
                 safe_print(colorful.green(f" ✓ Transcription saved to: {transcript_file}"))

@@ -491,10 +497,13 @@ class LattifAIClientMixin:
                 safe_print(colorful.green(f"📄 Using provided caption file: {caption_path}"))
                 return str(caption_path)
             else:
-
-
-
-
+                safe_print(colorful.red(f"Provided caption path does not exist: {caption_path}, use transcription"))
+                use_transcription = True
+                transcript_file = caption_path
+                caption_path.parent.mkdir(parents=True, exist_ok=True)
+        else:
+            # Generate transcript file path
+            transcript_file = output_dir / f"{Path(str(media_file)).stem}_{self.transcriber.file_name}"

         if use_transcription:
             # Transcription mode: use Transcriber to transcribe
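Taken together, the mixin.py hunks compute the transcript path before transcribing, so an existing "<media stem>_<transcriber.file_name>" file in output_dir is read back instead of re-transcribing, and a provided caption path that does not exist now falls back to transcription (creating its parent directory as the target for the new transcript). Below is a minimal sketch of that reuse flow, with a hypothetical Transcriber stub standing in for lattifai's real transcriber; only the caching decision mirrors the diff.

# Sketch only: Transcriber is a stand-in stub, not the lattifai class.
import asyncio
from pathlib import Path
from typing import Optional


class Transcriber:
    file_name = "transcript.srt"  # suffix appended to the media stem, as in the diff

    async def transcribe_file(self, media_file: Path) -> str:
        return f"transcript of {media_file.name}"  # placeholder work

    def write(self, transcription: str, path: Path) -> None:
        path.write_text(transcription, encoding="utf-8")


async def get_transcription(media_file: Path, output_dir: Optional[Path]) -> str:
    transcriber = Transcriber()
    transcript_file = None
    if output_dir:
        output_dir.mkdir(parents=True, exist_ok=True)
        # Same naming scheme as the diff: "<media stem>_<transcriber file name>"
        transcript_file = output_dir / f"{media_file.stem}_{transcriber.file_name}"
        if transcript_file.exists():
            # New in 1.2.1: reuse the existing transcript instead of re-transcribing
            return transcript_file.read_text(encoding="utf-8")
    transcription = await transcriber.transcribe_file(media_file)
    if transcript_file is not None:
        # Persist the transcript off the event loop, as the diff does
        await asyncio.to_thread(transcriber.write, transcription, transcript_file)
    return transcription

# Example: asyncio.run(get_transcription(Path("talk.mp4"), Path("out")))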
lattifai/utils.py
CHANGED
@@ -68,7 +68,7 @@ def _is_cache_valid(cache_dir: Path) -> bool:
         date_str = latest_marker.name.replace(".done", "")
         marker_date = datetime.strptime(date_str, "%Y%m%d")
         # Check if marker is older than 1 days
-        if datetime.now() - marker_date > timedelta(days=
+        if datetime.now() - marker_date > timedelta(days=7):
             return False
         return True
     except (ValueError, IndexError):
@@ -108,12 +108,18 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
         return str(Path(model_name_or_path).expanduser())

     if hub == "huggingface":
-        from huggingface_hub import snapshot_download
+        from huggingface_hub import HfApi, snapshot_download
         from huggingface_hub.constants import HF_HUB_CACHE
         from huggingface_hub.errors import LocalEntryNotFoundError

+        # Support repo_id@revision syntax
+        hf_repo_id = model_name_or_path
+        revision = None
+        if "@" in model_name_or_path:
+            hf_repo_id, revision = model_name_or_path.split("@", 1)
+
         # Determine cache directory for this model
-        cache_dir = Path(HF_HUB_CACHE) / f'models--{
+        cache_dir = Path(HF_HUB_CACHE) / f'models--{hf_repo_id.replace("/", "--")}'

         # Check if we have a valid cached version
         if _is_cache_valid(cache_dir):
@@ -126,8 +132,23 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
             latest_snapshot = max(snapshot_dirs, key=lambda p: p.stat().st_mtime)
             return str(latest_snapshot)

+        # If no specific revision/commit is provided, try to fetch the real latest SHA
+        # to bypass Hugging Face's model_info (metadata) sync lag.
+        if not revision:
+            try:
+                api = HfApi()
+                refs = api.list_repo_refs(repo_id=hf_repo_id, repo_type="model")
+                # Look for the default branch (usually 'main')
+                for branch in refs.branches:
+                    if branch.name == "main":
+                        revision = branch.target_commit
+                        break
+            except Exception:
+                # Fallback to default behavior if API call fails
+                revision = None
+
         try:
-            downloaded_path = snapshot_download(repo_id=
+            downloaded_path = snapshot_download(repo_id=hf_repo_id, repo_type="model", revision=revision)
             _create_cache_marker(cache_dir)
             return downloaded_path
         except LocalEntryNotFoundError:
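The Hugging Face hunks above add a repo_id@revision syntax and, when no revision is given, pin to the latest commit on main via HfApi.list_repo_refs before calling snapshot_download, sidestepping stale hub metadata. A standalone approximation of that resolution step follows (assumes network access and the huggingface_hub package; the package's cache-marker handling is omitted).

# Sketch of the revision-resolution logic, not the packaged _resolve_model_path.
from huggingface_hub import HfApi, snapshot_download


def resolve_hf_model(model_name_or_path: str) -> str:
    # Support the new "repo_id@revision" syntax
    hf_repo_id, revision = model_name_or_path, None
    if "@" in model_name_or_path:
        hf_repo_id, revision = model_name_or_path.split("@", 1)

    if not revision:
        try:
            # Pin to the latest commit on 'main' to avoid stale metadata
            refs = HfApi().list_repo_refs(repo_id=hf_repo_id, repo_type="model")
            revision = next(
                (b.target_commit for b in refs.branches if b.name == "main"), None
            )
        except Exception:
            revision = None  # fall back to the hub's default resolution

    # Download (or reuse the local cache for) the resolved snapshot
    return snapshot_download(repo_id=hf_repo_id, repo_type="model", revision=revision)

# e.g. resolve_hf_model("openai/whisper-tiny")
#      resolve_hf_model("openai/whisper-tiny@<commit-sha>")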
@@ -151,8 +172,23 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
         # modelscope path
         from modelscope.hub.snapshot_download import snapshot_download as ms_snapshot

+        # Determine cache directory for ModelScope
+        # ModelScope uses ~/.cache/modelscope/hub/models/{org}/{model} structure
+        modelscope_cache = Path.home() / ".cache" / "modelscope" / "hub" / "models"
+        cache_dir = modelscope_cache / model_name_or_path
+
+        # Check if we have a valid cached version
+        if _is_cache_valid(cache_dir):
+            # Return the cached path directly
+            if cache_dir.exists():
+                return str(cache_dir)
+
         try:
             downloaded_path = ms_snapshot(model_name_or_path)
+            # Create cache marker after successful download
+            if downloaded_path:
+                actual_cache_dir = Path(downloaded_path)
+                _create_cache_marker(actual_cache_dir)
             return downloaded_path
         except Exception as e:  # pragma: no cover
             raise ModelLoadError(model_name_or_path, original_error=e)
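The ModelScope branch now gets the same freshness check as the Hugging Face path: a dated .done marker under ~/.cache/modelscope/hub/models/{org}/{model} short-circuits the download while it is recent, and a marker is written after a successful snapshot_download. The packaged helpers are _is_cache_valid and _create_cache_marker, whose bodies are only partially visible in this diff, so the sketch below is an approximation of the marker scheme rather than the actual implementation (the 7-day window matches the change in the first utils.py hunk).

# Simplified date-marker caching, approximating the utils.py helpers.
from datetime import datetime, timedelta
from pathlib import Path


def is_cache_valid(cache_dir: Path, max_age_days: int = 7) -> bool:
    markers = sorted(cache_dir.glob("*.done"))
    if not markers:
        return False
    try:
        marker_date = datetime.strptime(markers[-1].name.replace(".done", ""), "%Y%m%d")
    except ValueError:
        return False
    return datetime.now() - marker_date <= timedelta(days=max_age_days)


def create_cache_marker(cache_dir: Path) -> None:
    # One empty "<YYYYMMDD>.done" file per refresh day
    cache_dir.mkdir(parents=True, exist_ok=True)
    (cache_dir / f"{datetime.now():%Y%m%d}.done").touch()

# Layout assumed by the diff (example only, not executed here):
#   cache_dir = Path.home() / ".cache" / "modelscope" / "hub" / "models" / "org" / "model"
#   if not is_cache_valid(cache_dir):
#       downloaded = ms_snapshot("org/model"); create_cache_marker(Path(downloaded))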
lattifai/workflow/youtube.py
CHANGED
@@ -429,79 +429,77 @@ class YouTubeDownloader:
             result = await loop.run_in_executor(
                 None, lambda: subprocess.run(ytdlp_options, capture_output=True, text=True, check=True)
             )
-
             # Only log success message, not full yt-dlp output
             self.logger.debug(f"yt-dlp output: {result.stdout.strip()}")
-
-            # Find the downloaded transcript file
-            caption_patterns = [
-                f"{video_id}.*vtt",
-                f"{video_id}.*srt",
-                f"{video_id}.*sub",
-                f"{video_id}.*sbv",
-                f"{video_id}.*ssa",
-                f"{video_id}.*ass",
-            ]
-
-            caption_files = []
-            for pattern in caption_patterns:
-                _caption_files = list(target_dir.glob(pattern))
-                for caption_file in _caption_files:
-                    self.logger.info(f"📥 Downloaded caption: {caption_file}")
-                caption_files.extend(_caption_files)
-
-            if not caption_files:
-                self.logger.warning("No caption available for this video")
-                return None
-
-            # If only one caption file, return it directly
-            if len(caption_files) == 1:
-                self.logger.info(f"✅ Using caption: {caption_files[0]}")
-                return str(caption_files[0])
-
-            # Multiple caption files found, let user choose
-            if FileExistenceManager.is_interactive_mode():
-                self.logger.info(f"📋 Found {len(caption_files)} caption files")
-                caption_choice = FileExistenceManager.prompt_file_selection(
-                    file_type="caption",
-                    files=[str(f) for f in caption_files],
-                    operation="use",
-                    transcriber_name=transcriber_name,
-                )
-
-                if caption_choice == "cancel":
-                    raise RuntimeError("Caption selection cancelled by user")
-                elif caption_choice == TRANSCRIBE_CHOICE:
-                    return caption_choice
-                elif caption_choice:
-                    self.logger.info(f"✅ Selected caption: {caption_choice}")
-                    return caption_choice
-                else:
-                    # Fallback to first file
-                    self.logger.info(f"✅ Using first caption: {caption_files[0]}")
-                    return str(caption_files[0])
-            else:
-                # Non-interactive mode: use first file
-                self.logger.info(f"✅ Using first caption: {caption_files[0]}")
-                return str(caption_files[0])
-
         except subprocess.CalledProcessError as e:
             error_msg = e.stderr.strip() if e.stderr else str(e)

             # Check for specific error conditions
             if "No automatic or manual captions found" in error_msg:
                 self.logger.warning("No captions available for this video")
-                return None
             elif "HTTP Error 429" in error_msg or "Too Many Requests" in error_msg:
                 self.logger.error("YouTube rate limit exceeded. Please try again later or use a different method.")
-
+                self.logger.error(
                     "YouTube rate limit exceeded (HTTP 429). "
                     "Try again later or use --cookies option with authenticated cookies. "
                     "See: https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp"
                 )
             else:
                 self.logger.error(f"Failed to download transcript: {error_msg}")
-
+
+        # Find the downloaded transcript file
+        caption_patterns = [
+            f"{video_id}.*vtt",
+            f"{video_id}.*srt",
+            f"{video_id}.*sub",
+            f"{video_id}.*sbv",
+            f"{video_id}.*ssa",
+            f"{video_id}.*ass",
+        ]
+
+        caption_files = []
+        for pattern in caption_patterns:
+            _caption_files = list(target_dir.glob(pattern))
+            for caption_file in _caption_files:
+                self.logger.info(f"📥 Downloaded caption: {caption_file}")
+            caption_files.extend(_caption_files)
+
+        # If only one caption file, return it directly
+        if len(caption_files) == 1:
+            self.logger.info(f"✅ Using caption: {caption_files[0]}")
+            return str(caption_files[0])
+
+        # Multiple caption files found, let user choose
+        if FileExistenceManager.is_interactive_mode():
+            self.logger.info(f"📋 Found {len(caption_files)} caption files")
+            caption_choice = FileExistenceManager.prompt_file_selection(
+                file_type="caption",
+                files=[str(f) for f in caption_files],
+                operation="use",
+                transcriber_name=transcriber_name,
+            )
+
+            if caption_choice == "cancel":
+                raise RuntimeError("Caption selection cancelled by user")
+            elif caption_choice == TRANSCRIBE_CHOICE:
+                return caption_choice
+            elif caption_choice:
+                self.logger.info(f"✅ Selected caption: {caption_choice}")
+                return caption_choice
+            elif caption_files:
+                # Fallback to first file
+                self.logger.info(f"✅ Using first caption: {caption_files[0]}")
+                return str(caption_files[0])
+            else:
+                self.logger.warning("No caption files available after download")
+                return None
+        elif caption_files:
+            # Non-interactive mode: use first file
+            self.logger.info(f"✅ Using first caption: {caption_files[0]}")
+            return str(caption_files[0])
+        else:
+            self.logger.warning("No caption files available after download")
+            return None

     async def list_available_captions(self, url: str) -> List[Dict[str, Any]]:
         """