lattifai 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/_init.py +20 -0
- lattifai/alignment/__init__.py +9 -1
- lattifai/alignment/lattice1_aligner.py +175 -54
- lattifai/alignment/lattice1_worker.py +47 -4
- lattifai/alignment/punctuation.py +38 -0
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/text_align.py +441 -0
- lattifai/alignment/tokenizer.py +134 -65
- lattifai/audio2.py +162 -183
- lattifai/cli/__init__.py +2 -1
- lattifai/cli/alignment.py +5 -0
- lattifai/cli/caption.py +111 -4
- lattifai/cli/transcribe.py +2 -6
- lattifai/cli/youtube.py +7 -1
- lattifai/client.py +72 -123
- lattifai/config/__init__.py +28 -0
- lattifai/config/alignment.py +14 -0
- lattifai/config/caption.py +45 -31
- lattifai/config/client.py +16 -0
- lattifai/config/event.py +102 -0
- lattifai/config/media.py +20 -0
- lattifai/config/transcription.py +25 -1
- lattifai/data/__init__.py +8 -0
- lattifai/data/caption.py +228 -0
- lattifai/diarization/__init__.py +41 -1
- lattifai/errors.py +78 -53
- lattifai/event/__init__.py +65 -0
- lattifai/event/lattifai.py +166 -0
- lattifai/mixin.py +49 -32
- lattifai/transcription/base.py +8 -2
- lattifai/transcription/gemini.py +147 -16
- lattifai/transcription/lattifai.py +25 -63
- lattifai/types.py +1 -1
- lattifai/utils.py +7 -13
- lattifai/workflow/__init__.py +28 -4
- lattifai/workflow/file_manager.py +2 -5
- lattifai/youtube/__init__.py +43 -0
- lattifai/youtube/client.py +1265 -0
- lattifai/youtube/types.py +23 -0
- lattifai-1.3.0.dist-info/METADATA +678 -0
- lattifai-1.3.0.dist-info/RECORD +57 -0
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +1 -2
- lattifai/__init__.py +0 -88
- lattifai/alignment/sentence_splitter.py +0 -219
- lattifai/caption/__init__.py +0 -20
- lattifai/caption/caption.py +0 -1467
- lattifai/caption/gemini_reader.py +0 -462
- lattifai/caption/gemini_writer.py +0 -173
- lattifai/caption/supervision.py +0 -34
- lattifai/caption/text_parser.py +0 -145
- lattifai/cli/app_installer.py +0 -142
- lattifai/cli/server.py +0 -44
- lattifai/server/app.py +0 -427
- lattifai/workflow/youtube.py +0 -577
- lattifai-1.2.1.dist-info/METADATA +0 -1134
- lattifai-1.2.1.dist-info/RECORD +0 -58
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
lattifai/transcription/base.py
CHANGED
|
@@ -7,8 +7,9 @@ from typing import List, Optional, Union
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
|
|
9
9
|
from lattifai.audio2 import AudioData
|
|
10
|
-
from lattifai.caption import
|
|
10
|
+
from lattifai.caption import Supervision
|
|
11
11
|
from lattifai.config import TranscriptionConfig
|
|
12
|
+
from lattifai.data import Caption
|
|
12
13
|
from lattifai.logging import get_logger
|
|
13
14
|
|
|
14
15
|
|
|
@@ -41,8 +42,13 @@ class BaseTranscriber(ABC):
|
|
|
41
42
|
self.logger = get_logger("transcription")
|
|
42
43
|
|
|
43
44
|
@property
|
|
45
|
+
@abstractmethod
|
|
44
46
|
def name(self) -> str:
|
|
45
|
-
"""Human-readable name of the transcriber.
|
|
47
|
+
"""Human-readable name of the transcriber.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
str: Identifier for the transcriber (e.g., 'gemini', 'parakeet').
|
|
51
|
+
"""
|
|
46
52
|
|
|
47
53
|
@property
|
|
48
54
|
def file_name(self) -> str:
|
lattifai/transcription/gemini.py
CHANGED
|
@@ -11,6 +11,7 @@ from google.genai.types import GenerateContentConfig, Part, ThinkingConfig
|
|
|
11
11
|
from lattifai.audio2 import AudioData
|
|
12
12
|
from lattifai.caption import Supervision
|
|
13
13
|
from lattifai.config import TranscriptionConfig
|
|
14
|
+
from lattifai.data import Caption
|
|
14
15
|
from lattifai.transcription.base import BaseTranscriber
|
|
15
16
|
from lattifai.transcription.prompts import get_prompt_loader
|
|
16
17
|
|
|
@@ -245,18 +246,41 @@ class GeminiTranscriber(BaseTranscriber):
|
|
|
245
246
|
return transcript
|
|
246
247
|
|
|
247
248
|
def _get_transcription_prompt(self) -> str:
|
|
248
|
-
"""Get (and cache) transcription system prompt
|
|
249
|
+
"""Get (and cache) transcription system prompt.
|
|
250
|
+
|
|
251
|
+
Priority:
|
|
252
|
+
1. Custom prompt from config.prompt (file path or text)
|
|
253
|
+
2. Default prompt from prompts/gemini/transcription_gem.txt
|
|
254
|
+
"""
|
|
249
255
|
if self._system_prompt is not None:
|
|
250
256
|
return self._system_prompt
|
|
251
257
|
|
|
252
|
-
#
|
|
253
|
-
|
|
254
|
-
|
|
258
|
+
# Check for custom prompt
|
|
259
|
+
if self.config.prompt:
|
|
260
|
+
prompt_path = Path(self.config.prompt)
|
|
261
|
+
if prompt_path.exists() and prompt_path.is_file():
|
|
262
|
+
# Load from file
|
|
263
|
+
base_prompt = prompt_path.read_text(encoding="utf-8").strip()
|
|
264
|
+
if self.config.verbose:
|
|
265
|
+
self.logger.info(f"📝 Using custom prompt from file: {prompt_path}")
|
|
266
|
+
else:
|
|
267
|
+
# Use as direct text
|
|
268
|
+
base_prompt = self.config.prompt
|
|
269
|
+
if self.config.verbose:
|
|
270
|
+
self.logger.info("📝 Using custom prompt text")
|
|
271
|
+
else:
|
|
272
|
+
# Load default prompt from prompts/gemini/transcription_gem.txt
|
|
273
|
+
prompt_loader = get_prompt_loader()
|
|
274
|
+
base_prompt = prompt_loader.get_gemini_transcription_prompt()
|
|
255
275
|
|
|
256
276
|
# Add language-specific instruction if configured
|
|
257
277
|
if self.config.language:
|
|
258
278
|
base_prompt += f"\n\n* Use {self.config.language} language for transcription."
|
|
259
279
|
|
|
280
|
+
# Add media description context if available
|
|
281
|
+
if self.config.description:
|
|
282
|
+
base_prompt += f"\n\n## Media Context\n\n{self.config.description}"
|
|
283
|
+
|
|
260
284
|
self._system_prompt = base_prompt
|
|
261
285
|
return self._system_prompt
|
|
262
286
|
|
|
@@ -287,14 +311,21 @@ class GeminiTranscriber(BaseTranscriber):
|
|
|
287
311
|
def _get_generation_config(self) -> GenerateContentConfig:
|
|
288
312
|
"""Lazily build the generation config since it rarely changes."""
|
|
289
313
|
if self._generation_config is None:
|
|
314
|
+
# Only include thinking_config if thinking mode is enabled
|
|
315
|
+
thinking_config = None
|
|
316
|
+
if self.config.thinking:
|
|
317
|
+
thinking_config = ThinkingConfig(
|
|
318
|
+
include_thoughts=self.config.include_thoughts,
|
|
319
|
+
thinking_budget=-1,
|
|
320
|
+
)
|
|
321
|
+
|
|
290
322
|
self._generation_config = GenerateContentConfig(
|
|
291
323
|
system_instruction=self._get_transcription_prompt(),
|
|
292
324
|
response_modalities=["TEXT"],
|
|
293
|
-
thinking_config=
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
),
|
|
325
|
+
thinking_config=thinking_config,
|
|
326
|
+
temperature=self.config.temperature,
|
|
327
|
+
top_k=self.config.top_k,
|
|
328
|
+
top_p=self.config.top_p,
|
|
298
329
|
)
|
|
299
330
|
return self._generation_config
|
|
300
331
|
|
|
@@ -323,23 +354,123 @@ class GeminiTranscriber(BaseTranscriber):
|
|
|
323
354
|
),
|
|
324
355
|
)
|
|
325
356
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
357
|
+
# Extract content based on include_thoughts setting
|
|
358
|
+
if self.config.include_thoughts:
|
|
359
|
+
transcript = self._extract_with_thoughts(response)
|
|
360
|
+
else:
|
|
361
|
+
if not response.text:
|
|
362
|
+
raise RuntimeError("Empty response from Gemini API")
|
|
363
|
+
transcript = response.text.strip()
|
|
330
364
|
|
|
331
365
|
if self.config.verbose:
|
|
332
366
|
self.logger.info(f"✅ Transcription completed ({source}): {len(transcript)} characters")
|
|
333
367
|
|
|
334
368
|
return transcript
|
|
335
369
|
|
|
370
|
+
def _extract_with_thoughts(self, response) -> str:
|
|
371
|
+
"""Extract response content including thinking process and metadata."""
|
|
372
|
+
output_parts = []
|
|
373
|
+
thoughts = []
|
|
374
|
+
text_parts = []
|
|
375
|
+
|
|
376
|
+
# Iterate through all parts in the response
|
|
377
|
+
for candidate in response.candidates:
|
|
378
|
+
for part in candidate.content.parts:
|
|
379
|
+
if hasattr(part, "thought") and part.thought:
|
|
380
|
+
# This is a thinking part
|
|
381
|
+
if hasattr(part, "text") and part.text:
|
|
382
|
+
thoughts.append(part.text)
|
|
383
|
+
elif hasattr(part, "text") and part.text:
|
|
384
|
+
# This is a regular text part
|
|
385
|
+
text_parts.append(part.text)
|
|
386
|
+
|
|
387
|
+
# Extract metadata
|
|
388
|
+
metadata_lines = self._extract_response_metadata(response)
|
|
389
|
+
if metadata_lines:
|
|
390
|
+
output_parts.append("---")
|
|
391
|
+
output_parts.extend(metadata_lines)
|
|
392
|
+
output_parts.append("---\n")
|
|
393
|
+
|
|
394
|
+
# Format output with thoughts section if present
|
|
395
|
+
if thoughts:
|
|
396
|
+
output_parts.append("<thinking>")
|
|
397
|
+
output_parts.extend(thoughts)
|
|
398
|
+
output_parts.append("</thinking>\n")
|
|
399
|
+
|
|
400
|
+
output_parts.extend(text_parts)
|
|
401
|
+
|
|
402
|
+
result = "\n".join(output_parts).strip()
|
|
403
|
+
if not result:
|
|
404
|
+
raise RuntimeError("Empty response from Gemini API")
|
|
405
|
+
|
|
406
|
+
return result
|
|
407
|
+
|
|
408
|
+
def _extract_response_metadata(self, response) -> list:
|
|
409
|
+
"""Extract useful metadata from Gemini response as YAML frontmatter."""
|
|
410
|
+
lines = []
|
|
411
|
+
|
|
412
|
+
# Model version
|
|
413
|
+
if hasattr(response, "model_version") and response.model_version:
|
|
414
|
+
lines.append(f"model_version: {response.model_version}")
|
|
415
|
+
|
|
416
|
+
# Usage metadata (token counts)
|
|
417
|
+
if hasattr(response, "usage_metadata") and response.usage_metadata:
|
|
418
|
+
usage = response.usage_metadata
|
|
419
|
+
if hasattr(usage, "prompt_token_count"):
|
|
420
|
+
lines.append(f"prompt_tokens: {usage.prompt_token_count}")
|
|
421
|
+
if hasattr(usage, "candidates_token_count"):
|
|
422
|
+
lines.append(f"output_tokens: {usage.candidates_token_count}")
|
|
423
|
+
if hasattr(usage, "total_token_count"):
|
|
424
|
+
lines.append(f"total_tokens: {usage.total_token_count}")
|
|
425
|
+
# Thinking tokens if available
|
|
426
|
+
if hasattr(usage, "thoughts_token_count") and usage.thoughts_token_count:
|
|
427
|
+
lines.append(f"thinking_tokens: {usage.thoughts_token_count}")
|
|
428
|
+
|
|
429
|
+
# Candidate-level metadata
|
|
430
|
+
if response.candidates:
|
|
431
|
+
candidate = response.candidates[0]
|
|
432
|
+
|
|
433
|
+
# Finish reason
|
|
434
|
+
if hasattr(candidate, "finish_reason") and candidate.finish_reason:
|
|
435
|
+
lines.append(f"finish_reason: {candidate.finish_reason}")
|
|
436
|
+
|
|
437
|
+
# Average log probability (confidence indicator)
|
|
438
|
+
if hasattr(candidate, "avg_logprobs") and candidate.avg_logprobs is not None:
|
|
439
|
+
lines.append(f"avg_logprobs: {candidate.avg_logprobs:.4f}")
|
|
440
|
+
|
|
441
|
+
# Citation metadata
|
|
442
|
+
if hasattr(candidate, "citation_metadata") and candidate.citation_metadata:
|
|
443
|
+
citations = getattr(candidate.citation_metadata, "citations", [])
|
|
444
|
+
if citations:
|
|
445
|
+
lines.append("citations:")
|
|
446
|
+
for cite in citations:
|
|
447
|
+
uri = getattr(cite, "uri", "")
|
|
448
|
+
start = getattr(cite, "start_index", "")
|
|
449
|
+
end = getattr(cite, "end_index", "")
|
|
450
|
+
if uri:
|
|
451
|
+
lines.append(f" - uri: {uri}")
|
|
452
|
+
if start or end:
|
|
453
|
+
lines.append(f" range: [{start}, {end}]")
|
|
454
|
+
|
|
455
|
+
return lines
|
|
456
|
+
|
|
336
457
|
def write(
|
|
337
|
-
self, transcript: str, output_file: Path, encoding: str = "utf-8",
|
|
458
|
+
self, transcript: Union[str, Caption], output_file: Path, encoding: str = "utf-8", cache_event: bool = True
|
|
338
459
|
) -> Path:
|
|
339
460
|
"""
|
|
340
|
-
Persist transcript
|
|
461
|
+
Persist transcript to disk and return the file path.
|
|
462
|
+
|
|
463
|
+
Supports both raw string (from transcribe_file) and Caption object
|
|
464
|
+
(after conversion in mixin._transcribe).
|
|
341
465
|
"""
|
|
342
466
|
if isinstance(output_file, str):
|
|
343
467
|
output_file = Path(output_file)
|
|
344
|
-
|
|
468
|
+
|
|
469
|
+
if isinstance(transcript, Caption):
|
|
470
|
+
# Caption object - use its write method with gemini format
|
|
471
|
+
transcript.write(output_file, output_format="gemini")
|
|
472
|
+
else:
|
|
473
|
+
# Raw string from transcription
|
|
474
|
+
output_file.write_text(transcript, encoding=encoding)
|
|
475
|
+
|
|
345
476
|
return output_file
|
|
lattifai/transcription/lattifai.py
CHANGED
|
@@ -6,10 +6,10 @@ from typing import List, Optional, Union
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
|
|
8
8
|
from lattifai.audio2 import AudioData
|
|
9
|
-
from lattifai.caption import
|
|
9
|
+
from lattifai.caption import Supervision
|
|
10
10
|
from lattifai.config import TranscriptionConfig
|
|
11
|
+
from lattifai.data import Caption
|
|
11
12
|
from lattifai.transcription.base import BaseTranscriber
|
|
12
|
-
from lattifai.transcription.prompts import get_prompt_loader # noqa: F401
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class LattifAITranscriber(BaseTranscriber):
|
|
@@ -20,61 +20,42 @@ class LattifAITranscriber(BaseTranscriber):
|
|
|
20
20
|
Note: This transcriber only supports local file transcription, not URLs.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
# Transcriber metadata
|
|
24
23
|
file_suffix = ".ass"
|
|
25
24
|
supports_url = False
|
|
26
25
|
|
|
27
|
-
def __init__(
|
|
28
|
-
self,
|
|
29
|
-
transcription_config: TranscriptionConfig,
|
|
30
|
-
):
|
|
26
|
+
def __init__(self, transcription_config: TranscriptionConfig):
|
|
31
27
|
"""
|
|
32
|
-
Initialize
|
|
28
|
+
Initialize LattifAI transcriber.
|
|
33
29
|
|
|
34
30
|
Args:
|
|
35
|
-
transcription_config: Transcription configuration.
|
|
31
|
+
transcription_config: Transcription configuration.
|
|
36
32
|
"""
|
|
37
|
-
super().__init__(
|
|
38
|
-
config=transcription_config,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
self._system_prompt: Optional[str] = None
|
|
33
|
+
super().__init__(config=transcription_config)
|
|
42
34
|
self._transcriber = None
|
|
43
35
|
|
|
44
36
|
@property
|
|
45
37
|
def name(self) -> str:
|
|
46
|
-
return
|
|
47
|
-
|
|
48
|
-
async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
|
|
49
|
-
"""
|
|
50
|
-
URL transcription not supported for LattifAI local models.
|
|
51
|
-
|
|
52
|
-
This method exists to satisfy the BaseTranscriber interface but
|
|
53
|
-
will never be called because supports_url = False and the base
|
|
54
|
-
class checks this flag before calling this method.
|
|
38
|
+
return self.config.model_name
|
|
55
39
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
language: Optional language code (not used)
|
|
59
|
-
"""
|
|
60
|
-
raise NotImplementedError(
|
|
61
|
-
f"{self.__class__.__name__} does not support URL transcription. "
|
|
62
|
-
f"Please download the file first and use transcribe_file()."
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
|
|
40
|
+
def _ensure_transcriber(self):
|
|
41
|
+
"""Lazily initialize the core transcriber."""
|
|
66
42
|
if self._transcriber is None:
|
|
67
43
|
from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber
|
|
68
44
|
|
|
69
45
|
self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
|
|
46
|
+
return self._transcriber
|
|
70
47
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
48
|
+
async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
|
|
49
|
+
"""URL transcription not supported for LattifAI local models."""
|
|
50
|
+
raise NotImplementedError(
|
|
51
|
+
f"{self.__class__.__name__} does not support URL transcription. "
|
|
52
|
+
"Please download the file first and use transcribe_file()."
|
|
75
53
|
)
|
|
76
54
|
|
|
77
|
-
|
|
55
|
+
async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
|
|
56
|
+
transcriber = self._ensure_transcriber()
|
|
57
|
+
transcription, event = transcriber.transcribe(media_file, language=language, num_workers=2)
|
|
58
|
+
return Caption.from_transcription_results(transcription=transcription, event=event)
|
|
78
59
|
|
|
79
60
|
def transcribe_numpy(
|
|
80
61
|
self,
|
|
@@ -92,19 +73,12 @@ class LattifAITranscriber(BaseTranscriber):
|
|
|
92
73
|
Returns:
|
|
93
74
|
Supervision object (or list of Supervision objects) with transcription and alignment info.
|
|
94
75
|
"""
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
|
|
99
|
-
|
|
100
|
-
# Delegate to core transcriber which handles both single arrays and lists
|
|
101
|
-
return self._transcriber.transcribe(
|
|
76
|
+
transcriber = self._ensure_transcriber()
|
|
77
|
+
return transcriber.transcribe(
|
|
102
78
|
audio, language=language, return_hypotheses=True, progress_bar=False, timestamps=True
|
|
103
79
|
)[0]
|
|
104
80
|
|
|
105
|
-
def write(
|
|
106
|
-
self, transcript: Caption, output_file: Path, encoding: str = "utf-8", cache_audio_events: bool = True
|
|
107
|
-
) -> Path:
|
|
81
|
+
def write(self, transcript: Caption, output_file: Path, encoding: str = "utf-8", cache_event: bool = True) -> Path:
|
|
108
82
|
"""
|
|
109
83
|
Persist transcript text to disk and return the file path.
|
|
110
84
|
"""
|
|
@@ -112,20 +86,8 @@ class LattifAITranscriber(BaseTranscriber):
|
|
|
112
86
|
output_file,
|
|
113
87
|
include_speaker_in_text=False,
|
|
114
88
|
)
|
|
115
|
-
if
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
events_file = output_file.with_suffix(".AED")
|
|
119
|
-
write_to_file(transcript.audio_events, events_file, format="long")
|
|
89
|
+
if cache_event and transcript.event:
|
|
90
|
+
events_file = output_file.with_suffix(".LED")
|
|
91
|
+
transcript.event.write(events_file)
|
|
120
92
|
|
|
121
93
|
return output_file
|
|
122
|
-
|
|
123
|
-
def _get_transcription_prompt(self) -> str:
|
|
124
|
-
"""Get (and cache) transcription system prompt from prompts module."""
|
|
125
|
-
if self._system_prompt is not None:
|
|
126
|
-
return self._system_prompt
|
|
127
|
-
|
|
128
|
-
base_prompt = "" # TODO
|
|
129
|
-
|
|
130
|
-
self._system_prompt = base_prompt
|
|
131
|
-
return self._system_prompt
|
lattifai/types.py
CHANGED
lattifai/utils.py
CHANGED
|
@@ -94,19 +94,14 @@ def _resolve_model_path(model_name_or_path: str, model_hub: str = "huggingface")
|
|
|
94
94
|
model_name_or_path: Local path or remote model identifier.
|
|
95
95
|
model_hub: Which hub to use for downloads. Supported: "huggingface", "modelscope".
|
|
96
96
|
"""
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
local_path = Path(model_name_or_path).expanduser()
|
|
98
|
+
if local_path.exists():
|
|
99
|
+
return str(local_path)
|
|
99
100
|
|
|
100
|
-
# Normalize hub name
|
|
101
101
|
hub = (model_hub or "huggingface").lower()
|
|
102
|
-
|
|
103
102
|
if hub not in ("huggingface", "modelscope"):
|
|
104
103
|
raise ValueError(f"Unsupported model_hub: {model_hub}. Supported: 'huggingface', 'modelscope'.")
|
|
105
104
|
|
|
106
|
-
# If local path exists, return it regardless of hub
|
|
107
|
-
if Path(model_name_or_path).expanduser().exists():
|
|
108
|
-
return str(Path(model_name_or_path).expanduser())
|
|
109
|
-
|
|
110
105
|
if hub == "huggingface":
|
|
111
106
|
from huggingface_hub import HfApi, snapshot_download
|
|
112
107
|
from huggingface_hub.constants import HF_HUB_CACHE
|
|
@@ -201,9 +196,8 @@ def _select_device(device: Optional[str]) -> str:
|
|
|
201
196
|
|
|
202
197
|
import torch
|
|
203
198
|
|
|
204
|
-
detected = "cpu"
|
|
205
199
|
if torch.backends.mps.is_available():
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
return
|
|
200
|
+
return "mps"
|
|
201
|
+
if torch.cuda.is_available():
|
|
202
|
+
return "cuda"
|
|
203
|
+
return "cpu"
|
lattifai/workflow/__init__.py
CHANGED
|
@@ -1,8 +1,34 @@
|
|
|
1
|
-
"""
|
|
2
|
-
LattifAI Agentic Workflows
|
|
1
|
+
"""LattifAI Agentic Workflows.
|
|
3
2
|
|
|
4
3
|
This module provides agentic workflow capabilities for automated processing
|
|
5
4
|
of multimedia content through intelligent agent-based pipelines.
|
|
5
|
+
|
|
6
|
+
Key Components:
|
|
7
|
+
WorkflowAgent: Abstract base class for implementing workflow agents.
|
|
8
|
+
Provides step-based execution with retry logic, state management,
|
|
9
|
+
and consistent logging.
|
|
10
|
+
|
|
11
|
+
WorkflowStep: Defines individual workflow steps with timing and
|
|
12
|
+
execution status tracking.
|
|
13
|
+
|
|
14
|
+
WorkflowResult: Encapsulates workflow execution results including
|
|
15
|
+
status, outputs, errors, and timing information.
|
|
16
|
+
|
|
17
|
+
FileExistenceManager: Handles file existence conflicts during workflows,
|
|
18
|
+
supporting interactive and automatic resolution modes.
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
>>> from lattifai.workflow import WorkflowAgent, WorkflowStep, WorkflowResult
|
|
22
|
+
>>> class MyWorkflow(WorkflowAgent):
|
|
23
|
+
... def define_steps(self):
|
|
24
|
+
... return [WorkflowStep("download"), WorkflowStep("process")]
|
|
25
|
+
... def execute_step(self, step, context):
|
|
26
|
+
... # Implementation
|
|
27
|
+
... pass
|
|
28
|
+
|
|
29
|
+
See Also:
|
|
30
|
+
- lattifai.client.LattifAI: Main client that orchestrates workflows
|
|
31
|
+
- lattifai.youtube: YouTube-specific workflow integration
|
|
6
32
|
"""
|
|
7
33
|
|
|
8
34
|
# Import transcript processing functionality
|
|
@@ -10,13 +36,11 @@ of multimedia content through intelligent agent-based pipelines.
|
|
|
10
36
|
|
|
11
37
|
from .base import WorkflowAgent, WorkflowResult, WorkflowStep
|
|
12
38
|
from .file_manager import TRANSCRIBE_CHOICE, FileExistenceManager
|
|
13
|
-
from .youtube import YouTubeDownloader
|
|
14
39
|
|
|
15
40
|
__all__ = [
|
|
16
41
|
"WorkflowAgent",
|
|
17
42
|
"WorkflowStep",
|
|
18
43
|
"WorkflowResult",
|
|
19
44
|
"FileExistenceManager",
|
|
20
|
-
"YouTubeDownloader",
|
|
21
45
|
"TRANSCRIBE_CHOICE",
|
|
22
46
|
]
|
|
lattifai/workflow/file_manager.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
""".
|
|
2
|
-
File existence management utilities for video processing workflows
|
|
3
|
-
"""
|
|
1
|
+
"""File existence management utilities for video processing workflows."""
|
|
4
2
|
|
|
5
3
|
import asyncio
|
|
6
4
|
import os
|
|
@@ -187,8 +185,7 @@ class FileExistenceManager:
|
|
|
187
185
|
if not files:
|
|
188
186
|
return "proceed"
|
|
189
187
|
|
|
190
|
-
|
|
191
|
-
del emoji # Unused variable
|
|
188
|
+
_, label = FileExistenceManager.FILE_TYPE_INFO.get(file_type, ("📄", file_type.capitalize()))
|
|
192
189
|
|
|
193
190
|
# Header with warning color
|
|
194
191
|
safe_print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
|
|
lattifai/youtube/__init__.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""YouTube Data Acquisition Module.
|
|
2
|
+
|
|
3
|
+
This module provides YouTube video metadata extraction, media download,
|
|
4
|
+
and caption retrieval functionality powered by yt-dlp.
|
|
5
|
+
|
|
6
|
+
Key Components:
|
|
7
|
+
YoutubeLoader: Lightweight loader for fetching video metadata and
|
|
8
|
+
caption content in memory. Use this for quick metadata lookups
|
|
9
|
+
or when you don't need to save files to disk.
|
|
10
|
+
|
|
11
|
+
YouTubeDownloader: Full-featured downloader for media files and
|
|
12
|
+
captions with disk persistence. Supports various output formats
|
|
13
|
+
and quality settings.
|
|
14
|
+
|
|
15
|
+
VideoMetadata: Dataclass containing video information (title, duration,
|
|
16
|
+
channel, upload date, available captions, etc.).
|
|
17
|
+
|
|
18
|
+
CaptionTrack: Represents a single caption track with language code,
|
|
19
|
+
format, and content retrieval methods.
|
|
20
|
+
|
|
21
|
+
Features:
|
|
22
|
+
- Proxy and cookie support for geo-restricted content
|
|
23
|
+
- Automatic caption format detection (manual vs auto-generated)
|
|
24
|
+
- Multiple audio/video format options
|
|
25
|
+
- Async and sync download APIs
|
|
26
|
+
|
|
27
|
+
Example:
|
|
28
|
+
>>> from lattifai.youtube import YoutubeLoader, VideoMetadata
|
|
29
|
+
>>> loader = YoutubeLoader()
|
|
30
|
+
>>> metadata = loader.get_metadata("https://youtube.com/watch?v=...")
|
|
31
|
+
>>> print(metadata.title, metadata.duration)
|
|
32
|
+
|
|
33
|
+
Requirements:
|
|
34
|
+
yt-dlp must be installed: `pip install yt-dlp`
|
|
35
|
+
|
|
36
|
+
See Also:
|
|
37
|
+
- lattifai.client.LattifAI.youtube: High-level YouTube workflow method
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
from .client import YouTubeDownloader, YoutubeLoader
|
|
41
|
+
from .types import CaptionTrack, VideoMetadata
|
|
42
|
+
|
|
43
|
+
__all__ = ["YoutubeLoader", "YouTubeDownloader", "VideoMetadata", "CaptionTrack"]
|