lattifai 0.4.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. lattifai/__init__.py +61 -47
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/alignment/lattice1_worker.py +185 -0
  5. lattifai/{tokenizer → alignment}/phonemizer.py +4 -4
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +244 -169
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/gemini_reader.py +30 -30
  12. lattifai/{io → caption}/gemini_writer.py +17 -17
  13. lattifai/{io → caption}/supervision.py +4 -3
  14. lattifai/caption/text_parser.py +145 -0
  15. lattifai/cli/__init__.py +17 -0
  16. lattifai/cli/alignment.py +153 -0
  17. lattifai/cli/caption.py +204 -0
  18. lattifai/cli/server.py +19 -0
  19. lattifai/cli/transcribe.py +197 -0
  20. lattifai/cli/youtube.py +128 -0
  21. lattifai/client.py +460 -251
  22. lattifai/config/__init__.py +20 -0
  23. lattifai/config/alignment.py +73 -0
  24. lattifai/config/caption.py +178 -0
  25. lattifai/config/client.py +46 -0
  26. lattifai/config/diarization.py +67 -0
  27. lattifai/config/media.py +335 -0
  28. lattifai/config/transcription.py +84 -0
  29. lattifai/diarization/__init__.py +5 -0
  30. lattifai/diarization/lattifai.py +89 -0
  31. lattifai/errors.py +98 -91
  32. lattifai/logging.py +116 -0
  33. lattifai/mixin.py +552 -0
  34. lattifai/server/app.py +420 -0
  35. lattifai/transcription/__init__.py +76 -0
  36. lattifai/transcription/base.py +108 -0
  37. lattifai/transcription/gemini.py +219 -0
  38. lattifai/transcription/lattifai.py +103 -0
  39. lattifai/{workflows → transcription}/prompts/__init__.py +4 -4
  40. lattifai/types.py +30 -0
  41. lattifai/utils.py +16 -44
  42. lattifai/workflow/__init__.py +22 -0
  43. lattifai/workflow/agents.py +6 -0
  44. lattifai/{workflows → workflow}/base.py +22 -22
  45. lattifai/{workflows → workflow}/file_manager.py +239 -215
  46. lattifai/workflow/youtube.py +564 -0
  47. lattifai-1.0.0.dist-info/METADATA +736 -0
  48. lattifai-1.0.0.dist-info/RECORD +52 -0
  49. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  50. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  51. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +1 -1
  52. lattifai/base_client.py +0 -126
  53. lattifai/bin/__init__.py +0 -3
  54. lattifai/bin/agent.py +0 -325
  55. lattifai/bin/align.py +0 -296
  56. lattifai/bin/cli_base.py +0 -25
  57. lattifai/bin/subtitle.py +0 -210
  58. lattifai/io/__init__.py +0 -42
  59. lattifai/io/reader.py +0 -85
  60. lattifai/io/text_parser.py +0 -75
  61. lattifai/io/utils.py +0 -15
  62. lattifai/io/writer.py +0 -90
  63. lattifai/tokenizer/__init__.py +0 -3
  64. lattifai/workers/__init__.py +0 -3
  65. lattifai/workers/lattice1_alpha.py +0 -284
  66. lattifai/workflows/__init__.py +0 -34
  67. lattifai/workflows/agents.py +0 -10
  68. lattifai/workflows/gemini.py +0 -167
  69. lattifai/workflows/prompts/README.md +0 -22
  70. lattifai/workflows/prompts/gemini/README.md +0 -24
  71. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  72. lattifai/workflows/youtube.py +0 -931
  73. lattifai-0.4.5.dist-info/METADATA +0 -808
  74. lattifai-0.4.5.dist-info/RECORD +0 -39
  75. lattifai-0.4.5.dist-info/entry_points.txt +0 -3
  76. {lattifai-0.4.5.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
lattifai/transcription/gemini.py ADDED
@@ -0,0 +1,219 @@
+ """Gemini 2.5 Pro transcription module with config-driven architecture."""
+
+ import asyncio
+ from pathlib import Path
+ from typing import Optional, Union
+
+ from google import genai
+ from google.genai.types import GenerateContentConfig, Part, ThinkingConfig
+
+ from lattifai.audio2 import AudioData
+ from lattifai.config import TranscriptionConfig
+ from lattifai.transcription.base import BaseTranscriber
+ from lattifai.transcription.prompts import get_prompt_loader
+
+
+ class GeminiTranscriber(BaseTranscriber):
+     """
+     Gemini 2.5/3 Pro audio transcription with config-driven architecture.
+
+     Uses TranscriptionConfig for all behavioral settings.
+     """
+
+     # Transcriber metadata
+     file_suffix = ".md"
+
+     # The specific Gem URL
+     GEM_URL = "https://gemini.google.com/gem/1870ly7xvW2hU_umtv-LedGsjywT0sQiN"
+
+     def __init__(
+         self,
+         transcription_config: Optional[TranscriptionConfig] = None,
+     ):
+         """
+         Initialize Gemini transcriber.
+
+         Args:
+             transcription_config: Transcription configuration. If None, uses default.
+         """
+         super().__init__(config=transcription_config)
+
+         self._client: Optional[genai.Client] = None
+         self._generation_config: Optional[GenerateContentConfig] = None
+         self._system_prompt: Optional[str] = None
+
+         # Warn if API key not available
+         if not self.config.gemini_api_key:
+             self.logger.warning(
+                 "⚠️ Gemini API key not provided. API key will be required when calling transcription methods."
+             )
+
+     @property
+     def name(self) -> str:
+         """Human-readable name of the transcriber."""
+         return f"{self.config.model_name}"
+
+     async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
+         """
+         Transcribe audio from URL using Gemini 2.5 Pro.
+
+         Args:
+             url: URL to transcribe (e.g., YouTube)
+             language: Optional language code for transcription (overrides config)
+
+         Returns:
+             Transcribed text
+
+         Raises:
+             ValueError: If API key not provided
+             RuntimeError: If transcription fails
+         """
+         if self.config.verbose:
+             self.logger.info(f"🎤 Starting Gemini transcription for: {url}")
+
+         try:
+             contents = Part.from_uri(file_uri=url, mime_type="video/*")
+             return await self._run_generation(contents, source=url)
+
+         except ImportError:
+             raise RuntimeError("Google GenAI SDK not installed. Please install with: pip install google-genai")
+         except Exception as e:
+             self.logger.error(f"Gemini transcription failed: {str(e)}")
+             raise RuntimeError(f"Gemini transcription failed: {str(e)}")
+
+     async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> str:
+         """
+         Transcribe audio/video from local file using Gemini 2.5 Pro.
+
+         Args:
+             media_file: Path to local audio/video file
+             language: Optional language code for transcription (overrides config)
+
+         Returns:
+             Transcribed text
+
+         Raises:
+             ValueError: If API key not provided
+             RuntimeError: If transcription fails
+         """
+         media_file = str(media_file)
+
+         if self.config.verbose:
+             self.logger.info(f"🎤 Starting Gemini transcription for file: {media_file}")
+
+         try:
+             client = self._get_client()
+
+             # Upload audio file
+             if self.config.verbose:
+                 self.logger.info("📤 Uploading audio file to Gemini...")
+             media_file = client.files.upload(path=media_file)
+
+             contents = Part.from_uri(file_uri=media_file.uri, mime_type=media_file.mime_type)
+             return await self._run_generation(contents, source=media_file, client=client)
+
+         except ImportError:
+             raise RuntimeError("Google GenAI SDK not installed. Please install with: pip install google-genai")
+         except Exception as e:
+             self.logger.error(f"Gemini transcription failed: {str(e)}")
+             raise RuntimeError(f"Gemini transcription failed: {str(e)}")
+
+     def _get_transcription_prompt(self) -> str:
+         """Get (and cache) transcription system prompt from prompts module."""
+         if self._system_prompt is not None:
+             return self._system_prompt
+
+         # Load prompt from prompts/gemini/transcription_gem.txt
+         prompt_loader = get_prompt_loader()
+         base_prompt = prompt_loader.get_gemini_transcription_prompt()
+
+         # Add language-specific instruction if configured
+         if self.config.language:
+             base_prompt += f"\n\n* Use {self.config.language} language for transcription."
+
+         self._system_prompt = base_prompt
+         return self._system_prompt
+
+     def get_gem_info(self) -> dict:
+         """Get information about the Gem being used."""
+         return {
+             "gem_name": "Media Transcription Gem",
+             "gem_url": self.GEM_URL,
+             "model": self.config.model_name,
+             "description": "Specialized Gem for media content transcription",
+         }
+
+     def _build_result(self, transcript: str, output_file: Path) -> dict:
+         """Augment the base result with Gemini-specific metadata."""
+         base_result = super()._build_result(transcript, output_file)
+         base_result.update({"model": self.config.model_name, "language": self.config.language})
+         return base_result
+
+     def _get_client(self) -> genai.Client:
+         """Lazily create the Gemini client when first needed."""
+         if not self.config.gemini_api_key:
+             raise ValueError("Gemini API key is required for transcription")
+
+         if self._client is None:
+             self._client = genai.Client(api_key=self.config.gemini_api_key)
+         return self._client
+
+     def _get_generation_config(self) -> GenerateContentConfig:
+         """Lazily build the generation config since it rarely changes."""
+         if self._generation_config is None:
+             self._generation_config = GenerateContentConfig(
+                 system_instruction=self._get_transcription_prompt(),
+                 response_modalities=["TEXT"],
+                 thinking_config=ThinkingConfig(
+                     include_thoughts=False,
+                     thinking_budget=-1,
+                     # thinking_level="high",  # "low", "medium"
+                 ),
+             )
+         return self._generation_config
+
+     async def _run_generation(
+         self,
+         contents: Part,
+         *,
+         source: str,
+         client: Optional[genai.Client] = None,
+     ) -> str:
+         """
+         Shared helper for sending generation requests and handling the response.
+         """
+         client = client or self._get_client()
+         config = self._get_generation_config()
+
+         if self.config.verbose:
+             self.logger.info(f"🔄 Sending transcription request to {self.config.model_name} ({source})...")
+
+         response = await asyncio.get_event_loop().run_in_executor(
+             None,
+             lambda: client.models.generate_content(
+                 model=self.config.model_name,
+                 contents=contents,
+                 config=config,
+             ),
+         )
+
+         if not response.text:
+             raise RuntimeError("Empty response from Gemini API")
+
+         transcript = response.text.strip()
+
+         if self.config.verbose:
+             self.logger.info(f"✅ Transcription completed ({source}): {len(transcript)} characters")
+
+         return transcript
+
+     def write(
+         self, transcript: str, output_file: Path, encoding: str = "utf-8", cache_audio_events: bool = True
+     ) -> Path:
+         """
+         Persist transcript text to disk and return the file path.
+         """
+         if isinstance(output_file, str):
+             output_file = Path(output_file)
+         output_file.write_text(transcript, encoding=encoding)
+         return output_file
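For orientation, a minimal usage sketch of the new GeminiTranscriber follows. It is an assumption-laden sketch, not documented API: only attribute reads on TranscriptionConfig (gemini_api_key, model_name, language, verbose) appear in this diff, so the constructor keywords and the model id below are illustrative.

    # Hypothetical usage sketch; TranscriptionConfig field names and the model id
    # are assumed from the attribute accesses visible in the diff above.
    import asyncio
    from pathlib import Path

    from lattifai.config import TranscriptionConfig
    from lattifai.transcription.gemini import GeminiTranscriber


    async def main() -> None:
        config = TranscriptionConfig(
            gemini_api_key="YOUR_API_KEY",  # required before any transcribe_* call
            model_name="gemini-2.5-pro",    # assumed model id, not taken from the package
            language="en",
            verbose=True,
        )
        transcriber = GeminiTranscriber(transcription_config=config)

        # A local path is uploaded via client.files.upload() and then transcribed.
        transcript = await transcriber.transcribe_file("talk.mp3")

        # write() persists plain text; file_suffix for this transcriber is ".md".
        transcriber.write(transcript, Path("talk.md"))


    if __name__ == "__main__":
        asyncio.run(main())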
lattifai/transcription/lattifai.py ADDED
@@ -0,0 +1,103 @@
+ """Transcription module with config-driven architecture."""
+
+ from pathlib import Path
+ from typing import Optional, Union
+
+ from lattifai.audio2 import AudioData
+ from lattifai.caption import Caption
+ from lattifai.config import TranscriptionConfig
+ from lattifai.transcription.base import BaseTranscriber
+ from lattifai.transcription.prompts import get_prompt_loader  # noqa: F401
+
+
+ class LattifAITranscriber(BaseTranscriber):
+     """
+     LattifAI local transcription with config-driven architecture.
+
+     Uses TranscriptionConfig for all behavioral settings.
+     Note: This transcriber only supports local file transcription, not URLs.
+     """
+
+     # Transcriber metadata
+     file_suffix = ".ass"
+     supports_url = False
+
+     def __init__(
+         self,
+         transcription_config: TranscriptionConfig,
+     ):
+         """
+         Initialize LattifAI transcriber.
+
+         Args:
+             transcription_config: Transcription configuration (required).
+         """
+         super().__init__(
+             config=transcription_config,
+         )
+
+         self._system_prompt: Optional[str] = None
+         self._transcriber = None
+
+     @property
+     def name(self) -> str:
+         return f"{self.config.model_name}"
+
+     async def transcribe_url(self, url: str, language: Optional[str] = None) -> str:
+         """
+         URL transcription not supported for LattifAI local models.
+
+         This method exists to satisfy the BaseTranscriber interface but
+         will never be called because supports_url = False and the base
+         class checks this flag before calling this method.
+
+         Args:
+             url: URL to transcribe (not supported)
+             language: Optional language code (not used)
+         """
+         raise NotImplementedError(
+             f"{self.__class__.__name__} does not support URL transcription. "
+             f"Please download the file first and use transcribe_file()."
+         )
+
+     async def transcribe_file(self, media_file: Union[str, Path, AudioData], language: Optional[str] = None) -> Caption:
+         if self._transcriber is None:
+             from lattifai_core.transcription import LattifAITranscriber as CoreLattifAITranscriber
+
+             self._transcriber = CoreLattifAITranscriber.from_pretrained(model_config=self.config)
+
+         transcription, audio_events = self._transcriber.transcribe(media_file, language=language, num_workers=2)
+         caption = Caption.from_transcription_results(
+             transcription=transcription,
+             audio_events=audio_events,
+         )
+
+         return caption
+
+     def write(
+         self, transcript: Caption, output_file: Path, encoding: str = "utf-8", cache_audio_events: bool = True
+     ) -> Path:
+         """
+         Persist transcript text to disk and return the file path.
+         """
+         transcript.write(
+             output_file,
+             include_speaker_in_text=False,
+         )
+         if cache_audio_events and transcript.audio_events:
+             from tgt import write_to_file
+
+             events_file = output_file.with_suffix(".AED")
+             write_to_file(transcript.audio_events, events_file, format="long")
+
+         return output_file
+
+     def _get_transcription_prompt(self) -> str:
+         """Get (and cache) transcription system prompt from prompts module."""
+         if self._system_prompt is not None:
+             return self._system_prompt
+
+         base_prompt = ""  # TODO
+
+         self._system_prompt = base_prompt
+         return self._system_prompt
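A corresponding sketch for the local LattifAITranscriber, which returns a Caption rather than a string and imports the separate lattifai_core package lazily at transcription time. The TranscriptionConfig keyword and model id are again assumptions for illustration.

    # Sketch only: requires lattifai_core at runtime and assumes
    # TranscriptionConfig(model_name=...) is a valid way to build the config.
    import asyncio
    from pathlib import Path

    from lattifai.config import TranscriptionConfig
    from lattifai.transcription.lattifai import LattifAITranscriber


    async def main() -> None:
        transcriber = LattifAITranscriber(
            transcription_config=TranscriptionConfig(model_name="lattice-1"),  # hypothetical model id
        )

        caption = await transcriber.transcribe_file("interview.wav")  # -> Caption

        # write() emits an .ass subtitle file; audio events, if present, are cached
        # next to it as an .AED TextGrid via tgt.write_to_file().
        transcriber.write(caption, Path("interview.ass"))


    if __name__ == "__main__":
        asyncio.run(main())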
lattifai/{workflows → transcription}/prompts/__init__.py RENAMED
@@ -26,16 +26,16 @@ class PromptLoader:
          Raises:
              FileNotFoundError: If prompt file doesn't exist
          """
-         prompt_path = self.prompts_dir / category / f'{name}.txt'
+         prompt_path = self.prompts_dir / category / f"{name}.txt"
 
          if not prompt_path.exists():
-             raise FileNotFoundError(f'Prompt not found: {prompt_path}')
+             raise FileNotFoundError(f"Prompt not found: {prompt_path}")
 
-         return prompt_path.read_text(encoding='utf-8').strip()
+         return prompt_path.read_text(encoding="utf-8").strip()
 
      def get_gemini_transcription_prompt(self) -> str:
          """Get the Gemini transcription Gem prompt"""
-         return self.load_prompt('gemini', 'transcription_gem')
+         return self.load_prompt("gemini", "transcription_gem")
 
 
  # Global instance
lattifai/types.py ADDED
@@ -0,0 +1,30 @@
+ """Common type definitions for LattifAI."""
+
+ from pathlib import Path
+ from typing import List, TypeAlias, Union
+
+ from lhotse.utils import Pathlike
+
+ from .caption import Supervision
+
+ # Path-like types
+ PathLike: TypeAlias = Pathlike  # Re-export for convenience (str | Path)
+
+ # Caption types
+ SupervisionList: TypeAlias = List[Supervision]
+ """List of caption segments with timing and text information."""
+
+ # Media format types
+ MediaFormat: TypeAlias = str
+ """Media format string (e.g., 'mp3', 'wav', 'mp4')."""
+
+ # URL types
+ URL: TypeAlias = str
+ """String representing a URL."""
+
+ __all__ = [
+     "PathLike",
+     "SupervisionList",
+     "MediaFormat",
+     "URL",
+ ]
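These aliases are plain annotations; a short illustrative sketch of how downstream code might consume them (the helper names here are hypothetical, not from the package):

    # Illustrative only: shows the new aliases used in annotations.
    from lattifai.types import MediaFormat, PathLike, SupervisionList, URL


    def merge_captions(segments: SupervisionList, output: PathLike, fmt: MediaFormat = "srt") -> PathLike:
        """Hypothetical helper annotated with the new aliases."""
        ...


    def queue_download(url: URL) -> None:
        """Hypothetical helper that accepts a URL string."""
        ...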
lattifai/utils.py CHANGED
@@ -6,14 +6,12 @@ from pathlib import Path
  from typing import Any, Optional, Type
 
  from lattifai.errors import ModelLoadError
- from lattifai.tokenizer import LatticeTokenizer
- from lattifai.workers import Lattice1AlphaWorker
 
 
  def _get_cache_marker_path(cache_dir: Path) -> Path:
      """Get the path for the cache marker file with current date."""
-     today = datetime.now().strftime('%Y%m%d')
-     return cache_dir / f'.done{today}'
+     today = datetime.now().strftime("%Y%m%d")
+     return cache_dir / f".done{today}"
 
 
  def _is_cache_valid(cache_dir: Path) -> bool:
@@ -22,7 +20,7 @@ def _is_cache_valid(cache_dir: Path) -> bool:
          return False
 
      # Find any .done* marker files
-     marker_files = list(cache_dir.glob('.done*'))
+     marker_files = list(cache_dir.glob(".done*"))
      if not marker_files:
          return False
 
@@ -31,8 +29,8 @@ def _is_cache_valid(cache_dir: Path) -> bool:
 
      # Extract date from marker filename (format: .doneYYYYMMDD)
      try:
-         date_str = latest_marker.name.replace('.done', '')
-         marker_date = datetime.strptime(date_str, '%Y%m%d')
+         date_str = latest_marker.name.replace(".done", "")
+         marker_date = datetime.strptime(date_str, "%Y%m%d")
          # Check if marker is older than 1 days
          if datetime.now() - marker_date > timedelta(days=1):
              return False
@@ -45,7 +43,7 @@ def _is_cache_valid(cache_dir: Path) -> bool:
  def _create_cache_marker(cache_dir: Path) -> None:
      """Create a cache marker file with current date and clean old markers."""
      # Remove old marker files
-     for old_marker in cache_dir.glob('.done*'):
+     for old_marker in cache_dir.glob(".done*"):
          old_marker.unlink(missing_ok=True)
 
      # Create new marker file
@@ -55,8 +53,8 @@ def _create_cache_marker(cache_dir: Path) -> None:
 
  def _resolve_model_path(model_name_or_path: str) -> str:
      """Resolve model path, downloading from Hugging Face when necessary."""
-     if Path(model_name_or_path).exists():
-         return model_name_or_path
+     if Path(model_name_or_path).expanduser().exists():
+         return str(Path(model_name_or_path).expanduser())
 
      from huggingface_hub import snapshot_download
      from huggingface_hub.constants import HF_HUB_CACHE
@@ -68,7 +66,7 @@ def _resolve_model_path(model_name_or_path: str) -> str:
      # Check if we have a valid cached version
      if _is_cache_valid(cache_dir):
          # Return the snapshot path (latest version)
-         snapshots_dir = cache_dir / 'snapshots'
+         snapshots_dir = cache_dir / "snapshots"
          if snapshots_dir.exists():
              snapshot_dirs = [d for d in snapshots_dir.iterdir() if d.is_dir()]
              if snapshot_dirs:
@@ -77,13 +75,13 @@ def _resolve_model_path(model_name_or_path: str) -> str:
                  return str(latest_snapshot)
 
      try:
-         downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
+         downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
          _create_cache_marker(cache_dir)
          return downloaded_path
      except LocalEntryNotFoundError:
          try:
-             os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
-             downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
+             os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+             downloaded_path = snapshot_download(repo_id=model_name_or_path, repo_type="model")
              _create_cache_marker(cache_dir)
              return downloaded_path
          except Exception as e:  # pragma: no cover - bubble up for caller context
@@ -94,40 +92,14 @@ def _resolve_model_path(model_name_or_path: str) -> str:
 
  def _select_device(device: Optional[str]) -> str:
      """Select best available torch device when not explicitly provided."""
-     if device:
+     if device and device != "auto":
          return device
 
      import torch
 
-     detected = 'cpu'
+     detected = "cpu"
      if torch.backends.mps.is_available():
-         detected = 'mps'
+         detected = "mps"
      elif torch.cuda.is_available():
-         detected = 'cuda'
+         detected = "cuda"
      return detected
-
-
- def _load_tokenizer(
-     client_wrapper: Any,
-     model_path: str,
-     device: str,
-     *,
-     tokenizer_cls: Type[LatticeTokenizer] = LatticeTokenizer,
- ) -> LatticeTokenizer:
-     """Instantiate tokenizer with consistent error handling."""
-     try:
-         return tokenizer_cls.from_pretrained(
-             client_wrapper=client_wrapper,
-             model_path=model_path,
-             device=device,
-         )
-     except Exception as e:
-         raise ModelLoadError(f'tokenizer from {model_path}', original_error=e)
-
-
- def _load_worker(model_path: str, device: str) -> Lattice1AlphaWorker:
-     """Instantiate lattice worker with consistent error handling."""
-     try:
-         return Lattice1AlphaWorker(model_path, device=device, num_threads=8)
-     except Exception as e:
-         raise ModelLoadError(f'worker from {model_path}', original_error=e)
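The _select_device change means "auto" (or None) now falls through to detection, preferring MPS, then CUDA, then CPU, and local model paths gain "~" expansion. A short behavior sketch of these private helpers, shown for illustration only:

    # Behavior sketch for the updated helpers in lattifai/utils.py (private API).
    from lattifai.utils import _resolve_model_path, _select_device

    # Explicit devices are passed through; "auto" or None triggers detection.
    print(_select_device("cuda"))  # -> "cuda"
    print(_select_device("auto"))  # -> "mps" / "cuda" / "cpu" depending on the host

    # Existing local paths (now ~-expanded) are returned directly; anything else is
    # treated as a Hugging Face repo id and resolved via snapshot_download, with a
    # date-stamped ".doneYYYYMMDD" marker keeping the cached snapshot valid for one day.
    model_dir = _resolve_model_path("~/models/lattice-1")  # hypothetical local path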
lattifai/workflow/__init__.py ADDED
@@ -0,0 +1,22 @@
+ """
+ LattifAI Agentic Workflows
+
+ This module provides agentic workflow capabilities for automated processing
+ of multimedia content through intelligent agent-based pipelines.
+ """
+
+ # Import transcript processing functionality
+
+
+ from .base import WorkflowAgent, WorkflowResult, WorkflowStep
+ from .file_manager import TRANSCRIBE_CHOICE, FileExistenceManager
+ from .youtube import YouTubeDownloader
+
+ __all__ = [
+     "WorkflowAgent",
+     "WorkflowStep",
+     "WorkflowResult",
+     "FileExistenceManager",
+     "YouTubeDownloader",
+     "TRANSCRIBE_CHOICE",
+ ]
lattifai/workflow/agents.py ADDED
@@ -0,0 +1,6 @@
+ """
+ Caption Agents
+
+ """
+
+ __all__ = []
lattifai/{workflows → workflow}/base.py RENAMED
@@ -7,20 +7,20 @@ import logging
  import time
  from dataclasses import dataclass
  from enum import Enum
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Dict, List, Optional
 
  import colorful
 
 
  def setup_workflow_logger(name: str) -> logging.Logger:
      """Setup a logger with consistent formatting for workflow modules"""
-     logger = logging.getLogger(f'workflows.{name}')
+     logger = logging.getLogger(f"workflows.{name}")
 
      # Only add handler if it doesn't exist
      if not logger.handlers:
          handler = logging.StreamHandler()
          formatter = logging.Formatter(
-             '%(asctime)s - %(name)+17s.py:%(lineno)-4d - %(levelname)-8s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'
+             "%(asctime)s - %(name)+17s.py:%(lineno)-4d - %(levelname)-8s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
          )
          handler.setFormatter(formatter)
          logger.addHandler(handler)
@@ -30,17 +30,17 @@ def setup_workflow_logger(name: str) -> logging.Logger:
      return logger
 
 
- logger = setup_workflow_logger('base')
+ logger = setup_workflow_logger("base")
 
 
  class WorkflowStatus(Enum):
      """Workflow execution status"""
 
-     PENDING = 'pending'
-     RUNNING = 'running'
-     COMPLETED = 'completed'
-     FAILED = 'failed'
-     RETRYING = 'retrying'
+     PENDING = "pending"
+     RUNNING = "running"
+     COMPLETED = "completed"
+     FAILED = "failed"
+     RETRYING = "retrying"
 
 
  @dataclass
@@ -84,7 +84,7 @@ class WorkflowAgent(abc.ABC):
          self.name = name
          self.max_retries = max_retries
          self.steps: List[WorkflowStep] = []
-         self.logger = setup_workflow_logger('agent')
+         self.logger = setup_workflow_logger("agent")
 
      @abc.abstractmethod
      def define_steps(self) -> List[WorkflowStep]:
@@ -111,11 +111,11 @@ class WorkflowAgent(abc.ABC):
          context = kwargs.copy()
          step_results = []
 
-         self.logger.info(colorful.bold_white_on_green(f'🚀 Starting workflow: {self.name}'))
+         self.logger.info(colorful.bold_white_on_green(f"🚀 Starting workflow: {self.name}"))
 
          try:
              for i, step in enumerate(self.steps):
-                 step_info = f'📋 Step {i + 1}/{len(self.steps)}: {step.name}'
+                 step_info = f"📋 Step {i + 1}/{len(self.steps)}: {step.name}"
                  self.logger.info(colorful.bold_white_on_green(step_info))
 
                  step_start = time.time()
@@ -123,17 +123,17 @@ class WorkflowAgent(abc.ABC):
                  step_duration = time.time() - step_start
 
                  step_results.append(
-                     {'step_name': step.name, 'status': 'completed', 'duration': step_duration, 'result': step_result}
+                     {"step_name": step.name, "status": "completed", "duration": step_duration, "result": step_result}
                  )
 
                  # Update context with step result
-                 context[f'step_{i}_result'] = step_result
+                 context[f"step_{i}_result"] = step_result
                  context[f'{step.name.lower().replace(" ", "_")}_result'] = step_result
 
-                 self.logger.info(f'✅ Step {i + 1} completed in {step_duration:.2f}s')
+                 self.logger.info(f"✅ Step {i + 1} completed in {step_duration:.2f}s")
 
              execution_time = time.time() - start_time
-             self.logger.info(f'🎉 Workflow completed in {execution_time:.2f}s')
+             self.logger.info(f"🎉 Workflow completed in {execution_time:.2f}s")
 
              return WorkflowResult(
                  status=WorkflowStatus.COMPLETED, data=context, execution_time=execution_time, step_results=step_results
@@ -145,9 +145,9 @@ class WorkflowAgent(abc.ABC):
              from lattifai.errors import LattifAIError
 
              if isinstance(e, LattifAIError):
-                 self.logger.error(f'❌ Workflow failed after {execution_time:.2f}s: [{e.error_code}] {e.message}')
+                 self.logger.error(f"❌ Workflow failed after {execution_time:.2f}s: [{e.error_code}] {e.message}")
              else:
-                 self.logger.error(f'❌ Workflow failed after {execution_time:.2f}s: {str(e)}')
+                 self.logger.error(f"❌ Workflow failed after {execution_time:.2f}s: {str(e)}")
 
              return WorkflowResult(
                  status=WorkflowStatus.FAILED,
@@ -164,7 +164,7 @@ class WorkflowAgent(abc.ABC):
          for attempt in range(step.max_retries + 1):
              try:
                  if attempt > 0:
-                     self.logger.info(f'🔄 Retrying step {step.name} (attempt {attempt + 1}/{step.max_retries + 1})')
+                     self.logger.info(f"🔄 Retrying step {step.name} (attempt {attempt + 1}/{step.max_retries + 1})")
 
                  result = await self.execute_step(step, context)
                  return result
@@ -176,14 +176,14 @@ class WorkflowAgent(abc.ABC):
              # For LattifAI errors, show simplified message in logs
              from lattifai.errors import LattifAIError
 
-             error_summary = f'[{e.error_code}]' if isinstance(e, LattifAIError) else str(e)[:100]
+             error_summary = f"[{e.error_code}]" if isinstance(e, LattifAIError) else str(e)[:100]
 
              if step.should_retry():
-                 self.logger.warning(f'⚠️ Step {step.name} failed: {error_summary}. Retrying...')
+                 self.logger.warning(f"⚠️ Step {step.name} failed: {error_summary}. Retrying...")
                  continue
              else:
                  self.logger.error(
-                     f'❌ Step {step.name} failed after {step.max_retries + 1} attempts: {error_summary}'
+                     f"❌ Step {step.name} failed after {step.max_retries + 1} attempts: {error_summary}"
                  )
                  raise e
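To show how the retry and context plumbing above is meant to be consumed, here is a hedged sketch of a concrete agent. It assumes WorkflowStep can be constructed with a name, that execute() is a coroutine, and that execute_step(step, context) is the per-step hook the base class dispatches to; all of these are consistent with the hunks above, but the WorkflowStep dataclass fields and the wiring of define_steps() into self.steps are not shown in this diff, so the example assigns steps explicitly.

    # Sketch of a WorkflowAgent subclass; constructor keywords for WorkflowAgent
    # and WorkflowStep are assumed from the attributes used in base.py.
    import asyncio
    from typing import Any, Dict, List

    from lattifai.workflow.base import WorkflowAgent, WorkflowStep


    class EchoAgent(WorkflowAgent):
        """Toy two-step agent used only to illustrate the execution loop."""

        def define_steps(self) -> List[WorkflowStep]:
            return [WorkflowStep(name="Prepare"), WorkflowStep(name="Report")]

        async def execute_step(self, step: WorkflowStep, context: Dict[str, Any]) -> Any:
            # Each step sees the accumulated context: the original kwargs plus the
            # results of earlier steps under step_{i}_result and <step_name>_result.
            if step.name == "Prepare":
                return {"source": context.get("url", "unknown")}
            return f"processed {context['prepare_result']['source']}"


    agent = EchoAgent(name="echo")
    agent.steps = agent.define_steps()  # the diff does not show where steps get populated
    result = asyncio.run(agent.execute(url="https://example.com/video"))
    print(result.status, f"{result.execution_time:.2f}s")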