lattifai 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lattifai/bin/agent.py CHANGED
@@ -210,7 +210,7 @@ async def _run_youtube_workflow(
210
210
  click.echo()
211
211
 
212
212
  # Import workflow components
213
- from lattifai import AsyncLattifAI
213
+ from lattifai.client import AsyncLattifAI
214
214
  from lattifai.workflows import YouTubeSubtitleAgent
215
215
  from lattifai.workflows.gemini import GeminiTranscriber
216
216
  from lattifai.workflows.youtube import YouTubeDownloader
@@ -309,12 +309,13 @@ if __name__ == '__main__':
309
309
 
310
310
  asyncio.run(
311
311
  _run_youtube_workflow(
312
+ # url='https://www.youtube.com/watch?v=7nv1snJRCEI',
312
313
  url='https://www.youtube.com/watch?v=DQacCB9tDaw',
313
314
  lattifai_api_key=os.getenv('LATTIFAI_API_KEY'),
314
315
  gemini_api_key=os.getenv('GEMINI_API_KEY', ''),
315
- device='cpu',
316
+ device='mps',
316
317
  model_name_or_path='Lattifai/Lattice-1-Alpha',
317
- media_format='mp4',
318
+ media_format='mp3',
318
319
  output_format='TextGrid',
319
320
  output_dir='~/Downloads/lattifai_youtube',
320
321
  max_retries=0,
lattifai/io/reader.py CHANGED
@@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union
5
5
  from lhotse.utils import Pathlike
6
6
 
7
7
  from .supervision import Supervision
8
+ from .text_parser import parse_speaker_text
8
9
 
9
10
  SubtitleFormat = Literal['txt', 'srt', 'vtt', 'ass', 'auto']
10
11
 
@@ -72,9 +73,11 @@ class SubtitleReader(ABCMeta):
72
73
 
73
74
  supervisions = []
74
75
  for event in subs.events:
76
+ speaker, text = parse_speaker_text(event.text)
75
77
  supervisions.append(
76
78
  Supervision(
77
- text=event.text,
79
+ text=text,
80
+ speaker=speaker,
78
81
  start=event.start / 1000.0 if event.start is not None else None,
79
82
  duration=(event.end - event.start) / 1000.0 if event.end is not None else None,
80
83
  )
@@ -0,0 +1,75 @@
1
+ import logging
2
+ import re
3
+ from typing import Optional, Tuple
4
+
5
# Speaker-label formats commonly found in subtitles, e.g. ">> NAME: text".
#
# Two details are written as escapes on purpose, so they survive HTML decoding
# and Unicode normalisation of this file:
#   * \x26gt;  -> the HTML-escaped greater-than entity (ampersand, "gt", semicolon)
#   * \uFF1A   -> FULLWIDTH COLON, used as the separator in CJK subtitles
_COLONS = (':', '\uFF1A')

# Group 1 keeps the marker, the name and the colon; group 2 is the spoken text.
SPEAKER_PATTERN = re.compile(r'((?:>>|\x26gt;\x26gt;|>|\x26gt;).*?[:\uFF1A])\s*(.*)')

# Transcriber Output Example:
# 26:19.919 --> 26:34.921
# [SPEAKER_01]: 越来越多的科技巨头入...
# Group 1 keeps the bracketed label and its trailing colon.
SPEAKER_LATTIFAI = re.compile(r'(^\[SPEAKER_.*?\]:)\s*(.*)')

# NISHTHA BHATIA: Hey, everyone.
# DIETER: Oh, hey, Nishtha.
# GEMINI: That might
# One or two ALL-CAPS words (1-15 letters each); group 1 excludes the colon.
SPEAKER_PATTERN2 = re.compile(r'^([A-Z]{1,15}(?:\s+[A-Z]{1,15})?)[:\uFF1A]\s*(.*)$')


def parse_speaker_text(line: str) -> Tuple[Optional[str], str]:
    """Split a subtitle line into ``(speaker, text)``.

    Literal ``\\N`` sequences (backslash + ``N``) are replaced with a space
    first. The line is then tried against ``SPEAKER_PATTERN``,
    ``SPEAKER_LATTIFAI`` and ``SPEAKER_PATTERN2`` in that order; the first
    pattern that matches at the start of the line wins.

    Args:
        line: Raw text of one subtitle event.

    Returns:
        ``(speaker, text)`` with both parts stripped when a speaker label is
        recognised. For the ``>>`` and ``[SPEAKER_..]`` forms the speaker
        string still contains the marker and the colon; for the ALL-CAPS form
        it is the bare name. When no label is recognised the result is
        ``(None, line)`` where ``line`` has had ``\\N`` replaced.
    """
    line = line.replace('\\N', ' ')

    # Cheap pre-filter: every supported label ends with a colon.
    if not any(colon in line for colon in _COLONS):
        return None, line

    # Order matters: the marker-based pattern is tried before the bracketed
    # label, and the ALL-CAPS heuristic comes last.
    for pattern in (SPEAKER_PATTERN, SPEAKER_LATTIFAI, SPEAKER_PATTERN2):
        match = pattern.match(line)
        if match:
            return match.group(1).strip(), match.group(2).strip()

    return None, line
44
+
45
+
46
if __name__ == '__main__':
    # Ad-hoc self-check, run only when this module is executed as a script.

    # Marker-based form: group 1 keeps ">>", the key and the colon; group 2 is
    # the value. \uFF1A is FULLWIDTH COLON, written as an escape so it survives
    # Unicode normalisation of this file.
    pattern = re.compile(r'(>>.*?[:\uFF1A])\s*(.*)')

    test_strings = [
        '>>Key: Value',
        '>> Key with space : Value with space ',
        # 全角 means "fullwidth": this sample exercises the fullwidth colon.
        '>> 全角键 \uff1a 全角值',
        '>>Key:Value xxx. >>Key:Value',
    ]

    for text in test_strings:
        match = pattern.match(text)
        if not match:
            # Fail loudly, consistent with the ALL-CAPS check below.
            raise ValueError(f"No match for: '{text}'")
        print(f"Input: '{text}'")
        print(f" Key: '{match.group(1)}'")
        print(f" Value: '{match.group(2)}'")
        print('-------------')

    # pattern2: ALL-CAPS speaker names
    test_strings2 = ['NISHTHA BHATIA: Hey, everyone.', 'DIETER: Oh, hey, Nishtha.', 'GEMINI: That might']
    for text in test_strings2:
        match = SPEAKER_PATTERN2.match(text)
        if not match:
            raise ValueError(f"No match for: '{text}'")
        print(f" Input: '{text}'")
        print(f"Speaker: '{match.group(1)}'")
        print(f"Content: '{match.group(2)}'")
        print('-------------')
@@ -192,12 +192,15 @@ class LatticeTokenizer:
192
192
  for s, supervision in enumerate(supervisions):
193
193
  text_len += len(supervision.text)
194
194
  if supervision.speaker:
195
- speakers.append(supervision.speaker)
196
195
  if sidx < s:
196
+ if len(speakers) < len(texts) + 1:
197
+ speakers.append(None)
197
198
  text = ' '.join([sup.text for sup in supervisions[sidx:s]])
198
199
  texts.append(text)
199
200
  sidx = s
200
201
  text_len = len(supervision.text)
202
+ speakers.append(supervision.speaker)
203
+
201
204
  else:
202
205
  if text_len >= 2000 or s == len(supervisions) - 1:
203
206
  if len(speakers) < len(texts) + 1:
@@ -238,6 +241,12 @@ class LatticeTokenizer:
238
241
  processed_sentences.extend(resplit_parts)
239
242
  _sentences = processed_sentences
240
243
 
244
+ if not _sentences:
245
+ if remainder:
246
+ _sentences, remainder = [remainder.strip()], ''
247
+ else:
248
+ continue
249
+
241
250
  if any(_sentences[-1].endswith(ep) for ep in END_PUNCTUATION):
242
251
  supervisions.extend(
243
252
  Supervision(text=text, speaker=(_speaker if s == 0 else None)) for s, text in enumerate(_sentences)
@@ -0,0 +1,34 @@
1
+ """
2
+ LattifAI Agentic Workflows
3
+
4
+ This module provides agentic workflow capabilities for automated processing
5
+ of multimedia content through intelligent agent-based pipelines.
6
+ """
7
+
8
+ # Import transcript processing functionality
9
+ from lattifai.io import (
10
+ ALL_SUBTITLE_FORMATS,
11
+ INPUT_SUBTITLE_FORMATS,
12
+ OUTPUT_SUBTITLE_FORMATS,
13
+ SUBTITLE_FORMATS,
14
+ GeminiReader,
15
+ GeminiWriter,
16
+ )
17
+
18
+ from .agents import YouTubeSubtitleAgent
19
+ from .base import WorkflowAgent, WorkflowResult, WorkflowStep
20
+ from .file_manager import FileExistenceManager
21
+
22
+ __all__ = [
23
+ 'WorkflowAgent',
24
+ 'WorkflowStep',
25
+ 'WorkflowResult',
26
+ 'YouTubeSubtitleAgent',
27
+ 'FileExistenceManager',
28
+ 'GeminiReader',
29
+ 'GeminiWriter',
30
+ 'SUBTITLE_FORMATS',
31
+ 'INPUT_SUBTITLE_FORMATS',
32
+ 'OUTPUT_SUBTITLE_FORMATS',
33
+ 'ALL_SUBTITLE_FORMATS',
34
+ ]
@@ -0,0 +1,10 @@
1
+ """
2
+ Subtitle Agents
3
+
4
+ An agentic workflow for processing YouTube (or other) videos through:
5
+ 1. URL processing and audio download
6
+ 2. Gemini 2.5 Pro transcription
7
+ 3. LattifAI alignment
8
+ """
9
+
10
+ from .youtube import YouTubeSubtitleAgent
@@ -0,0 +1,192 @@
1
+ """
2
+ Base classes for agentic workflows
3
+ """
4
+
5
+ import abc
6
+ import logging
7
+ import time
8
+ from dataclasses import dataclass
9
+ from enum import Enum
10
+ from typing import Any, Dict, List, Optional, Union
11
+
12
+ import colorful
13
+
14
+
15
def setup_workflow_logger(name: str) -> logging.Logger:
    """Return the ``workflows.<name>`` logger, configuring it on first use.

    The first call for a given name attaches one stream handler with the
    shared workflow format, sets the level to INFO and disables propagation.
    Later calls find the handler already present and return the logger
    untouched.
    """
    workflow_logger = logging.getLogger(f'workflows.{name}')

    # Already configured by an earlier call: do not stack a second handler.
    if workflow_logger.handlers:
        return workflow_logger

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(name)+17s.py:%(lineno)-4d - %(levelname)-8s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        )
    )
    workflow_logger.addHandler(stream_handler)
    workflow_logger.setLevel(logging.INFO)
    # This logger has its own handler; keep records away from ancestor loggers.
    workflow_logger.propagate = False

    return workflow_logger


# Module-level logger for this file.
logger = setup_workflow_logger('base')
34
+
35
+
36
class WorkflowStatus(Enum):
    """Lifecycle states a workflow execution can be in."""

    PENDING = 'pending'
    RUNNING = 'running'
    COMPLETED = 'completed'
    FAILED = 'failed'
    RETRYING = 'retrying'


@dataclass
class WorkflowResult:
    """Outcome of one workflow execution.

    Only ``status`` is required; every other field defaults to ``None``.
    """

    status: WorkflowStatus
    # Payload produced by the workflow, if any.
    data: Optional[Any] = None
    # Error text for a failed run.
    error: Optional[str] = None
    # The original exception object behind ``error``.
    exception: Optional[Exception] = None
    execution_time: Optional[float] = None
    # One dict per step result.
    step_results: Optional[List[Dict[str, Any]]] = None

    @property
    def is_success(self) -> bool:
        """True when the status is ``COMPLETED``."""
        succeeded = self.status == WorkflowStatus.COMPLETED
        return succeeded

    @property
    def is_error(self) -> bool:
        """True when the status is ``FAILED``."""
        failed = self.status == WorkflowStatus.FAILED
        return failed
64
+
65
+
66
@dataclass
class WorkflowStep:
    """A single named step of a workflow, with its retry bookkeeping."""

    name: str
    description: str
    required: bool = True
    # Counter compared against ``max_retries`` by ``should_retry``.
    retry_count: int = 0
    max_retries: int = 1

    def should_retry(self) -> bool:
        """Return True while ``retry_count`` is still below ``max_retries``."""
        budget_left = self.retry_count < self.max_retries
        return budget_left
78
+
79
+
80
class WorkflowAgent(abc.ABC):
    """Base class for agentic workflows.

    Subclasses implement ``define_steps`` (which steps exist) and
    ``execute_step`` (how one step runs). ``execute`` runs the steps in order,
    passes a shared context dict through them, retries failing steps, and
    always returns a ``WorkflowResult`` instead of raising.
    """

    def __init__(self, name: str, max_retries: int = 0):
        """Store the workflow name and the retry budget applied to every step."""
        self.name = name
        self.max_retries = max_retries
        self.steps: List['WorkflowStep'] = []
        self.logger = setup_workflow_logger('agent')

    @abc.abstractmethod
    def define_steps(self) -> List['WorkflowStep']:
        """Define the workflow steps"""

    @abc.abstractmethod
    async def execute_step(self, step: 'WorkflowStep', context: Dict[str, Any]) -> Any:
        """Execute a single workflow step"""

    def setup(self):
        """Build the step list and apply this agent's ``max_retries`` to every step."""
        self.steps = self.define_steps()
        for step in self.steps:
            step.max_retries = self.max_retries

    async def execute(self, **kwargs) -> 'WorkflowResult':
        """Execute the complete workflow.

        Args:
            **kwargs: Initial entries of the context dict handed to each step.

        Returns:
            A ``WorkflowResult``. On success ``data`` is the final context,
            which holds the initial kwargs plus ``step_<i>_result`` and
            ``<step_name>_result`` for every step. On failure ``error`` and
            ``exception`` describe what went wrong, and ``step_results`` lists
            the steps that completed before the failure.
        """
        if not self.steps:
            self.setup()

        # perf_counter is monotonic, so durations cannot go negative when the
        # wall clock is adjusted mid-run.
        started = time.perf_counter()
        context = kwargs.copy()
        step_results = []

        self.logger.info(colorful.bold_white_on_green(f'🚀 Starting workflow: {self.name}'))

        try:
            for i, step in enumerate(self.steps):
                step_info = f'📋 Step {i + 1}/{len(self.steps)}: {step.name}'
                self.logger.info(colorful.bold_white_on_green(step_info))

                step_started = time.perf_counter()
                step_result = await self._execute_step_with_retry(step, context)
                step_duration = time.perf_counter() - step_started

                step_results.append(
                    {'step_name': step.name, 'status': 'completed', 'duration': step_duration, 'result': step_result}
                )

                # Expose the result to later steps, by position and by name.
                context[f'step_{i}_result'] = step_result
                context[f'{step.name.lower().replace(" ", "_")}_result'] = step_result

                self.logger.info(f'✅ Step {i + 1} completed in {step_duration:.2f}s')

            execution_time = time.perf_counter() - started
            self.logger.info(f'🎉 Workflow completed in {execution_time:.2f}s')

            return WorkflowResult(
                status=WorkflowStatus.COMPLETED, data=context, execution_time=execution_time, step_results=step_results
            )

        except Exception as e:
            # Top-level boundary: convert any failure into a FAILED result and
            # keep the exception object for the caller.
            execution_time = time.perf_counter() - started
            # For LattifAI errors, just log the error code and basic message
            from lattifai.errors import LattifAIError

            if isinstance(e, LattifAIError):
                self.logger.error(f'❌ Workflow failed after {execution_time:.2f}s: [{e.error_code}] {e.message}')
            else:
                self.logger.error(f'❌ Workflow failed after {execution_time:.2f}s: {e}')

            return WorkflowResult(
                status=WorkflowStatus.FAILED,
                error=str(e),
                exception=e,  # Store the original exception
                execution_time=execution_time,
                step_results=step_results,
            )

    async def _execute_step_with_retry(self, step: 'WorkflowStep', context: Dict[str, Any]) -> Any:
        """Run ``execute_step``, retrying up to ``step.max_retries`` times.

        The step is attempted at most ``step.max_retries + 1`` times: one
        initial try plus the retries. ``step.retry_count`` is reset on entry
        and afterwards holds the number of retries performed in this run.

        Returns:
            Whatever ``execute_step`` returns on the first successful attempt.

        Raises:
            Exception: The error from the final attempt, re-raised with its
                original traceback once the retry budget is spent.
        """
        # Fresh budget for each run: a count left over from an earlier
        # execute() must not eat into this run's retries.
        step.retry_count = 0
        # A negative max_retries still means "try once", never "skip the step".
        total_attempts = max(step.max_retries, 0) + 1

        for attempt in range(total_attempts):
            if attempt > 0:
                self.logger.info(f'🔄 Retrying step {step.name} (attempt {attempt + 1}/{total_attempts})')

            try:
                return await self.execute_step(step, context)
            except Exception as e:
                # For LattifAI errors, show simplified message in logs
                from lattifai.errors import LattifAIError

                error_summary = f'[{e.error_code}]' if isinstance(e, LattifAIError) else str(e)[:100]

                # Decide before counting the retry, so max_retries=N really
                # allows N retries after the first failure.
                if attempt + 1 < total_attempts:
                    step.retry_count += 1
                    self.logger.warning(f'⚠️ Step {step.name} failed: {error_summary}. Retrying...')
                    continue

                self.logger.error(f'❌ Step {step.name} failed after {total_attempts} attempts: {error_summary}')
                raise