lattifai 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lattifai/bin/agent.py CHANGED
@@ -210,7 +210,7 @@ async def _run_youtube_workflow(
210
210
  click.echo()
211
211
 
212
212
  # Import workflow components
213
- from lattifai import AsyncLattifAI
213
+ from lattifai.client import AsyncLattifAI
214
214
  from lattifai.workflows import YouTubeSubtitleAgent
215
215
  from lattifai.workflows.gemini import GeminiTranscriber
216
216
  from lattifai.workflows.youtube import YouTubeDownloader
@@ -309,12 +309,13 @@ if __name__ == '__main__':
309
309
 
310
310
  asyncio.run(
311
311
  _run_youtube_workflow(
312
+ # url='https://www.youtube.com/watch?v=7nv1snJRCEI',
312
313
  url='https://www.youtube.com/watch?v=DQacCB9tDaw',
313
314
  lattifai_api_key=os.getenv('LATTIFAI_API_KEY'),
314
315
  gemini_api_key=os.getenv('GEMINI_API_KEY', ''),
315
- device='cpu',
316
+ device='mps',
316
317
  model_name_or_path='Lattifai/Lattice-1-Alpha',
317
- media_format='mp4',
318
+ media_format='mp3',
318
319
  output_format='TextGrid',
319
320
  output_dir='~/Downloads/lattifai_youtube',
320
321
  max_retries=0,
lattifai/io/parser.py ADDED
@@ -0,0 +1,75 @@
1
+ import logging
2
+ import re
3
+ from typing import Optional, Tuple
4
+
5
+ # 来自于字幕中常见的说话人标记格式
6
+ SPEAKER_PATTERN = re.compile(r'((?:>>|>>|>|>).*?[::])\s*(.*)')
7
+
8
+ # Transcriber Output Example:
9
+ # 26:19.919 --> 26:34.921
10
+ # [SPEAKER_01]: 越来越多的科技巨头入...
11
+ SPEAKER_LATTIFAI = re.compile(r'(^\[SPEAKER_.*?\]:)\s*(.*)')
12
+
13
+ # NISHTHA BHATIA: Hey, everyone.
14
+ # DIETER: Oh, hey, Nishtha.
15
+ # GEMINI: That might
16
+ SPEAKER_PATTERN2 = re.compile(r'^([A-Z]{1,15}(?:\s+[A-Z]{1,15})?)[::]\s*(.*)$')
17
+
18
+
19
+ def parse_speaker_text(line) -> Tuple[Optional[str], str]:
20
+ line = line.replace('\\N', ' ')
21
+
22
+ if ':' not in line and ':' not in line:
23
+ return None, line
24
+
25
+ # 匹配以 >> 开头的行,并去除开头的名字和冒号
26
+ match = SPEAKER_PATTERN.match(line)
27
+ if match:
28
+ return match.group(1).strip(), match.group(2).strip()
29
+
30
+ match = SPEAKER_LATTIFAI.match(line)
31
+ if match:
32
+ assert len(match.groups()) == 2, match.groups()
33
+ if not match.group(1):
34
+ logging.error(f'ParseSub LINE [{line}]')
35
+ else:
36
+ return match.group(1).strip(), match.group(2).strip()
37
+
38
+ match = SPEAKER_PATTERN2.match(line)
39
+ if match:
40
+ assert len(match.groups()) == 2, match.groups()
41
+ return match.group(1).strip(), match.group(2).strip()
42
+
43
+ return None, line
44
+
45
+
46
+ if __name__ == '__main__':
47
+ pattern = re.compile(r'>>\s*(.*?)\s*[::]\s*(.*)')
48
+ pattern = re.compile(r'(>>.*?[::])\s*(.*)')
49
+
50
+ test_strings = [
51
+ '>>Key: Value',
52
+ '>> Key with space : Value with space ',
53
+ '>> 全角键 : 全角值',
54
+ '>>Key:Value xxx. >>Key:Value',
55
+ ]
56
+
57
+ for text in test_strings:
58
+ match = pattern.match(text)
59
+ if match:
60
+ print(f"Input: '{text}'")
61
+ print(f" Key: '{match.group(1)}'")
62
+ print(f" Value: '{match.group(2)}'")
63
+ print('-------------')
64
+
65
+ # pattern2
66
+ test_strings2 = ['NISHTHA BHATIA: Hey, everyone.', 'DIETER: Oh, hey, Nishtha.', 'GEMINI: That might']
67
+ for text in test_strings2:
68
+ match = SPEAKER_PATTERN2.match(text)
69
+ if match:
70
+ print(f" Input: '{text}'")
71
+ print(f"Speaker: '{match.group(1)}'")
72
+ print(f"Content: '{match.group(2)}'")
73
+ print('-------------')
74
+ else:
75
+ raise ValueError(f"No match for: '{text}'")
lattifai/io/reader.py CHANGED
@@ -4,6 +4,7 @@ from typing import List, Literal, Optional, Union
4
4
 
5
5
  from lhotse.utils import Pathlike
6
6
 
7
+ from .parser import parse_speaker_text
7
8
  from .supervision import Supervision
8
9
 
9
10
  SubtitleFormat = Literal['txt', 'srt', 'vtt', 'ass', 'auto']
@@ -72,9 +73,11 @@ class SubtitleReader(ABCMeta):
72
73
 
73
74
  supervisions = []
74
75
  for event in subs.events:
76
+ speaker, text = parse_speaker_text(event.text)
75
77
  supervisions.append(
76
78
  Supervision(
77
- text=event.text,
79
+ text=text,
80
+ speaker=speaker,
78
81
  start=event.start / 1000.0 if event.start is not None else None,
79
82
  duration=(event.end - event.start) / 1000.0 if event.end is not None else None,
80
83
  )
@@ -192,12 +192,15 @@ class LatticeTokenizer:
192
192
  for s, supervision in enumerate(supervisions):
193
193
  text_len += len(supervision.text)
194
194
  if supervision.speaker:
195
- speakers.append(supervision.speaker)
196
195
  if sidx < s:
196
+ if len(speakers) < len(texts) + 1:
197
+ speakers.append(None)
197
198
  text = ' '.join([sup.text for sup in supervisions[sidx:s]])
198
199
  texts.append(text)
199
200
  sidx = s
200
201
  text_len = len(supervision.text)
202
+ speakers.append(supervision.speaker)
203
+
201
204
  else:
202
205
  if text_len >= 2000 or s == len(supervisions) - 1:
203
206
  if len(speakers) < len(texts) + 1:
@@ -228,7 +231,7 @@ class LatticeTokenizer:
228
231
  remainder = ''
229
232
  # Detect and split special sentence types: e.g., '[APPLAUSE] &gt;&gt; MIRA MURATI:' -> ['[APPLAUSE]', '&gt;&gt; MIRA MURATI:'] # noqa: E501
230
233
  resplit_parts = self._resplit_special_sentence_types(_sentence)
231
- if any(resplit_parts[-1].endswith(sp) for sp in [':', ':']):
234
+ if any(resplit_parts[-1].endswith(sp) for sp in [':', ':', ']']):
232
235
  if s < len(_sentences) - 1:
233
236
  _sentences[s + 1] = resplit_parts[-1] + ' ' + _sentences[s + 1]
234
237
  else: # last part
@@ -238,6 +241,12 @@ class LatticeTokenizer:
238
241
  processed_sentences.extend(resplit_parts)
239
242
  _sentences = processed_sentences
240
243
 
244
+ if not _sentences:
245
+ if remainder:
246
+ _sentences, remainder = [remainder.strip()], ''
247
+ else:
248
+ continue
249
+
241
250
  if any(_sentences[-1].endswith(ep) for ep in END_PUNCTUATION):
242
251
  supervisions.extend(
243
252
  Supervision(text=text, speaker=(_speaker if s == 0 else None)) for s, text in enumerate(_sentences)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
5
5
  Author-email: Lattifai Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>
@@ -40,7 +40,6 @@ Classifier: Programming Language :: Python :: 3.10
40
40
  Classifier: Programming Language :: Python :: 3.11
41
41
  Classifier: Programming Language :: Python :: 3.12
42
42
  Classifier: Programming Language :: Python :: 3.13
43
- Classifier: Programming Language :: Python :: 3.14
44
43
  Classifier: Operating System :: MacOS :: MacOS X
45
44
  Classifier: Operating System :: POSIX :: Linux
46
45
  Classifier: Operating System :: Microsoft :: Windows
@@ -135,16 +134,15 @@ The library provides two equivalent commands: `lai` (recommended for convenience
135
134
  ```bash
136
135
  # Align audio with subtitle (using lai command)
137
136
  lai align audio.wav subtitle.srt output.srt
138
-
139
137
  # Or use the full command
140
138
  lattifai align audio.wav subtitle.srt output.srt
141
139
 
142
- # Process YouTube videos with intelligent workflow
143
- lai agent --youtube https://www.youtube.com/watch?v=VIDEO_ID
144
-
145
140
  # Download and align YouTube content directly
146
141
  lai youtube https://www.youtube.com/watch?v=VIDEO_ID
147
142
 
143
+ # Process YouTube videos with intelligent workflow (equivalent to lai youtube)
144
+ lai agent --youtube https://www.youtube.com/watch?v=VIDEO_ID
145
+
148
146
  # Convert subtitle format
149
147
  lai subtitle convert input.srt output.vtt
150
148
  ```
@@ -261,7 +259,8 @@ Options:
261
259
  ```
262
260
 
263
261
  **When to use `lai agent` vs `lai youtube`**:
264
- - **Use `lai agent`**: For production workflows, batch processing, advanced error handling, and when you need retry logic
262
+ - Both `lai agent --youtube URL` and `lai youtube URL` provide the same core functionality for downloading and aligning YouTube content
263
+ - **Use `lai agent --youtube`**: For production workflows, batch processing, advanced error handling, and when you need retry logic
265
264
  - **Use `lai youtube`**: For quick one-off downloads and alignment with minimal overhead
266
265
 
267
266
  #### Understanding --split_sentence
@@ -4,25 +4,26 @@ lattifai/client.py,sha256=WewbRnfI8b_rQ8V2JG3y2ojkiyEWylXI7No_4NpbzEI,13282
4
4
  lattifai/errors.py,sha256=5i_acoly1g-TLAID8QnhzQshwOXfgLL55mHsdwzlNGA,10814
5
5
  lattifai/utils.py,sha256=CzVwNc08u8lm7XavCMJskXHfni0xsZ-EgpcMkRywVm8,4736
6
6
  lattifai/bin/__init__.py,sha256=QWbmVUbzqwPQNeOV_g-bOsz53w37v-tbZ3uFrSj-7Mg,90
7
- lattifai/bin/agent.py,sha256=7e_Q1ND62u-_voKpLkO77WHmmInJYDPPL0hQuKq8LCY,9863
7
+ lattifai/bin/agent.py,sha256=daXsg0PsW4HS53PYw-r6fSSDsoe9XHOIVEzz-djjKto,9935
8
8
  lattifai/bin/align.py,sha256=Tpu2OWBXBYUqzBt1Qu6HmkVZ8Bpx6mc0O9KItDimoCc,8377
9
9
  lattifai/bin/cli_base.py,sha256=i4KQDyUPJFKae_wjuQeB4N2Vhg9vZ6yc6L9YUJLC18I,507
10
10
  lattifai/bin/subtitle.py,sha256=UZMPh71O2X1UwbfZ9VWlhzxkz78viz8KWwoVsDpewK0,6577
11
11
  lattifai/io/__init__.py,sha256=mGnQkGm6BClRcogRK3J1nJJfPOk5z4o0sZI2nnyJVOU,1087
12
12
  lattifai/io/gemini_reader.py,sha256=WDZA93MSrUAsa5j-ZDXLdPXzEIoREymEy-rMAED_6f4,15152
13
13
  lattifai/io/gemini_writer.py,sha256=rlXO9zx6kQhqTi9K9izE69-8S-2GPOIiJHPwZyebpiM,6515
14
- lattifai/io/reader.py,sha256=_tVQPQ6KBal3Zd7kq1IZ7rpidE0tDmfpk64iU7cGPrg,3199
14
+ lattifai/io/parser.py,sha256=LQHgcEYXaSdhwUo9rP6P_31Z6RMv_BTP1YSKzXji4bk,2386
15
+ lattifai/io/reader.py,sha256=wm4O4ry2ZtBbGtO1FQTrTa1r2_83gUfZpzdh61qvuu0,3328
15
16
  lattifai/io/supervision.py,sha256=iBDRiDJ0hddo__SoEZau2cdEIBFnXZNLgSWFjtJd-lM,871
16
17
  lattifai/io/utils.py,sha256=4drRwcM1n7AYhdJcF51EZxMTy_Ut_1GKtdWpRhPuVmg,686
17
18
  lattifai/io/writer.py,sha256=8n9ZBuXuVOCFwzr1hqrnXpZ-fARTsepebwjKgRuueWE,3872
18
19
  lattifai/tokenizer/__init__.py,sha256=y-FyfO7tLga9b46pkCC6jdSBKOFZS-jFfHcqUieGEyU,120
19
20
  lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
20
- lattifai/tokenizer/tokenizer.py,sha256=AIliPRfBscqzyc2z-P638rOmyF38FbliHkE1NQzrwsE,18903
21
+ lattifai/tokenizer/tokenizer.py,sha256=m6dDvvDmaVFtQd0aikMUViRZNzztXM0OyFs-vlToT9o,19191
21
22
  lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
22
23
  lattifai/workers/lattice1_alpha.py,sha256=1lCq0-bgWMXvYslAbCTFgHC0p6UWPto1y0wkTw9WrmQ,10177
23
- lattifai-0.4.0.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
24
- lattifai-0.4.0.dist-info/METADATA,sha256=VLBwud4AA78kRomOnJ0_dD6tLruRp13DQk3xephebmE,26597
25
- lattifai-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
- lattifai-0.4.0.dist-info/entry_points.txt,sha256=fCgo8-LKA_9C7_jmEGsZPJko0woXHtEh0iRbpO7PYzI,69
27
- lattifai-0.4.0.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
28
- lattifai-0.4.0.dist-info/RECORD,,
24
+ lattifai-0.4.1.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
25
+ lattifai-0.4.1.dist-info/METADATA,sha256=MLSUT7RUUOUd0lD_NA3GIR-AjBBkwjZV-HLiNK5k534,26719
26
+ lattifai-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
27
+ lattifai-0.4.1.dist-info/entry_points.txt,sha256=fCgo8-LKA_9C7_jmEGsZPJko0woXHtEh0iRbpO7PYzI,69
28
+ lattifai-0.4.1.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
29
+ lattifai-0.4.1.dist-info/RECORD,,