lattifai 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/bin/agent.py +4 -0
- lattifai/io/text_parser.py +2 -2
- lattifai/tokenizer/tokenizer.py +1 -1
- lattifai/workers/lattice1_alpha.py +70 -36
- lattifai/workflows/youtube.py +1 -1
- {lattifai-0.4.3.dist-info → lattifai-0.4.5.dist-info}/METADATA +1 -2
- {lattifai-0.4.3.dist-info → lattifai-0.4.5.dist-info}/RECORD +11 -11
- {lattifai-0.4.3.dist-info → lattifai-0.4.5.dist-info}/WHEEL +0 -0
- {lattifai-0.4.3.dist-info → lattifai-0.4.5.dist-info}/entry_points.txt +0 -0
- {lattifai-0.4.3.dist-info → lattifai-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.4.3.dist-info → lattifai-0.4.5.dist-info}/top_level.txt +0 -0
lattifai/bin/agent.py
CHANGED
|
@@ -302,6 +302,10 @@ if not check_dependencies():
|
|
|
302
302
|
if __name__ == '__main__':
|
|
303
303
|
import os
|
|
304
304
|
|
|
305
|
+
from dotenv import find_dotenv, load_dotenv
|
|
306
|
+
|
|
307
|
+
load_dotenv(find_dotenv(usecwd=True))
|
|
308
|
+
|
|
305
309
|
asyncio.run(
|
|
306
310
|
_run_youtube_workflow(
|
|
307
311
|
# url='https://www.youtube.com/watch?v=7nv1snJRCEI',
|
lattifai/io/text_parser.py
CHANGED
|
@@ -8,12 +8,12 @@ SPEAKER_PATTERN = re.compile(r'((?:>>|&gt;&gt;|>|&gt;).*?[:：])\s*(.*)')
|
|
|
8
8
|
# Transcriber Output Example:
|
|
9
9
|
# 26:19.919 --> 26:34.921
|
|
10
10
|
# [SPEAKER_01]: 越来越多的科技巨头入...
|
|
11
|
-
SPEAKER_LATTIFAI = re.compile(r'(^\[SPEAKER_.*?\]
|
|
11
|
+
SPEAKER_LATTIFAI = re.compile(r'(^\[SPEAKER_.*?\][:：])\s*(.*)')
|
|
12
12
|
|
|
13
13
|
# NISHTHA BHATIA: Hey, everyone.
|
|
14
14
|
# DIETER: Oh, hey, Nishtha.
|
|
15
15
|
# GEMINI: That might
|
|
16
|
-
SPEAKER_PATTERN2 = re.compile(r'^([A-Z]{1,15}(?:\s+[A-Z]{1,15})?
|
|
16
|
+
SPEAKER_PATTERN2 = re.compile(r'^([A-Z]{1,15}(?:\s+[A-Z]{1,15})?[:：])\s*(.*)$')
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def parse_speaker_text(line) -> Tuple[Optional[str], str]:
|
lattifai/tokenizer/tokenizer.py
CHANGED
|
@@ -148,7 +148,7 @@ class LatticeTokenizer:
|
|
|
148
148
|
oov_words = []
|
|
149
149
|
for text in texts:
|
|
150
150
|
words = text.lower().replace('-', ' ').replace('—', ' ').replace('–', ' ').split()
|
|
151
|
-
oovs = [w for w in words if w not in self.words]
|
|
151
|
+
oovs = [w.strip(PUNCTUATION) for w in words if w not in self.words]
|
|
152
152
|
if oovs:
|
|
153
153
|
oov_words.extend([w for w in oovs if (w not in self.words and len(w) <= MAXIMUM_WORD_LENGTH)])
|
|
154
154
|
|
|
@@ -1,25 +1,70 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import time
|
|
3
3
|
from collections import defaultdict
|
|
4
|
-
from typing import Any, BinaryIO, Dict, Tuple, Union
|
|
4
|
+
from typing import Any, BinaryIO, Dict, Iterable, Optional, Tuple, Union
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import onnxruntime as ort
|
|
8
|
-
import resampy
|
|
9
8
|
import soundfile as sf
|
|
10
9
|
import torch
|
|
11
10
|
from lhotse import FbankConfig
|
|
12
|
-
from lhotse.
|
|
11
|
+
from lhotse.augmentation import get_or_create_resampler
|
|
13
12
|
from lhotse.features.kaldi.layers import Wav2LogFilterBank
|
|
14
13
|
from lhotse.utils import Pathlike
|
|
15
14
|
|
|
16
|
-
from lattifai.errors import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
15
|
+
from lattifai.errors import AlignmentError, AudioFormatError, AudioLoadError, DependencyError, ModelLoadError
|
|
16
|
+
|
|
17
|
+
ChannelSelectorType = Union[int, Iterable[int], str]


def resample_audio(
    audio_sr: Tuple[torch.Tensor, int],
    sampling_rate: int,
    device: Optional[str],
    channel_selector: Optional[ChannelSelectorType] = 'average',
) -> torch.Tensor:
    """Select channels from a waveform and resample it to ``sampling_rate``.

    Args:
        audio_sr: ``(audio, sr)`` pair. ``audio`` is indexed channel-first
            (``audio.shape[0]`` is treated as the channel count) and ``sr`` is
            its sample rate.
        sampling_rate: Target sample rate. No resampling is done when it
            already equals ``sr``.
        device: Device the selected signal (and the resampler) is moved to.
            ``None`` leaves the tensor on its current device.
        channel_selector: How to reduce the channels:
            * ``None`` -- keep every channel untouched;
            * ``int`` -- keep that single channel (``0 <= index < channels``);
            * ``'average'`` -- average all channels into one;
            * iterable of ints -- keep the listed channels, in that order.

    Returns:
        A ``(1, T)`` tensor for the ``int`` and ``'average'`` selectors, or a
        ``(C, T)`` tensor for ``None`` and iterable selectors.

    Raises:
        ValueError: If the selector names a channel that does not exist, is an
            unknown string, or is an empty iterable.
    """
    import logging

    audio, sr = audio_sr
    num_channels = audio.shape[0]

    if channel_selector is None:
        # keep the original multi-channel signal
        tensor = audio
    elif isinstance(channel_selector, int):
        # An index equal to num_channels (or a negative one) would silently
        # produce an empty (0, T) slice below, so reject it explicitly.
        if not 0 <= channel_selector < num_channels:
            raise ValueError(f'Invalid channel: {channel_selector}')
        tensor = audio[channel_selector : channel_selector + 1].clone()
    elif isinstance(channel_selector, str):
        # Must be tested before the generic iterable branch: str is iterable.
        if channel_selector != 'average':
            raise ValueError(f'Invalid channel selector: {channel_selector!r}')
        source = audio if device is None else audio.to(device)
        tensor = torch.mean(source, dim=0, keepdim=True)
    else:
        # Materialise once: a generator would be exhausted by the range check,
        # and a tuple used directly as an index means multi-dimensional
        # indexing (audio[0, 1]) rather than "channels 0 and 1".
        channels = list(channel_selector)
        logging.getLogger(__name__).info(
            f'Selecting channels {channels} from the signal with {num_channels} channels.'
        )
        if not channels:
            raise ValueError(f'Cannot select an empty channel subset from a signal with {num_channels} channels.')
        if max(channels) >= num_channels or min(channels) < 0:
            raise ValueError(
                f'Cannot select channel subset {channels} from a signal with {num_channels} channels.'
            )
        tensor = audio[channels]

    if device is not None:
        tensor = tensor.to(device)

    if sr != sampling_rate:
        resampler = get_or_create_resampler(sr, sampling_rate).to(device=device)
        length = tensor.size(-1)
        # Long signals are resampled in pieces of `sampling_rate * 3600` input
        # samples and concatenated, instead of in one call.
        chunk_size = sampling_rate * 3600
        if length > chunk_size:
            tensor = torch.cat(
                [resampler(tensor[..., start : start + chunk_size]) for start in range(0, length, chunk_size)],
                dim=-1,
            )
        else:
            tensor = resampler(tensor)

    return tensor
|
|
23
68
|
|
|
24
69
|
|
|
25
70
|
class Lattice1AlphaWorker:
|
|
@@ -48,7 +93,7 @@ class Lattice1AlphaWorker:
|
|
|
48
93
|
self.acoustic_ort = ort.InferenceSession(
|
|
49
94
|
f'{model_path}/acoustic_opt.onnx',
|
|
50
95
|
sess_options,
|
|
51
|
-
providers=providers + ['
|
|
96
|
+
providers=providers + ['CPUExecutionProvider', 'CoreMLExecutionProvider'],
|
|
52
97
|
)
|
|
53
98
|
except Exception as e:
|
|
54
99
|
raise ModelLoadError(f'acoustic model from {model_path}', original_error=e)
|
|
@@ -93,17 +138,13 @@ class Lattice1AlphaWorker:
|
|
|
93
138
|
self.timings['emission'] += time.time() - _start
|
|
94
139
|
return emission # (1, T, vocab_size) torch
|
|
95
140
|
|
|
96
|
-
def load_audio(
|
|
141
|
+
def load_audio(
|
|
142
|
+
self, audio: Union[Pathlike, BinaryIO], channel_selector: Optional[ChannelSelectorType] = 'average'
|
|
143
|
+
) -> Tuple[torch.Tensor, int]:
|
|
97
144
|
# load audio
|
|
98
145
|
try:
|
|
99
|
-
waveform, sample_rate =
|
|
100
|
-
|
|
101
|
-
waveform = waveform.reshape([1, -1]) # (1, L)
|
|
102
|
-
else: # make sure channel first
|
|
103
|
-
if waveform.shape[0] > waveform.shape[1]:
|
|
104
|
-
waveform = waveform.transpose(0, 1)
|
|
105
|
-
# average multiple channels
|
|
106
|
-
waveform = np.mean(waveform, axis=0, keepdims=True) # (1, L)
|
|
146
|
+
waveform, sample_rate = sf.read(audio, always_2d=True, dtype='float32') # numpy array
|
|
147
|
+
waveform = waveform.T # (channels, samples)
|
|
107
148
|
except Exception as primary_error:
|
|
108
149
|
# Fallback to PyAV for formats not supported by soundfile
|
|
109
150
|
try:
|
|
@@ -141,23 +182,16 @@ class Lattice1AlphaWorker:
|
|
|
141
182
|
|
|
142
183
|
# Concatenate all frames
|
|
143
184
|
waveform = np.concatenate(frames, axis=1)
|
|
144
|
-
# Average multiple channels to mono
|
|
145
|
-
if waveform.shape[0] > 1:
|
|
146
|
-
waveform = np.mean(waveform, axis=0, keepdims=True)
|
|
147
|
-
|
|
148
185
|
sample_rate = audio_stream.codec_context.sample_rate
|
|
149
186
|
except Exception as e:
|
|
150
187
|
raise AudioLoadError(str(audio), original_error=e)
|
|
151
188
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
return torch.from_numpy(waveform).to(self.device) # (1, L)
|
|
189
|
+
return resample_audio(
|
|
190
|
+
(torch.from_numpy(waveform), sample_rate),
|
|
191
|
+
self.config.get('sampling_rate', 16000),
|
|
192
|
+
device=self.device.type,
|
|
193
|
+
channel_selector=channel_selector,
|
|
194
|
+
)
|
|
161
195
|
|
|
162
196
|
def alignment(
|
|
163
197
|
self, audio: Union[Union[Pathlike, BinaryIO], torch.tensor], lattice_graph: Tuple[str, int, float]
|
|
@@ -231,9 +265,9 @@ class Lattice1AlphaWorker:
|
|
|
231
265
|
emission.to(device) * acoustic_scale,
|
|
232
266
|
decoding_graph.to(device),
|
|
233
267
|
torch.tensor([emission.shape[1]], dtype=torch.int32),
|
|
234
|
-
search_beam=
|
|
235
|
-
output_beam=
|
|
236
|
-
min_active_states=
|
|
268
|
+
search_beam=200,
|
|
269
|
+
output_beam=80,
|
|
270
|
+
min_active_states=400,
|
|
237
271
|
max_active_states=10000,
|
|
238
272
|
subsampling_factor=1,
|
|
239
273
|
reject_low_confidence=False,
|
lattifai/workflows/youtube.py
CHANGED
|
@@ -708,7 +708,7 @@ class YouTubeSubtitleAgent(WorkflowAgent):
|
|
|
708
708
|
|
|
709
709
|
# If subtitle was already downloaded in step 1 and user selected it, use it directly
|
|
710
710
|
if downloaded_subtitle_path and downloaded_subtitle_path != 'gemini':
|
|
711
|
-
self.logger.info(f'📥 Using subtitle
|
|
711
|
+
self.logger.info(f'📥 Using subtitle: {downloaded_subtitle_path}')
|
|
712
712
|
return {'subtitle_path': downloaded_subtitle_path}
|
|
713
713
|
|
|
714
714
|
# Check for existing subtitles if subtitle was not downloaded yet
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lattifai
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.5
|
|
4
4
|
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
5
|
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
6
|
Maintainer-email: Lattice <tech@lattifai.com>
|
|
@@ -57,7 +57,6 @@ Requires-Dist: pysubs2
|
|
|
57
57
|
Requires-Dist: praatio
|
|
58
58
|
Requires-Dist: tgt
|
|
59
59
|
Requires-Dist: onnxruntime
|
|
60
|
-
Requires-Dist: resampy
|
|
61
60
|
Requires-Dist: g2p-phonemizer==0.1.1
|
|
62
61
|
Requires-Dist: wtpsplit>=2.1.6
|
|
63
62
|
Requires-Dist: av
|
|
@@ -4,7 +4,7 @@ lattifai/client.py,sha256=YvK25fcXwKREYCkq_TUBdEZh7I9RNEwCbgW4qUha2ho,13236
|
|
|
4
4
|
lattifai/errors.py,sha256=5i_acoly1g-TLAID8QnhzQshwOXfgLL55mHsdwzlNGA,10814
|
|
5
5
|
lattifai/utils.py,sha256=CzVwNc08u8lm7XavCMJskXHfni0xsZ-EgpcMkRywVm8,4736
|
|
6
6
|
lattifai/bin/__init__.py,sha256=QWbmVUbzqwPQNeOV_g-bOsz53w37v-tbZ3uFrSj-7Mg,90
|
|
7
|
-
lattifai/bin/agent.py,sha256=
|
|
7
|
+
lattifai/bin/agent.py,sha256=8nRh0GC1M4__-qKQtxZspcyNnUm8DOVCWQf3sRblEOI,9826
|
|
8
8
|
lattifai/bin/align.py,sha256=JJHk5uzmYGYhpA3ricHdmBLzJWC2aRyF0k4ANnap50w,8151
|
|
9
9
|
lattifai/bin/cli_base.py,sha256=gvPUi9Z0eGwBJ8Es5xq1z00YzFPlocYiR2zpL7ekyhw,626
|
|
10
10
|
lattifai/bin/subtitle.py,sha256=UZMPh71O2X1UwbfZ9VWlhzxkz78viz8KWwoVsDpewK0,6577
|
|
@@ -13,27 +13,27 @@ lattifai/io/gemini_reader.py,sha256=WDZA93MSrUAsa5j-ZDXLdPXzEIoREymEy-rMAED_6f4,
|
|
|
13
13
|
lattifai/io/gemini_writer.py,sha256=rlXO9zx6kQhqTi9K9izE69-8S-2GPOIiJHPwZyebpiM,6515
|
|
14
14
|
lattifai/io/reader.py,sha256=h4T8dveLHXqSonma0J50iyjqkxH26tujeoPbnLx05nA,3333
|
|
15
15
|
lattifai/io/supervision.py,sha256=iBDRiDJ0hddo__SoEZau2cdEIBFnXZNLgSWFjtJd-lM,871
|
|
16
|
-
lattifai/io/text_parser.py,sha256=
|
|
16
|
+
lattifai/io/text_parser.py,sha256=5rO2qgFLfWIcoU0K-FGBgs2qG7r6Uz2J2LklGTgSUyA,2391
|
|
17
17
|
lattifai/io/utils.py,sha256=4drRwcM1n7AYhdJcF51EZxMTy_Ut_1GKtdWpRhPuVmg,686
|
|
18
18
|
lattifai/io/writer.py,sha256=8n9ZBuXuVOCFwzr1hqrnXpZ-fARTsepebwjKgRuueWE,3872
|
|
19
19
|
lattifai/tokenizer/__init__.py,sha256=y-FyfO7tLga9b46pkCC6jdSBKOFZS-jFfHcqUieGEyU,120
|
|
20
20
|
lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
|
|
21
|
-
lattifai/tokenizer/tokenizer.py,sha256=
|
|
21
|
+
lattifai/tokenizer/tokenizer.py,sha256=uLtGeT6ad_vPuoXNw8Rz43bAdKQWheI_tfHAV47FyqQ,19205
|
|
22
22
|
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
23
|
-
lattifai/workers/lattice1_alpha.py,sha256=
|
|
23
|
+
lattifai/workers/lattice1_alpha.py,sha256=4FX91eTmcTDZt78NrMY8EgkwlwBKlU4Qw39qcQeqiTg,11469
|
|
24
24
|
lattifai/workflows/__init__.py,sha256=mTOdwQQs2YY1s0JsVGsATb2TWPhpNo7bRiAAJW92740,830
|
|
25
25
|
lattifai/workflows/agents.py,sha256=ZnxyEb-Li8gQw3Z7V3K7jjnT7sQAtG1uCFuXGX77IcE,227
|
|
26
26
|
lattifai/workflows/base.py,sha256=ZSePq2O2hB59K5CMbk_iGiOM9FSHOVQdG3t8Oyz9gwE,6264
|
|
27
27
|
lattifai/workflows/file_manager.py,sha256=5km59cB4s9PcJgMMqZ6YqRY8sTqvTzd1qyQ2T5bHlfI,31985
|
|
28
28
|
lattifai/workflows/gemini.py,sha256=kpp3GiHyui8RZLWRQMx7vojBscCi2lZSxrjM1VHo_mc,6144
|
|
29
|
-
lattifai/workflows/youtube.py,sha256=
|
|
29
|
+
lattifai/workflows/youtube.py,sha256=tRy9LV5mwgQfQ3jfL_aiyB2catDHqCS2Hmy9Lk3-VGs,38757
|
|
30
30
|
lattifai/workflows/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
|
|
31
31
|
lattifai/workflows/prompts/__init__.py,sha256=i3kMT5sg_W9LbPcda0xmZWLg0tPjXGVI3iKtHrBng3o,1351
|
|
32
32
|
lattifai/workflows/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
|
|
33
33
|
lattifai/workflows/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnKzqVCakr-fTp2Xk38UOsUquvruNX-LU,4600
|
|
34
|
-
lattifai-0.4.
|
|
35
|
-
lattifai-0.4.
|
|
36
|
-
lattifai-0.4.
|
|
37
|
-
lattifai-0.4.
|
|
38
|
-
lattifai-0.4.
|
|
39
|
-
lattifai-0.4.
|
|
34
|
+
lattifai-0.4.5.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
35
|
+
lattifai-0.4.5.dist-info/METADATA,sha256=e0ojxc-4xgpgpWZC4wdXlTySQYliuLCa-MGG1F80U6E,26687
|
|
36
|
+
lattifai-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
37
|
+
lattifai-0.4.5.dist-info/entry_points.txt,sha256=fCgo8-LKA_9C7_jmEGsZPJko0woXHtEh0iRbpO7PYzI,69
|
|
38
|
+
lattifai-0.4.5.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
|
|
39
|
+
lattifai-0.4.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|