lattifai 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/bin/agent.py +4 -0
- lattifai/tokenizer/tokenizer.py +1 -1
- lattifai/workers/lattice1_alpha.py +66 -26
- lattifai/workflows/youtube.py +1 -1
- {lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/METADATA +1 -2
- {lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/RECORD +10 -10
- {lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/WHEEL +0 -0
- {lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/entry_points.txt +0 -0
- {lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/top_level.txt +0 -0
lattifai/bin/agent.py
CHANGED
|
@@ -302,6 +302,10 @@ if not check_dependencies():
|
|
|
302
302
|
if __name__ == '__main__':
|
|
303
303
|
import os
|
|
304
304
|
|
|
305
|
+
from dotenv import find_dotenv, load_dotenv
|
|
306
|
+
|
|
307
|
+
load_dotenv(find_dotenv(usecwd=True))
|
|
308
|
+
|
|
305
309
|
asyncio.run(
|
|
306
310
|
_run_youtube_workflow(
|
|
307
311
|
# url='https://www.youtube.com/watch?v=7nv1snJRCEI',
|
lattifai/tokenizer/tokenizer.py
CHANGED
|
@@ -148,7 +148,7 @@ class LatticeTokenizer:
|
|
|
148
148
|
oov_words = []
|
|
149
149
|
for text in texts:
|
|
150
150
|
words = text.lower().replace('-', ' ').replace('—', ' ').replace('–', ' ').split()
|
|
151
|
-
oovs = [w for w in words if w not in self.words]
|
|
151
|
+
oovs = [w.strip(PUNCTUATION) for w in words if w not in self.words]
|
|
152
152
|
if oovs:
|
|
153
153
|
oov_words.extend([w for w in oovs if (w not in self.words and len(w) <= MAXIMUM_WORD_LENGTH)])
|
|
154
154
|
|
|
lattifai/workers/lattice1_alpha.py
CHANGED
|
@@ -1,20 +1,71 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import time
|
|
3
3
|
from collections import defaultdict
|
|
4
|
-
from typing import Any, BinaryIO, Dict, Tuple, Union
|
|
4
|
+
from typing import Any, BinaryIO, Dict, Iterable, Optional, Tuple, Union
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import onnxruntime as ort
|
|
8
|
-
import resampy
|
|
9
8
|
import soundfile as sf
|
|
10
9
|
import torch
|
|
11
10
|
from lhotse import FbankConfig
|
|
12
|
-
from lhotse.
|
|
11
|
+
from lhotse.augmentation import get_or_create_resampler
|
|
13
12
|
from lhotse.features.kaldi.layers import Wav2LogFilterBank
|
|
14
13
|
from lhotse.utils import Pathlike
|
|
15
14
|
|
|
16
15
|
from lattifai.errors import AlignmentError, AudioFormatError, AudioLoadError, DependencyError, ModelLoadError
|
|
17
16
|
|
|
17
|
+
ChannelSelectorType = Union[int, Iterable[int], str]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def resample_audio(
|
|
21
|
+
audio_sr: Tuple[torch.Tensor, int],
|
|
22
|
+
sampling_rate: int,
|
|
23
|
+
device: Optional[str],
|
|
24
|
+
channel_selector: Optional[ChannelSelectorType] = 'average',
|
|
25
|
+
) -> torch.Tensor:
|
|
26
|
+
"""
|
|
27
|
+
return:
|
|
28
|
+
(1, T)
|
|
29
|
+
"""
|
|
30
|
+
audio, sr = audio_sr
|
|
31
|
+
|
|
32
|
+
if channel_selector is None:
|
|
33
|
+
# keep the original multi-channel signal
|
|
34
|
+
tensor = audio
|
|
35
|
+
elif isinstance(channel_selector, int):
|
|
36
|
+
assert audio.shape[0] >= channel_selector, f'Invalid channel: {channel_selector}'
|
|
37
|
+
tensor = audio[channel_selector : channel_selector + 1].clone()
|
|
38
|
+
del audio
|
|
39
|
+
elif isinstance(channel_selector, str):
|
|
40
|
+
assert channel_selector == 'average'
|
|
41
|
+
tensor = torch.mean(audio.to(device), dim=0, keepdim=True)
|
|
42
|
+
del audio
|
|
43
|
+
else:
|
|
44
|
+
assert isinstance(channel_selector, Iterable)
|
|
45
|
+
num_channels = audio.shape[0]
|
|
46
|
+
print(f'Selecting channels {channel_selector} from the signal with {num_channels} channels.')
|
|
47
|
+
assert isinstance(channel_selector, Iterable)
|
|
48
|
+
if max(channel_selector) >= num_channels:
|
|
49
|
+
raise ValueError(
|
|
50
|
+
f'Cannot select channel subset {channel_selector} from a signal with {num_channels} channels.'
|
|
51
|
+
)
|
|
52
|
+
tensor = audio[channel_selector]
|
|
53
|
+
|
|
54
|
+
tensor = tensor.to(device)
|
|
55
|
+
if sr != sampling_rate:
|
|
56
|
+
resampler = get_or_create_resampler(sr, sampling_rate).to(device=device)
|
|
57
|
+
length = tensor.size(-1)
|
|
58
|
+
chunk_size = sampling_rate * 3600
|
|
59
|
+
if length > chunk_size:
|
|
60
|
+
resampled_chunks = []
|
|
61
|
+
for i in range(0, length, chunk_size):
|
|
62
|
+
resampled_chunks.append(resampler(tensor[..., i : i + chunk_size]))
|
|
63
|
+
tensor = torch.cat(resampled_chunks, dim=-1)
|
|
64
|
+
else:
|
|
65
|
+
tensor = resampler(tensor)
|
|
66
|
+
|
|
67
|
+
return tensor
|
|
68
|
+
|
|
18
69
|
|
|
19
70
|
class Lattice1AlphaWorker:
|
|
20
71
|
"""Worker for processing audio with LatticeGraph."""
|
|
@@ -42,7 +93,7 @@ class Lattice1AlphaWorker:
|
|
|
42
93
|
self.acoustic_ort = ort.InferenceSession(
|
|
43
94
|
f'{model_path}/acoustic_opt.onnx',
|
|
44
95
|
sess_options,
|
|
45
|
-
providers=providers + ['
|
|
96
|
+
providers=providers + ['CPUExecutionProvider', 'CoreMLExecutionProvider'],
|
|
46
97
|
)
|
|
47
98
|
except Exception as e:
|
|
48
99
|
raise ModelLoadError(f'acoustic model from {model_path}', original_error=e)
|
|
@@ -87,17 +138,13 @@ class Lattice1AlphaWorker:
|
|
|
87
138
|
self.timings['emission'] += time.time() - _start
|
|
88
139
|
return emission # (1, T, vocab_size) torch
|
|
89
140
|
|
|
90
|
-
def load_audio(
|
|
141
|
+
def load_audio(
|
|
142
|
+
self, audio: Union[Pathlike, BinaryIO], channel_selector: Optional[ChannelSelectorType] = 'average'
|
|
143
|
+
) -> Tuple[torch.Tensor, int]:
|
|
91
144
|
# load audio
|
|
92
145
|
try:
|
|
93
|
-
waveform, sample_rate =
|
|
94
|
-
|
|
95
|
-
waveform = waveform.reshape([1, -1]) # (1, L)
|
|
96
|
-
else: # make sure channel first
|
|
97
|
-
if waveform.shape[0] > waveform.shape[1]:
|
|
98
|
-
waveform = waveform.transpose(0, 1)
|
|
99
|
-
# average multiple channels
|
|
100
|
-
waveform = np.mean(waveform, axis=0, keepdims=True) # (1, L)
|
|
146
|
+
waveform, sample_rate = sf.read(audio, always_2d=True, dtype='float32') # numpy array
|
|
147
|
+
waveform = waveform.T # (channels, samples)
|
|
101
148
|
except Exception as primary_error:
|
|
102
149
|
# Fallback to PyAV for formats not supported by soundfile
|
|
103
150
|
try:
|
|
@@ -135,23 +182,16 @@ class Lattice1AlphaWorker:
|
|
|
135
182
|
|
|
136
183
|
# Concatenate all frames
|
|
137
184
|
waveform = np.concatenate(frames, axis=1)
|
|
138
|
-
# Average multiple channels to mono
|
|
139
|
-
if waveform.shape[0] > 1:
|
|
140
|
-
waveform = np.mean(waveform, axis=0, keepdims=True)
|
|
141
|
-
|
|
142
185
|
sample_rate = audio_stream.codec_context.sample_rate
|
|
143
186
|
except Exception as e:
|
|
144
187
|
raise AudioLoadError(str(audio), original_error=e)
|
|
145
188
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
)
|
|
153
|
-
|
|
154
|
-
return torch.from_numpy(waveform).to(self.device) # (1, L)
|
|
189
|
+
return resample_audio(
|
|
190
|
+
(torch.from_numpy(waveform), sample_rate),
|
|
191
|
+
self.config.get('sampling_rate', 16000),
|
|
192
|
+
device=self.device.type,
|
|
193
|
+
channel_selector=channel_selector,
|
|
194
|
+
)
|
|
155
195
|
|
|
156
196
|
def alignment(
|
|
157
197
|
self, audio: Union[Union[Pathlike, BinaryIO], torch.tensor], lattice_graph: Tuple[str, int, float]
|
lattifai/workflows/youtube.py
CHANGED
|
@@ -708,7 +708,7 @@ class YouTubeSubtitleAgent(WorkflowAgent):
|
|
|
708
708
|
|
|
709
709
|
# If subtitle was already downloaded in step 1 and user selected it, use it directly
|
|
710
710
|
if downloaded_subtitle_path and downloaded_subtitle_path != 'gemini':
|
|
711
|
-
self.logger.info(f'📥 Using subtitle
|
|
711
|
+
self.logger.info(f'📥 Using subtitle: {downloaded_subtitle_path}')
|
|
712
712
|
return {'subtitle_path': downloaded_subtitle_path}
|
|
713
713
|
|
|
714
714
|
# Check for existing subtitles if subtitle was not downloaded yet
|
|
{lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lattifai
|
|
3
|
-
Version: 0.4.4
|
|
3
|
+
Version: 0.4.5
|
|
4
4
|
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
5
|
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
6
|
Maintainer-email: Lattice <tech@lattifai.com>
|
|
@@ -57,7 +57,6 @@ Requires-Dist: pysubs2
|
|
|
57
57
|
Requires-Dist: praatio
|
|
58
58
|
Requires-Dist: tgt
|
|
59
59
|
Requires-Dist: onnxruntime
|
|
60
|
-
Requires-Dist: resampy
|
|
61
60
|
Requires-Dist: g2p-phonemizer==0.1.1
|
|
62
61
|
Requires-Dist: wtpsplit>=2.1.6
|
|
63
62
|
Requires-Dist: av
|
|
{lattifai-0.4.4.dist-info → lattifai-0.4.5.dist-info}/RECORD
CHANGED
|
@@ -4,7 +4,7 @@ lattifai/client.py,sha256=YvK25fcXwKREYCkq_TUBdEZh7I9RNEwCbgW4qUha2ho,13236
|
|
|
4
4
|
lattifai/errors.py,sha256=5i_acoly1g-TLAID8QnhzQshwOXfgLL55mHsdwzlNGA,10814
|
|
5
5
|
lattifai/utils.py,sha256=CzVwNc08u8lm7XavCMJskXHfni0xsZ-EgpcMkRywVm8,4736
|
|
6
6
|
lattifai/bin/__init__.py,sha256=QWbmVUbzqwPQNeOV_g-bOsz53w37v-tbZ3uFrSj-7Mg,90
|
|
7
|
-
lattifai/bin/agent.py,sha256=
|
|
7
|
+
lattifai/bin/agent.py,sha256=8nRh0GC1M4__-qKQtxZspcyNnUm8DOVCWQf3sRblEOI,9826
|
|
8
8
|
lattifai/bin/align.py,sha256=JJHk5uzmYGYhpA3ricHdmBLzJWC2aRyF0k4ANnap50w,8151
|
|
9
9
|
lattifai/bin/cli_base.py,sha256=gvPUi9Z0eGwBJ8Es5xq1z00YzFPlocYiR2zpL7ekyhw,626
|
|
10
10
|
lattifai/bin/subtitle.py,sha256=UZMPh71O2X1UwbfZ9VWlhzxkz78viz8KWwoVsDpewK0,6577
|
|
@@ -18,22 +18,22 @@ lattifai/io/utils.py,sha256=4drRwcM1n7AYhdJcF51EZxMTy_Ut_1GKtdWpRhPuVmg,686
|
|
|
18
18
|
lattifai/io/writer.py,sha256=8n9ZBuXuVOCFwzr1hqrnXpZ-fARTsepebwjKgRuueWE,3872
|
|
19
19
|
lattifai/tokenizer/__init__.py,sha256=y-FyfO7tLga9b46pkCC6jdSBKOFZS-jFfHcqUieGEyU,120
|
|
20
20
|
lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
|
|
21
|
-
lattifai/tokenizer/tokenizer.py,sha256=
|
|
21
|
+
lattifai/tokenizer/tokenizer.py,sha256=uLtGeT6ad_vPuoXNw8Rz43bAdKQWheI_tfHAV47FyqQ,19205
|
|
22
22
|
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
23
|
-
lattifai/workers/lattice1_alpha.py,sha256=
|
|
23
|
+
lattifai/workers/lattice1_alpha.py,sha256=4FX91eTmcTDZt78NrMY8EgkwlwBKlU4Qw39qcQeqiTg,11469
|
|
24
24
|
lattifai/workflows/__init__.py,sha256=mTOdwQQs2YY1s0JsVGsATb2TWPhpNo7bRiAAJW92740,830
|
|
25
25
|
lattifai/workflows/agents.py,sha256=ZnxyEb-Li8gQw3Z7V3K7jjnT7sQAtG1uCFuXGX77IcE,227
|
|
26
26
|
lattifai/workflows/base.py,sha256=ZSePq2O2hB59K5CMbk_iGiOM9FSHOVQdG3t8Oyz9gwE,6264
|
|
27
27
|
lattifai/workflows/file_manager.py,sha256=5km59cB4s9PcJgMMqZ6YqRY8sTqvTzd1qyQ2T5bHlfI,31985
|
|
28
28
|
lattifai/workflows/gemini.py,sha256=kpp3GiHyui8RZLWRQMx7vojBscCi2lZSxrjM1VHo_mc,6144
|
|
29
|
-
lattifai/workflows/youtube.py,sha256=
|
|
29
|
+
lattifai/workflows/youtube.py,sha256=tRy9LV5mwgQfQ3jfL_aiyB2catDHqCS2Hmy9Lk3-VGs,38757
|
|
30
30
|
lattifai/workflows/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
|
|
31
31
|
lattifai/workflows/prompts/__init__.py,sha256=i3kMT5sg_W9LbPcda0xmZWLg0tPjXGVI3iKtHrBng3o,1351
|
|
32
32
|
lattifai/workflows/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
|
|
33
33
|
lattifai/workflows/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnKzqVCakr-fTp2Xk38UOsUquvruNX-LU,4600
|
|
34
|
-
lattifai-0.4.
|
|
35
|
-
lattifai-0.4.
|
|
36
|
-
lattifai-0.4.
|
|
37
|
-
lattifai-0.4.
|
|
38
|
-
lattifai-0.4.
|
|
39
|
-
lattifai-0.4.
|
|
34
|
+
lattifai-0.4.5.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
35
|
+
lattifai-0.4.5.dist-info/METADATA,sha256=e0ojxc-4xgpgpWZC4wdXlTySQYliuLCa-MGG1F80U6E,26687
|
|
36
|
+
lattifai-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
37
|
+
lattifai-0.4.5.dist-info/entry_points.txt,sha256=fCgo8-LKA_9C7_jmEGsZPJko0woXHtEh0iRbpO7PYzI,69
|
|
38
|
+
lattifai-0.4.5.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
|
|
39
|
+
lattifai-0.4.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|