lattifai 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff shows the changes between publicly released versions of this package as they appear in the supported public registries, and is provided for informational purposes only.
lattifai/bin/agent.py CHANGED
@@ -302,6 +302,10 @@ if not check_dependencies():
 if __name__ == '__main__':
     import os
 
+    from dotenv import find_dotenv, load_dotenv
+
+    load_dotenv(find_dotenv(usecwd=True))
+
     asyncio.run(
         _run_youtube_workflow(
             # url='https://www.youtube.com/watch?v=7nv1snJRCEI',
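The added lines load environment variables from a .env file before the workflow starts; find_dotenv(usecwd=True) searches upward from the current working directory rather than from the installed module's location. A minimal standalone sketch of the same pattern (the LATTIFAI_API_KEY name is only illustrative):

import os

from dotenv import find_dotenv, load_dotenv

# Locate a .env file starting from the current working directory and load
# its key=value pairs into the process environment.
load_dotenv(find_dotenv(usecwd=True))

# Illustrative variable name: anything defined in .env is now visible here.
api_key = os.environ.get('LATTIFAI_API_KEY')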
lattifai/tokenizer/tokenizer.py CHANGED
@@ -148,7 +148,7 @@ class LatticeTokenizer:
         oov_words = []
         for text in texts:
             words = text.lower().replace('-', ' ').replace('—', ' ').replace('–', ' ').split()
-            oovs = [w for w in words if w not in self.words]
+            oovs = [w.strip(PUNCTUATION) for w in words if w not in self.words]
             if oovs:
                 oov_words.extend([w for w in oovs if (w not in self.words and len(w) <= MAXIMUM_WORD_LENGTH)])
 
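The tokenizer change strips punctuation from out-of-vocabulary candidates, so words that only differ from vocabulary entries by attached punctuation are no longer reported as OOV. A rough sketch of the effect, assuming PUNCTUATION is a constant along the lines of string.punctuation (the real constant is defined in tokenizer.py) and with vocab standing in for self.words:

import string

PUNCTUATION = string.punctuation  # assumption; tokenizer.py defines its own constant
vocab = {'hello', 'world'}

words = 'Hello, world!'.lower().split()
# Old behaviour: ['hello,', 'world!'] both fail the vocabulary lookup.
# New behaviour: punctuation is stripped from the edges before the second check.
oovs = [w.strip(PUNCTUATION) for w in words if w not in vocab]
print([w for w in oovs if w not in vocab])  # [] -- nothing is flagged as OOV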
lattifai/workers/lattice1_alpha.py CHANGED
@@ -1,20 +1,71 @@
 import json
 import time
 from collections import defaultdict
-from typing import Any, BinaryIO, Dict, Tuple, Union
+from typing import Any, BinaryIO, Dict, Iterable, Optional, Tuple, Union
 
 import numpy as np
 import onnxruntime as ort
-import resampy
 import soundfile as sf
 import torch
 from lhotse import FbankConfig
-from lhotse.audio import read_audio
+from lhotse.augmentation import get_or_create_resampler
 from lhotse.features.kaldi.layers import Wav2LogFilterBank
 from lhotse.utils import Pathlike
 
 from lattifai.errors import AlignmentError, AudioFormatError, AudioLoadError, DependencyError, ModelLoadError
 
+ChannelSelectorType = Union[int, Iterable[int], str]
+
+
+def resample_audio(
+    audio_sr: Tuple[torch.Tensor, int],
+    sampling_rate: int,
+    device: Optional[str],
+    channel_selector: Optional[ChannelSelectorType] = 'average',
+) -> torch.Tensor:
+    """
+    return:
+        (1, T)
+    """
+    audio, sr = audio_sr
+
+    if channel_selector is None:
+        # keep the original multi-channel signal
+        tensor = audio
+    elif isinstance(channel_selector, int):
+        assert audio.shape[0] >= channel_selector, f'Invalid channel: {channel_selector}'
+        tensor = audio[channel_selector : channel_selector + 1].clone()
+        del audio
+    elif isinstance(channel_selector, str):
+        assert channel_selector == 'average'
+        tensor = torch.mean(audio.to(device), dim=0, keepdim=True)
+        del audio
+    else:
+        assert isinstance(channel_selector, Iterable)
+        num_channels = audio.shape[0]
+        print(f'Selecting channels {channel_selector} from the signal with {num_channels} channels.')
+        assert isinstance(channel_selector, Iterable)
+        if max(channel_selector) >= num_channels:
+            raise ValueError(
+                f'Cannot select channel subset {channel_selector} from a signal with {num_channels} channels.'
+            )
+        tensor = audio[channel_selector]
+
+    tensor = tensor.to(device)
+    if sr != sampling_rate:
+        resampler = get_or_create_resampler(sr, sampling_rate).to(device=device)
+        length = tensor.size(-1)
+        chunk_size = sampling_rate * 3600
+        if length > chunk_size:
+            resampled_chunks = []
+            for i in range(0, length, chunk_size):
+                resampled_chunks.append(resampler(tensor[..., i : i + chunk_size]))
+            tensor = torch.cat(resampled_chunks, dim=-1)
+        else:
+            tensor = resampler(tensor)
+
+    return tensor
+
 
 class Lattice1AlphaWorker:
     """Worker for processing audio with LatticeGraph."""
@@ -42,7 +93,7 @@ class Lattice1AlphaWorker:
             self.acoustic_ort = ort.InferenceSession(
                 f'{model_path}/acoustic_opt.onnx',
                 sess_options,
-                providers=providers + ['CoreMLExecutionProvider', 'CPUExecutionProvider'],
+                providers=providers + ['CPUExecutionProvider', 'CoreMLExecutionProvider'],
             )
         except Exception as e:
             raise ModelLoadError(f'acoustic model from {model_path}', original_error=e)
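onnxruntime treats the providers list as an ordered preference: graph nodes are assigned to the first listed provider that supports them, so putting CPUExecutionProvider ahead of CoreMLExecutionProvider makes CPU the preferred backend for this session. A small sketch of the pattern, assuming a build that includes the CoreML provider (e.g. onnxruntime on macOS) and an illustrative model path:

import onnxruntime as ort

sess_options = ort.SessionOptions()

# Providers are tried in the order given; here CPU takes priority over CoreML.
session = ort.InferenceSession(
    'acoustic_opt.onnx',  # illustrative path
    sess_options,
    providers=['CPUExecutionProvider', 'CoreMLExecutionProvider'],
)
print(session.get_providers())  # providers actually in use, in priority order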
@@ -87,17 +138,13 @@ class Lattice1AlphaWorker:
         self.timings['emission'] += time.time() - _start
         return emission  # (1, T, vocab_size) torch
 
-    def load_audio(self, audio: Union[Pathlike, BinaryIO]) -> Tuple[torch.Tensor, int]:
+    def load_audio(
+        self, audio: Union[Pathlike, BinaryIO], channel_selector: Optional[ChannelSelectorType] = 'average'
+    ) -> Tuple[torch.Tensor, int]:
         # load audio
         try:
-            waveform, sample_rate = read_audio(audio)  # numpy array
-            if len(waveform.shape) == 1:
-                waveform = waveform.reshape([1, -1])  # (1, L)
-            else:  # make sure channel first
-                if waveform.shape[0] > waveform.shape[1]:
-                    waveform = waveform.transpose(0, 1)
-                # average multiple channels
-                waveform = np.mean(waveform, axis=0, keepdims=True)  # (1, L)
+            waveform, sample_rate = sf.read(audio, always_2d=True, dtype='float32')  # numpy array
+            waveform = waveform.T  # (channels, samples)
         except Exception as primary_error:
             # Fallback to PyAV for formats not supported by soundfile
             try:
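soundfile.read with always_2d=True returns a float32 array shaped (frames, channels) together with the sample rate, so the transpose gives the channel-first layout that resample_audio expects; the mono mix-down is no longer hard-coded here and is instead controlled by channel_selector later on. A minimal sketch (the file path is illustrative):

import soundfile as sf

# Illustrative path; any format supported by libsndfile works the same way.
data, sample_rate = sf.read('example.wav', always_2d=True, dtype='float32')
print(data.shape)   # (frames, channels), even for mono files
waveform = data.T   # (channels, frames): channel-first for downstream code
print(waveform.shape, sample_rate)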
@@ -135,23 +182,16 @@ class Lattice1AlphaWorker:
 
                 # Concatenate all frames
                 waveform = np.concatenate(frames, axis=1)
-                # Average multiple channels to mono
-                if waveform.shape[0] > 1:
-                    waveform = np.mean(waveform, axis=0, keepdims=True)
-
                 sample_rate = audio_stream.codec_context.sample_rate
             except Exception as e:
                 raise AudioLoadError(str(audio), original_error=e)
 
-        try:
-            if sample_rate != self.config['sample_rate']:
-                waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=1)
-        except Exception:
-            raise AudioFormatError(
-                str(audio), f'Failed to resample from {sample_rate}Hz to {self.config["sample_rate"]}Hz'
-            )
-
-        return torch.from_numpy(waveform).to(self.device)  # (1, L)
+        return resample_audio(
+            (torch.from_numpy(waveform), sample_rate),
+            self.config.get('sampling_rate', 16000),
+            device=self.device.type,
+            channel_selector=channel_selector,
+        )
 
     def alignment(
         self, audio: Union[Union[Pathlike, BinaryIO], torch.tensor], lattice_graph: Tuple[str, int, float]
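With the resampy path removed, the decoded waveform is handed straight to resample_audio, with the target rate taken from self.config.get('sampling_rate', 16000). A standalone sketch of the equivalent tail of load_audio, using a synthetic waveform and a plain dict in place of self.config:

import numpy as np
import torch

from lattifai.workers.lattice1_alpha import resample_audio

config = {}  # stand-in for self.config; no 'sampling_rate' key, so 16000 is used
waveform = np.random.randn(2, 44100 * 2).astype('float32')  # (channels, samples) at 44.1 kHz
sample_rate = 44100

result = resample_audio(
    (torch.from_numpy(waveform), sample_rate),
    config.get('sampling_rate', 16000),
    device='cpu',
    channel_selector='average',
)
print(result.shape)  # torch.Size([1, 32000]), i.e. 2 seconds at 16 kHz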
lattifai/workflows/youtube.py CHANGED
@@ -708,7 +708,7 @@ class YouTubeSubtitleAgent(WorkflowAgent):
 
         # If subtitle was already downloaded in step 1 and user selected it, use it directly
         if downloaded_subtitle_path and downloaded_subtitle_path != 'gemini':
-            self.logger.info(f'📥 Using subtitle downloaded in previous step: {downloaded_subtitle_path}')
+            self.logger.info(f'📥 Using subtitle: {downloaded_subtitle_path}')
             return {'subtitle_path': downloaded_subtitle_path}
 
         # Check for existing subtitles if subtitle was not downloaded yet
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattifai
-Version: 0.4.4
+Version: 0.4.5
 Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
 Author-email: Lattifai Technologies <tech@lattifai.com>
 Maintainer-email: Lattice <tech@lattifai.com>
@@ -57,7 +57,6 @@ Requires-Dist: pysubs2
 Requires-Dist: praatio
 Requires-Dist: tgt
 Requires-Dist: onnxruntime
-Requires-Dist: resampy
 Requires-Dist: g2p-phonemizer==0.1.1
 Requires-Dist: wtpsplit>=2.1.6
 Requires-Dist: av
@@ -4,7 +4,7 @@ lattifai/client.py,sha256=YvK25fcXwKREYCkq_TUBdEZh7I9RNEwCbgW4qUha2ho,13236
 lattifai/errors.py,sha256=5i_acoly1g-TLAID8QnhzQshwOXfgLL55mHsdwzlNGA,10814
 lattifai/utils.py,sha256=CzVwNc08u8lm7XavCMJskXHfni0xsZ-EgpcMkRywVm8,4736
 lattifai/bin/__init__.py,sha256=QWbmVUbzqwPQNeOV_g-bOsz53w37v-tbZ3uFrSj-7Mg,90
-lattifai/bin/agent.py,sha256=jc7qSuVV2_EkToEu2qYodXgGoTup_na6IgP25kFmfwk,9734
+lattifai/bin/agent.py,sha256=8nRh0GC1M4__-qKQtxZspcyNnUm8DOVCWQf3sRblEOI,9826
 lattifai/bin/align.py,sha256=JJHk5uzmYGYhpA3ricHdmBLzJWC2aRyF0k4ANnap50w,8151
 lattifai/bin/cli_base.py,sha256=gvPUi9Z0eGwBJ8Es5xq1z00YzFPlocYiR2zpL7ekyhw,626
 lattifai/bin/subtitle.py,sha256=UZMPh71O2X1UwbfZ9VWlhzxkz78viz8KWwoVsDpewK0,6577
@@ -18,22 +18,22 @@ lattifai/io/utils.py,sha256=4drRwcM1n7AYhdJcF51EZxMTy_Ut_1GKtdWpRhPuVmg,686
 lattifai/io/writer.py,sha256=8n9ZBuXuVOCFwzr1hqrnXpZ-fARTsepebwjKgRuueWE,3872
 lattifai/tokenizer/__init__.py,sha256=y-FyfO7tLga9b46pkCC6jdSBKOFZS-jFfHcqUieGEyU,120
 lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
-lattifai/tokenizer/tokenizer.py,sha256=6XlHehhwahVWgUMj79LqMs13xaRTa17beOZqico4vLE,19186
+lattifai/tokenizer/tokenizer.py,sha256=uLtGeT6ad_vPuoXNw8Rz43bAdKQWheI_tfHAV47FyqQ,19205
 lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
-lattifai/workers/lattice1_alpha.py,sha256=826U2D5UJMnrxqswF6N1mlSs-3xRB9TgsiQe-Z89Ie4,10152
+lattifai/workers/lattice1_alpha.py,sha256=4FX91eTmcTDZt78NrMY8EgkwlwBKlU4Qw39qcQeqiTg,11469
 lattifai/workflows/__init__.py,sha256=mTOdwQQs2YY1s0JsVGsATb2TWPhpNo7bRiAAJW92740,830
 lattifai/workflows/agents.py,sha256=ZnxyEb-Li8gQw3Z7V3K7jjnT7sQAtG1uCFuXGX77IcE,227
 lattifai/workflows/base.py,sha256=ZSePq2O2hB59K5CMbk_iGiOM9FSHOVQdG3t8Oyz9gwE,6264
 lattifai/workflows/file_manager.py,sha256=5km59cB4s9PcJgMMqZ6YqRY8sTqvTzd1qyQ2T5bHlfI,31985
 lattifai/workflows/gemini.py,sha256=kpp3GiHyui8RZLWRQMx7vojBscCi2lZSxrjM1VHo_mc,6144
-lattifai/workflows/youtube.py,sha256=y1ez32G40CtHAo7Kz7J_DO96ljOwW-3SP9Sm2uPe5hY,38785
+lattifai/workflows/youtube.py,sha256=tRy9LV5mwgQfQ3jfL_aiyB2catDHqCS2Hmy9Lk3-VGs,38757
 lattifai/workflows/prompts/README.md,sha256=X49KWSQVdjWxxWUp4R2w3ZqKrAOi6_kDNHh1hMaQ4PE,694
 lattifai/workflows/prompts/__init__.py,sha256=i3kMT5sg_W9LbPcda0xmZWLg0tPjXGVI3iKtHrBng3o,1351
 lattifai/workflows/prompts/gemini/README.md,sha256=rt7f7yDGtaobKBo95LG3u56mqa3ABOXQd0UVgJYtYuo,781
 lattifai/workflows/prompts/gemini/transcription_gem.txt,sha256=cljzZ--BDgnnKzqVCakr-fTp2Xk38UOsUquvruNX-LU,4600
-lattifai-0.4.4.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
-lattifai-0.4.4.dist-info/METADATA,sha256=wW1ZAk7_WdKxHpvgucnJA2xXye1vB-NiKAfpTT56hDk,26710
-lattifai-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lattifai-0.4.4.dist-info/entry_points.txt,sha256=fCgo8-LKA_9C7_jmEGsZPJko0woXHtEh0iRbpO7PYzI,69
-lattifai-0.4.4.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
-lattifai-0.4.4.dist-info/RECORD,,
+lattifai-0.4.5.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
+lattifai-0.4.5.dist-info/METADATA,sha256=e0ojxc-4xgpgpWZC4wdXlTySQYliuLCa-MGG1F80U6E,26687
+lattifai-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lattifai-0.4.5.dist-info/entry_points.txt,sha256=fCgo8-LKA_9C7_jmEGsZPJko0woXHtEh0iRbpO7PYzI,69
+lattifai-0.4.5.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
+lattifai-0.4.5.dist-info/RECORD,,