lattifai 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/workers/lattice1_alpha.py +49 -6
- {lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/METADATA +2 -1
- {lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/RECORD +7 -7
- {lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/WHEEL +0 -0
- {lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/entry_points.txt +0 -0
- {lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,7 @@ import resampy
|
|
|
9
9
|
import soundfile as sf
|
|
10
10
|
import torch
|
|
11
11
|
from lhotse import FbankConfig
|
|
12
|
+
from lhotse.audio import read_audio
|
|
12
13
|
from lhotse.features.kaldi.layers import Wav2LogFilterBank
|
|
13
14
|
from lhotse.utils import Pathlike
|
|
14
15
|
|
|
@@ -76,13 +77,55 @@ class Lattice1AlphaWorker:
|
|
|
76
77
|
|
|
77
78
|
def load_audio(self, audio: Union[Pathlike, BinaryIO]) -> Tuple[torch.Tensor, int]:
|
|
78
79
|
# load audio
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
try:
|
|
81
|
+
waveform, sample_rate = read_audio(audio) # numpy array
|
|
82
|
+
if len(waveform.shape) == 1:
|
|
83
|
+
waveform = waveform.reshape([1, -1]) # (1, L)
|
|
84
|
+
else: # make sure channel first
|
|
85
|
+
if waveform.shape[0] > waveform.shape[1]:
|
|
86
|
+
waveform = waveform.transpose(0, 1)
|
|
87
|
+
# average multiple channels
|
|
88
|
+
waveform = np.mean(waveform, axis=0, keepdims=True) # (1, L)
|
|
89
|
+
except Exception:
|
|
90
|
+
# Fallback to PyAV for formats not supported by soundfile
|
|
91
|
+
import av
|
|
92
|
+
|
|
93
|
+
container = av.open(audio)
|
|
94
|
+
audio_stream = next((s for s in container.streams if s.type == 'audio'), None)
|
|
95
|
+
|
|
96
|
+
if audio_stream is None:
|
|
97
|
+
raise ValueError(f'No audio stream found in {audio}')
|
|
98
|
+
|
|
99
|
+
# Resample to target sample rate during decoding
|
|
100
|
+
audio_stream.codec_context.format = av.AudioFormat('flt') # 32-bit float
|
|
101
|
+
|
|
102
|
+
frames = []
|
|
103
|
+
for frame in container.decode(audio_stream):
|
|
104
|
+
# Convert frame to numpy array
|
|
105
|
+
array = frame.to_ndarray()
|
|
106
|
+
# Ensure shape is (channels, samples)
|
|
107
|
+
if array.ndim == 1:
|
|
108
|
+
array = array.reshape(1, -1)
|
|
109
|
+
elif array.ndim == 2 and array.shape[0] > array.shape[1]:
|
|
110
|
+
array = array.T
|
|
111
|
+
frames.append(array)
|
|
112
|
+
|
|
113
|
+
container.close()
|
|
114
|
+
|
|
115
|
+
if not frames:
|
|
116
|
+
raise ValueError(f'No audio data found in {audio}')
|
|
117
|
+
|
|
118
|
+
# Concatenate all frames
|
|
119
|
+
waveform = np.concatenate(frames, axis=1)
|
|
120
|
+
# Average multiple channels to mono
|
|
121
|
+
if waveform.shape[0] > 1:
|
|
122
|
+
waveform = np.mean(waveform, axis=0, keepdims=True)
|
|
123
|
+
|
|
124
|
+
sample_rate = audio_stream.codec_context.sample_rate
|
|
125
|
+
|
|
82
126
|
if sample_rate != self.config['sample_rate']:
|
|
83
|
-
waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=
|
|
84
|
-
|
|
85
|
-
return waveform
|
|
127
|
+
waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=1)
|
|
128
|
+
return torch.from_numpy(waveform).to(self.device) # (1, L)
|
|
86
129
|
|
|
87
130
|
def alignment(
|
|
88
131
|
self, audio: Union[Union[Pathlike, BinaryIO], torch.tensor], lattice_graph: Tuple[str, int, float]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lattifai
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
5
|
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
6
|
Maintainer-email: Lattice <tech@lattifai.com>
|
|
@@ -61,6 +61,7 @@ Requires-Dist: onnxruntime
|
|
|
61
61
|
Requires-Dist: resampy
|
|
62
62
|
Requires-Dist: g2p-phonemizer==0.1.1
|
|
63
63
|
Requires-Dist: wtpsplit>=2.1.6
|
|
64
|
+
Requires-Dist: av
|
|
64
65
|
Provides-Extra: numpy
|
|
65
66
|
Requires-Dist: numpy; extra == "numpy"
|
|
66
67
|
Provides-Extra: test
|
|
@@ -13,10 +13,10 @@ lattifai/tokenizer/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBf
|
|
|
13
13
|
lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
|
|
14
14
|
lattifai/tokenizer/tokenizer.py,sha256=Yuo0pLPQnF2uX0Fm5g8i5vtcADn7GeLpSqdGpMJgTww,11492
|
|
15
15
|
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
16
|
-
lattifai/workers/lattice1_alpha.py,sha256=
|
|
17
|
-
lattifai-0.2.
|
|
18
|
-
lattifai-0.2.
|
|
19
|
-
lattifai-0.2.
|
|
20
|
-
lattifai-0.2.
|
|
21
|
-
lattifai-0.2.
|
|
22
|
-
lattifai-0.2.
|
|
16
|
+
lattifai/workers/lattice1_alpha.py,sha256=fnimZqhPQ1fBCUjcDVblnFGTWP0vbNLRM7E7lOdHJu8,7428
|
|
17
|
+
lattifai-0.2.4.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
18
|
+
lattifai-0.2.4.dist-info/METADATA,sha256=7SNTA_Egpv3F5rd0F20-4MigC7muz5x6kyr_xxj4CIk,10989
|
|
19
|
+
lattifai-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
lattifai-0.2.4.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
|
|
21
|
+
lattifai-0.2.4.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
|
|
22
|
+
lattifai-0.2.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|