PyPI - lattifai - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

lattifai 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

lattifai/workers/lattice1_alpha.py CHANGED Viewed

@@ -9,6 +9,7 @@ import resampy
 import soundfile as sf
 import torch
 from lhotse import FbankConfig
+from lhotse.audio import read_audio
 from lhotse.features.kaldi.layers import Wav2LogFilterBank
 from lhotse.utils import Pathlike
@@ -76,13 +77,55 @@ class Lattice1AlphaWorker:
     def load_audio(self, audio: Union[Pathlike, BinaryIO]) -> Tuple[torch.Tensor, int]:
         # load audio
-        waveform, sample_rate = sf.read(audio, always_2d=True, dtype='float32')
-        if waveform.shape[1] > 1:  # TODO: support choose channel
-            waveform = np.mean(waveform, axis=1, keepdims=True)
+        try:
+            waveform, sample_rate = read_audio(audio)  # numpy array
+            if len(waveform.shape) == 1:
+                waveform = waveform.reshape([1, -1])  # (1, L)
+            else:  # make sure channel first
+                if waveform.shape[0] > waveform.shape[1]:
+                    waveform = waveform.transpose(0, 1)
+                # average multiple channels
+                waveform = np.mean(waveform, axis=0, keepdims=True)  # (1, L)
+        except Exception:
+            # Fallback to PyAV for formats not supported by soundfile
+            import av
+            container = av.open(audio)
+            audio_stream = next((s for s in container.streams if s.type == 'audio'), None)
+            if audio_stream is None:
+                raise ValueError(f'No audio stream found in {audio}')
+            # Resample to target sample rate during decoding
+            audio_stream.codec_context.format = av.AudioFormat('flt')  # 32-bit float
+            frames = []
+            for frame in container.decode(audio_stream):
+                # Convert frame to numpy array
+                array = frame.to_ndarray()
+                # Ensure shape is (channels, samples)
+                if array.ndim == 1:
+                    array = array.reshape(1, -1)
+                elif array.ndim == 2 and array.shape[0] > array.shape[1]:
+                    array = array.T
+                frames.append(array)
+            container.close()
+            if not frames:
+                raise ValueError(f'No audio data found in {audio}')
+            # Concatenate all frames
+            waveform = np.concatenate(frames, axis=1)
+            # Average multiple channels to mono
+            if waveform.shape[0] > 1:
+                waveform = np.mean(waveform, axis=0, keepdims=True)
+            sample_rate = audio_stream.codec_context.sample_rate
         if sample_rate != self.config['sample_rate']:
-            waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=0)
-        waveform = torch.from_numpy(waveform.T).to(self.device)  # (1, L)
-        return waveform
+            waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=1)
+        return torch.from_numpy(waveform).to(self.device)  # (1, L)
     def alignment(
         self, audio: Union[Union[Pathlike, BinaryIO], torch.tensor], lattice_graph: Tuple[str, int, float]

{lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lattifai
-Version: 0.2.2
+Version: 0.2.4
 Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
 Author-email: Lattifai Technologies <tech@lattifai.com>
 Maintainer-email: Lattice <tech@lattifai.com>
@@ -61,6 +61,7 @@ Requires-Dist: onnxruntime
 Requires-Dist: resampy
 Requires-Dist: g2p-phonemizer==0.1.1
 Requires-Dist: wtpsplit>=2.1.6
+Requires-Dist: av
 Provides-Extra: numpy
 Requires-Dist: numpy; extra == "numpy"
 Provides-Extra: test

{lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/RECORD RENAMED Viewed

@@ -13,10 +13,10 @@ lattifai/tokenizer/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBf
 lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
 lattifai/tokenizer/tokenizer.py,sha256=Yuo0pLPQnF2uX0Fm5g8i5vtcADn7GeLpSqdGpMJgTww,11492
 lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
-lattifai/workers/lattice1_alpha.py,sha256=1VFo59EcygEctTHOhkcII8v3_mrj8JEJ8Fcaqk_7LVo,5762
-lattifai-0.2.2.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
-lattifai-0.2.2.dist-info/METADATA,sha256=4vmPOYKsIlvADiw0zUDQ2dbDpe-vOV-o5A0Hs1p7xfg,10971
-lattifai-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lattifai-0.2.2.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
-lattifai-0.2.2.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
-lattifai-0.2.2.dist-info/RECORD,,
+lattifai/workers/lattice1_alpha.py,sha256=fnimZqhPQ1fBCUjcDVblnFGTWP0vbNLRM7E7lOdHJu8,7428
+lattifai-0.2.4.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
+lattifai-0.2.4.dist-info/METADATA,sha256=7SNTA_Egpv3F5rd0F20-4MigC7muz5x6kyr_xxj4CIk,10989
+lattifai-0.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lattifai-0.2.4.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
+lattifai-0.2.4.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
+lattifai-0.2.4.dist-info/RECORD,,

{lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{lattifai-0.2.2.dist-info → lattifai-0.2.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

lattifai 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

lattifai 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl