lattifai 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +28 -0
- lattifai/bin/align.py +14 -2
- lattifai/bin/cli_base.py +5 -0
- lattifai/client.py +26 -16
- lattifai/tokenizers/phonemizer.py +1 -1
- lattifai/tokenizers/tokenizer.py +7 -3
- lattifai/workers/lattice1_alpha.py +40 -16
- lattifai-0.2.0.dist-info/METADATA +294 -0
- lattifai-0.2.0.dist-info/RECORD +22 -0
- {lattifai-0.1.4.dist-info → lattifai-0.2.0.dist-info}/entry_points.txt +1 -0
- {lattifai-0.1.4.dist-info → lattifai-0.2.0.dist-info}/top_level.txt +1 -0
- lattifai-0.1.4.dist-info/METADATA +0 -467
- lattifai-0.1.4.dist-info/RECORD +0 -22
- {lattifai-0.1.4.dist-info → lattifai-0.2.0.dist-info}/WHEEL +0 -0
- {lattifai-0.1.4.dist-info → lattifai-0.2.0.dist-info}/licenses/LICENSE +0 -0
lattifai/__init__.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
1
5
|
from .base_client import LattifAIError
|
|
2
6
|
from .io import SubtitleIO
|
|
3
7
|
|
|
@@ -13,6 +17,30 @@ except Exception:
|
|
|
13
17
|
__version__ = '0.1.0' # fallback version
|
|
14
18
|
|
|
15
19
|
|
|
20
|
+
# Check and auto-install k2 if not present
|
|
21
|
+
def _check_and_install_k2():
|
|
22
|
+
"""Check if k2 is installed and attempt to install it if not."""
|
|
23
|
+
try:
|
|
24
|
+
import k2
|
|
25
|
+
except ImportError:
|
|
26
|
+
import subprocess
|
|
27
|
+
|
|
28
|
+
print('k2 is not installed. Attempting to install k2...')
|
|
29
|
+
try:
|
|
30
|
+
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'install-k2'])
|
|
31
|
+
subprocess.check_call([sys.executable, '-m', 'install_k2'])
|
|
32
|
+
import k2 # Try importing again after installation
|
|
33
|
+
|
|
34
|
+
print('k2 installed successfully.')
|
|
35
|
+
except Exception as e:
|
|
36
|
+
warnings.warn(f'Failed to install k2 automatically. Please install it manually. Error: {e}')
|
|
37
|
+
return True
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Auto-install k2 on first import
|
|
41
|
+
_check_and_install_k2()
|
|
42
|
+
|
|
43
|
+
|
|
16
44
|
# Lazy import for LattifAI to avoid dependency issues during basic import
|
|
17
45
|
def __getattr__(name):
|
|
18
46
|
if name == 'LattifAI':
|
lattifai/bin/align.py
CHANGED
|
@@ -13,6 +13,16 @@ from lattifai.bin.cli_base import cli
|
|
|
13
13
|
default='auto',
|
|
14
14
|
help='Input Subtitle format.',
|
|
15
15
|
)
|
|
16
|
+
@click.option(
|
|
17
|
+
'-D',
|
|
18
|
+
'--device',
|
|
19
|
+
type=click.Choice(['cpu', 'cuda', 'mps'], case_sensitive=False),
|
|
20
|
+
default='cpu',
|
|
21
|
+
help='Device to use for inference.',
|
|
22
|
+
)
|
|
23
|
+
@click.option(
|
|
24
|
+
'-M', '--model_name_or_path', type=str, default='Lattifai/Lattice-1-Alpha', help='Lattifai model name or path'
|
|
25
|
+
)
|
|
16
26
|
@click.argument(
|
|
17
27
|
'input_audio_path',
|
|
18
28
|
type=click.Path(exists=True, dir_okay=False),
|
|
@@ -30,13 +40,15 @@ def align(
|
|
|
30
40
|
input_subtitle_path: Pathlike,
|
|
31
41
|
output_subtitle_path: Pathlike,
|
|
32
42
|
input_format: str = 'auto',
|
|
43
|
+
device: str = 'cpu',
|
|
44
|
+
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
33
45
|
):
|
|
34
46
|
"""
|
|
35
47
|
Command used to align audio with subtitles
|
|
36
48
|
"""
|
|
37
49
|
from lattifai import LattifAI
|
|
38
50
|
|
|
39
|
-
client = LattifAI()
|
|
51
|
+
client = LattifAI(model_name_or_path=model_name_or_path, device=device)
|
|
40
52
|
client.alignment(
|
|
41
|
-
input_audio_path, input_subtitle_path, format=input_format, output_subtitle_path=output_subtitle_path
|
|
53
|
+
input_audio_path, input_subtitle_path, format=input_format.lower(), output_subtitle_path=output_subtitle_path
|
|
42
54
|
)
|
lattifai/bin/cli_base.py
CHANGED
|
@@ -8,6 +8,11 @@ def cli():
|
|
|
8
8
|
"""
|
|
9
9
|
The shell entry point to Lattifai, a tool for audio data manipulation.
|
|
10
10
|
"""
|
|
11
|
+
# Load environment variables from .env file
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
|
|
14
|
+
load_dotenv()
|
|
15
|
+
|
|
11
16
|
logging.basicConfig(
|
|
12
17
|
format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s',
|
|
13
18
|
level=logging.INFO,
|
lattifai/client.py
CHANGED
|
@@ -24,9 +24,10 @@ class LattifAI(SyncAPIClient):
|
|
|
24
24
|
self,
|
|
25
25
|
*,
|
|
26
26
|
api_key: Optional[str] = None,
|
|
27
|
+
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
28
|
+
device: Optional[str] = None,
|
|
27
29
|
base_url: Optional[str] = None,
|
|
28
|
-
|
|
29
|
-
timeout: Union[float, int] = 60.0,
|
|
30
|
+
timeout: Union[float, int] = 120.0,
|
|
30
31
|
max_retries: int = 2,
|
|
31
32
|
default_headers: Optional[Dict[str, str]] = None,
|
|
32
33
|
) -> None:
|
|
@@ -52,19 +53,31 @@ class LattifAI(SyncAPIClient):
|
|
|
52
53
|
)
|
|
53
54
|
|
|
54
55
|
# Initialize components
|
|
55
|
-
model_name_or_path = '/Users/feiteng/GEEK/OmniCaptions/HF_models/Lattice-1-Alpha'
|
|
56
|
-
|
|
57
56
|
if not Path(model_name_or_path).exists():
|
|
58
|
-
from huggingface_hub import
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
from huggingface_hub import snapshot_download
|
|
58
|
+
from huggingface_hub.errors import LocalEntryNotFoundError
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
62
|
+
except LocalEntryNotFoundError:
|
|
63
|
+
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
|
64
|
+
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
61
65
|
else:
|
|
62
66
|
model_path = model_name_or_path
|
|
63
67
|
|
|
68
|
+
# device setup
|
|
69
|
+
if device is None:
|
|
70
|
+
import torch
|
|
71
|
+
|
|
72
|
+
device = 'cpu'
|
|
73
|
+
if torch.backends.mps.is_available():
|
|
74
|
+
device = 'mps'
|
|
75
|
+
elif torch.cuda.is_available():
|
|
76
|
+
device = 'cuda'
|
|
77
|
+
|
|
64
78
|
self.tokenizer = LatticeTokenizer.from_pretrained(
|
|
65
79
|
client_wrapper=self,
|
|
66
|
-
model_path=
|
|
67
|
-
g2p_model_path=f'{model_path}/g2p.bin' if Path(f'{model_path}/g2p.bin').exists() else None,
|
|
80
|
+
model_path=model_path,
|
|
68
81
|
device=device,
|
|
69
82
|
)
|
|
70
83
|
self.worker = Lattice1AlphaWorker(model_path, device=device, num_threads=8)
|
|
@@ -119,13 +132,10 @@ if __name__ == '__main__':
|
|
|
119
132
|
import sys
|
|
120
133
|
|
|
121
134
|
if len(sys.argv) == 4:
|
|
122
|
-
|
|
135
|
+
audio, subtitle, output = sys.argv[1:]
|
|
123
136
|
else:
|
|
124
137
|
audio = 'tests/data/SA1.wav'
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
alignments = client.alignment(audio, text)
|
|
128
|
-
print(alignments)
|
|
138
|
+
subtitle = 'tests/data/SA1.TXT'
|
|
139
|
+
output = None
|
|
129
140
|
|
|
130
|
-
alignments = client.alignment(audio,
|
|
131
|
-
print(alignments)
|
|
141
|
+
alignments = client.alignment(audio, subtitle, output_subtitle_path=output)
|
lattifai/tokenizers/tokenizer.py
CHANGED
|
@@ -33,22 +33,26 @@ class LatticeTokenizer:
|
|
|
33
33
|
def from_pretrained(
|
|
34
34
|
client_wrapper: SyncAPIClient,
|
|
35
35
|
model_path: str,
|
|
36
|
-
g2p_model_path: Optional[str] = None,
|
|
37
36
|
device: str = 'cpu',
|
|
38
37
|
compressed: bool = True,
|
|
39
38
|
):
|
|
40
39
|
"""Load tokenizer from exported binary file"""
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
|
|
42
|
+
words_model_path = f'{model_path}/words.bin'
|
|
41
43
|
if compressed:
|
|
42
|
-
with gzip.open(
|
|
44
|
+
with gzip.open(words_model_path, 'rb') as f:
|
|
43
45
|
data = pickle.load(f)
|
|
44
46
|
else:
|
|
45
|
-
with open(
|
|
47
|
+
with open(words_model_path, 'rb') as f:
|
|
46
48
|
data = pickle.load(f)
|
|
47
49
|
|
|
48
50
|
tokenizer = LatticeTokenizer(client_wrapper=client_wrapper)
|
|
49
51
|
tokenizer.words = data['words']
|
|
50
52
|
tokenizer.dictionaries = defaultdict(list, data['dictionaries'])
|
|
51
53
|
tokenizer.oov_word = data['oov_word']
|
|
54
|
+
|
|
55
|
+
g2p_model_path = f'{model_path}/g2p.bin' if Path(f'{model_path}/g2p.bin').exists() else None
|
|
52
56
|
if g2p_model_path:
|
|
53
57
|
tokenizer.g2p_model = G2Phonemizer(g2p_model_path, device=device)
|
|
54
58
|
return tokenizer
|
|
@@ -5,8 +5,9 @@ from typing import Any, BinaryIO, Dict, Tuple, Union
|
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import onnxruntime as ort
|
|
8
|
+
import resampy
|
|
9
|
+
import soundfile as sf
|
|
8
10
|
import torch
|
|
9
|
-
import torchaudio
|
|
10
11
|
from lhotse import FbankConfig
|
|
11
12
|
from lhotse.features.kaldi.layers import Wav2LogFilterBank
|
|
12
13
|
from lhotse.utils import Pathlike
|
|
@@ -16,8 +17,6 @@ class Lattice1AlphaWorker:
|
|
|
16
17
|
"""Worker for processing audio with LatticeGraph."""
|
|
17
18
|
|
|
18
19
|
def __init__(self, model_path: Pathlike, device: str = 'cpu', num_threads: int = 8) -> None:
|
|
19
|
-
if device != 'cpu':
|
|
20
|
-
raise NotImplementedError(f'Only cpu is supported for now, got device={device}.')
|
|
21
20
|
self.config = json.load(open(f'{model_path}/config.json'))
|
|
22
21
|
|
|
23
22
|
# SessionOptions
|
|
@@ -28,8 +27,11 @@ class Lattice1AlphaWorker:
|
|
|
28
27
|
sess_options.add_session_config_entry('session.intra_op.allow_spinning', '0')
|
|
29
28
|
|
|
30
29
|
providers = []
|
|
31
|
-
if device.startswith('cuda')
|
|
30
|
+
if device.startswith('cuda') and ort.get_all_providers().count('CUDAExecutionProvider') > 0:
|
|
32
31
|
providers.append('CUDAExecutionProvider')
|
|
32
|
+
elif device.startswith('mps') and ort.get_all_providers().count('MPSExecutionProvider') > 0:
|
|
33
|
+
providers.append('MPSExecutionProvider')
|
|
34
|
+
|
|
33
35
|
self.acoustic_ort = ort.InferenceSession(
|
|
34
36
|
f'{model_path}/acoustic_opt.onnx',
|
|
35
37
|
sess_options,
|
|
@@ -48,21 +50,38 @@ class Lattice1AlphaWorker:
|
|
|
48
50
|
_start = time.time()
|
|
49
51
|
# audio -> features -> emission
|
|
50
52
|
features = self.extractor(audio) # (1, T, D)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
53
|
+
if features.shape[1] > 6000:
|
|
54
|
+
features_list = torch.split(features, 6000, dim=1)
|
|
55
|
+
emissions = []
|
|
56
|
+
for features in features_list:
|
|
57
|
+
ort_inputs = {
|
|
58
|
+
'features': features.cpu().numpy(),
|
|
59
|
+
'feature_lengths': np.array([features.size(1)], dtype=np.int64),
|
|
60
|
+
}
|
|
61
|
+
emission = self.acoustic_ort.run(None, ort_inputs)[0] # (1, T, vocab_size) numpy
|
|
62
|
+
emissions.append(emission)
|
|
63
|
+
emission = torch.cat(
|
|
64
|
+
[torch.from_numpy(emission).to(self.device) for emission in emissions], dim=1
|
|
65
|
+
) # (1, T, vocab_size)
|
|
66
|
+
else:
|
|
67
|
+
ort_inputs = {
|
|
68
|
+
'features': features.cpu().numpy(),
|
|
69
|
+
'feature_lengths': np.array([features.size(1)], dtype=np.int64),
|
|
70
|
+
}
|
|
71
|
+
emission = self.acoustic_ort.run(None, ort_inputs)[0] # (1, T, vocab_size) numpy
|
|
72
|
+
emission = torch.from_numpy(emission).to(self.device)
|
|
73
|
+
|
|
56
74
|
self.timings['emission'] += time.time() - _start
|
|
57
|
-
return
|
|
75
|
+
return emission # (1, T, vocab_size) torch
|
|
58
76
|
|
|
59
77
|
def load_audio(self, audio: Union[Pathlike, BinaryIO]) -> Tuple[torch.Tensor, int]:
|
|
60
78
|
# load audio
|
|
61
|
-
waveform, sample_rate =
|
|
62
|
-
if waveform.
|
|
63
|
-
waveform =
|
|
79
|
+
waveform, sample_rate = sf.read(audio, always_2d=True, dtype='float32')
|
|
80
|
+
if waveform.shape[1] > 1: # TODO: support choose channel
|
|
81
|
+
waveform = np.mean(waveform, axis=1, keepdims=True)
|
|
64
82
|
if sample_rate != self.config['sample_rate']:
|
|
65
|
-
waveform =
|
|
83
|
+
waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=0)
|
|
84
|
+
waveform = torch.from_numpy(waveform.T).to(self.device) # (1, L)
|
|
66
85
|
return waveform
|
|
67
86
|
|
|
68
87
|
def alignment(
|
|
@@ -102,9 +121,14 @@ class Lattice1AlphaWorker:
|
|
|
102
121
|
self.timings['decoding_graph'] += time.time() - _start
|
|
103
122
|
|
|
104
123
|
_start = time.time()
|
|
124
|
+
if self.device.type == 'mps':
|
|
125
|
+
device = 'cpu' # k2 does not support mps yet
|
|
126
|
+
else:
|
|
127
|
+
device = self.device
|
|
128
|
+
|
|
105
129
|
results, labels = align_segments(
|
|
106
|
-
emission.to(
|
|
107
|
-
decoding_graph.to(
|
|
130
|
+
emission.to(device) * acoustic_scale,
|
|
131
|
+
decoding_graph.to(device),
|
|
108
132
|
torch.tensor([emission.shape[1]], dtype=torch.int32),
|
|
109
133
|
search_beam=100,
|
|
110
134
|
output_beam=40,
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lattifai
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
|
+
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
|
+
Maintainer-email: Lattice <tech@lattifai.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2025 Lattifai.
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
Project-URL: Homepage, https://github.com/lattifai/lattifai-python
|
|
29
|
+
Project-URL: Documentation, https://github.com/lattifai/lattifai-python/README.md
|
|
30
|
+
Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
|
|
31
|
+
Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
|
|
32
|
+
Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG.md
|
|
33
|
+
Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
|
|
34
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: Intended Audience :: Science/Research
|
|
37
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
43
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
44
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
45
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
46
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
47
|
+
Classifier: Topic :: Multimedia :: Video
|
|
48
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
49
|
+
Requires-Python: >=3.9
|
|
50
|
+
Description-Content-Type: text/markdown
|
|
51
|
+
License-File: LICENSE
|
|
52
|
+
Requires-Dist: lattifai-core>=0.2.0
|
|
53
|
+
Requires-Dist: httpx
|
|
54
|
+
Requires-Dist: python-dotenv
|
|
55
|
+
Requires-Dist: lhotse>=1.26.0
|
|
56
|
+
Requires-Dist: colorful>=0.5.6
|
|
57
|
+
Requires-Dist: pysubs2
|
|
58
|
+
Requires-Dist: praatio
|
|
59
|
+
Requires-Dist: tgt
|
|
60
|
+
Requires-Dist: onnxruntime
|
|
61
|
+
Requires-Dist: resampy
|
|
62
|
+
Requires-Dist: g2p-phonemizer==0.1.1
|
|
63
|
+
Provides-Extra: numpy
|
|
64
|
+
Requires-Dist: numpy; extra == "numpy"
|
|
65
|
+
Provides-Extra: test
|
|
66
|
+
Requires-Dist: pytest; extra == "test"
|
|
67
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
68
|
+
Requires-Dist: ruff; extra == "test"
|
|
69
|
+
Requires-Dist: numpy; extra == "test"
|
|
70
|
+
Provides-Extra: all
|
|
71
|
+
Requires-Dist: numpy; extra == "all"
|
|
72
|
+
Requires-Dist: pytest; extra == "all"
|
|
73
|
+
Requires-Dist: pytest-cov; extra == "all"
|
|
74
|
+
Requires-Dist: ruff; extra == "all"
|
|
75
|
+
Dynamic: license-file
|
|
76
|
+
|
|
77
|
+
# LattifAI Python
|
|
78
|
+
|
|
79
|
+
[![PyPI version](https://badge.fury.io/py/lattifai.svg)](https://badge.fury.io/py/lattifai)
|
|
80
|
+
|
|
81
|
+
<p align="center">
|
|
82
|
+
🌐 <a href="https://lattifai.com"><b>Official Website</b></a>    |    🖥️ <a href="https://github.com/lattifai/lattifai-python">GitHub</a>    |    🤗 <a href="https://huggingface.co/Lattifai/Lattice-1-Alpha">Model</a>    |    📑 <a href="https://lattifai.com/blogs">Blog</a>    |    <a href="https://discord.gg/gTZqdaBJ"><img src="https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white" alt="Discord" style="vertical-align: middle;"></a>
|
|
83
|
+
</p>
|
|
84
|
+
|
|
85
|
+
Advanced forced alignment and subtitle generation powered by [Lattice-1-Alpha](https://huggingface.co/Lattifai/Lattice-1-Alpha) model.
|
|
86
|
+
|
|
87
|
+
## Installation
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install install-k2
|
|
91
|
+
# The installation will automatically detect and use your already installed PyTorch version.
|
|
92
|
+
install-k2 # Install k2
|
|
93
|
+
|
|
94
|
+
pip install lattifai
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
> **⚠️ Important**: You must run `install-k2` before using the lattifai library.
|
|
98
|
+
|
|
99
|
+
## Quick Start
|
|
100
|
+
|
|
101
|
+
### Command Line
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Align audio with subtitle
|
|
105
|
+
lattifai align audio.wav subtitle.srt output.srt
|
|
106
|
+
|
|
107
|
+
# Convert subtitle format
|
|
108
|
+
lattifai subtitle convert input.srt output.vtt
|
|
109
|
+
```
|
|
110
|
+
#### lattifai align options
|
|
111
|
+
```
|
|
112
|
+
> lattifai align --help
|
|
113
|
+
Usage: lattifai align [OPTIONS] INPUT_AUDIO_PATH INPUT_SUBTITLE_PATH OUTPUT_SUBTITLE_PATH
|
|
114
|
+
|
|
115
|
+
Command used to align audio with subtitles
|
|
116
|
+
|
|
117
|
+
Options:
|
|
118
|
+
-F, --input_format [srt|vtt|ass|txt|auto] Input Subtitle format.
|
|
119
|
+
-D, --device [cpu|cuda|mps] Device to use for inference.
|
|
120
|
+
--help Show this message and exit.
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Python API
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from lattifai import LattifAI
|
|
127
|
+
|
|
128
|
+
# Initialize client
|
|
129
|
+
client = LattifAI(
|
|
130
|
+
api_key=None,  # optional if LATTIFAI_API_KEY is set in the environment
|
|
131
|
+
model_name_or_path='Lattifai/Lattice-1-Alpha',
|
|
132
|
+
device='cpu', # 'cpu', 'cuda', or 'mps'
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Perform alignment
|
|
136
|
+
result = client.alignment(
|
|
137
|
+
audio="audio.wav",
|
|
138
|
+
subtitle="subtitle.srt",
|
|
139
|
+
output_subtitle_path="output.srt"
|
|
140
|
+
)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Supported Formats
|
|
144
|
+
|
|
145
|
+
**Audio**: WAV, MP3, FLAC, M4A, OGG
|
|
146
|
+
**Subtitle**: SRT, VTT, ASS, TXT (plain text)
|
|
147
|
+
|
|
148
|
+
## API Reference
|
|
149
|
+
|
|
150
|
+
### LattifAI
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
LattifAI(
|
|
154
|
+
api_key: Optional[str] = None,
|
|
155
|
+
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
156
|
+
device: Optional[str] = None  # 'cpu', 'cuda', or 'mps'; auto-detected (mps, then cuda, else cpu) when None
|
|
157
|
+
)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### alignment()
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
client.alignment(
|
|
164
|
+
audio: str, # Path to audio file
|
|
165
|
+
subtitle: str, # Path to subtitle/text file
|
|
166
|
+
format: Optional[str] = None, # 'srt', 'vtt', 'ass', 'txt' (auto-detect if None)
|
|
167
|
+
output_subtitle_path: Optional[str] = None
|
|
168
|
+
) -> str
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Examples
|
|
172
|
+
|
|
173
|
+
### Basic Text Alignment
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
client = LattifAI()
|
|
177
|
+
client.alignment(
|
|
178
|
+
audio="speech.wav",
|
|
179
|
+
subtitle="transcript.txt",
|
|
180
|
+
format="txt",
|
|
181
|
+
output_subtitle_path="output.srt"
|
|
182
|
+
)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Batch Processing
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
from pathlib import Path
|
|
189
|
+
|
|
190
|
+
client = LattifAI()
|
|
191
|
+
audio_dir = Path("audio_files")
|
|
192
|
+
subtitle_dir = Path("subtitles")
|
|
193
|
+
output_dir = Path("aligned")
|
|
194
|
+
|
|
195
|
+
for audio in audio_dir.glob("*.wav"):
|
|
196
|
+
subtitle = subtitle_dir / f"{audio.stem}.srt"
|
|
197
|
+
if subtitle.exists():
|
|
198
|
+
client.alignment(
|
|
199
|
+
audio=audio,
|
|
200
|
+
subtitle=subtitle,
|
|
201
|
+
output_subtitle_path=output_dir / f"{audio.stem}_aligned.srt"
|
|
202
|
+
)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### GPU Acceleration
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
# NVIDIA GPU
|
|
209
|
+
client = LattifAI(device='cuda')
|
|
210
|
+
|
|
211
|
+
# Apple Silicon
|
|
212
|
+
client = LattifAI(device='mps')
|
|
213
|
+
|
|
214
|
+
# CLI
|
|
215
|
+
lattifai align --device mps audio.wav subtitle.srt output.srt
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Configuration
|
|
219
|
+
|
|
220
|
+
### API Key Setup
|
|
221
|
+
|
|
222
|
+
First, create your API key at [https://lattifai.com/dashboard/api-keys](https://lattifai.com/dashboard/api-keys)
|
|
223
|
+
|
|
224
|
+
**Recommended: Using .env file**
|
|
225
|
+
|
|
226
|
+
Create a `.env` file in your project root:
|
|
227
|
+
```bash
|
|
228
|
+
LATTIFAI_API_KEY=your-api-key
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
The library automatically loads the `.env` file (python-dotenv is included as a dependency).
|
|
232
|
+
|
|
233
|
+
**Alternative: Environment variable**
|
|
234
|
+
```bash
|
|
235
|
+
export LATTIFAI_API_KEY="your-api-key"
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Model Information
|
|
239
|
+
|
|
240
|
+
**[Lattice-1-Alpha](https://huggingface.co/Lattifai/Lattice-1-Alpha)** features:
|
|
241
|
+
- State-of-the-art alignment precision
|
|
242
|
+
- **Language Support**: Currently supports English only. The upcoming **Lattice-1** release will support English, Chinese, and mixed English-Chinese content.
|
|
243
|
+
- Handles noisy audio and imperfect transcripts
|
|
244
|
+
- Optimized for CPU and GPU (CUDA/MPS)
|
|
245
|
+
|
|
246
|
+
**Requirements**:
|
|
247
|
+
- Python 3.9+
|
|
248
|
+
- 4GB RAM recommended
|
|
249
|
+
- ~2GB storage for model files
|
|
250
|
+
|
|
251
|
+
## Development
|
|
252
|
+
|
|
253
|
+
### Setup
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
git clone https://github.com/lattifai/lattifai-python.git
|
|
257
|
+
cd lattifai-python
|
|
258
|
+
pip install -e ".[test]"
|
|
259
|
+
./scripts/install-hooks.sh # Optional: install pre-commit hooks
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Testing
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
pytest # Run all tests
|
|
266
|
+
pytest --cov=src # With coverage
|
|
267
|
+
pytest tests/test_basic.py # Specific test
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Code Quality
|
|
271
|
+
|
|
272
|
+
```bash
|
|
273
|
+
ruff check src/ tests/ # Lint
|
|
274
|
+
ruff format src/ tests/ # Format
|
|
275
|
+
isort src/ tests/ # Sort imports
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Contributing
|
|
279
|
+
|
|
280
|
+
1. Fork the repository
|
|
281
|
+
2. Create a feature branch
|
|
282
|
+
3. Make changes and add tests
|
|
283
|
+
4. Run `pytest` and `ruff check`
|
|
284
|
+
5. Submit a pull request
|
|
285
|
+
|
|
286
|
+
## License
|
|
287
|
+
|
|
288
|
+
Apache License 2.0
|
|
289
|
+
|
|
290
|
+
## Support
|
|
291
|
+
|
|
292
|
+
- **Issues**: [GitHub Issues](https://github.com/lattifai/lattifai-python/issues)
|
|
293
|
+
- **Discussions**: [GitHub Discussions](https://github.com/lattifai/lattifai-python/discussions)
|
|
294
|
+
- **Discord**: [Join our community](https://discord.gg/gTZqdaBJ)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
lattifai/__init__.py,sha256=JXUg0dT74UyAtKOjewRs9ijr5sl9SYsc6oU_WItY314,1497
|
|
2
|
+
lattifai/base_client.py,sha256=ktFtATjL9pLSJUD-VqeJKA1FHkrsGHX7Uq_x00H7gO8,3322
|
|
3
|
+
lattifai/client.py,sha256=NjHUqMZFoRxuyxdzBNEcn5kU57gJl31FSb6i9DDcoCw,5059
|
|
4
|
+
lattifai/bin/__init__.py,sha256=7YhmtEM8kbxJtz2-KIskvpLKBZAvkMSceVx8z4fkgQ4,61
|
|
5
|
+
lattifai/bin/align.py,sha256=nw-wABsNyxC8zN3siiqgNi1Foou3cYhVzIAomuVrFOY,1436
|
|
6
|
+
lattifai/bin/cli_base.py,sha256=y535WXDRX8StloFn9icpfw7nQt0JxuWBIuPMnRxAYy8,392
|
|
7
|
+
lattifai/bin/subtitle.py,sha256=bUWImAHpvyY59Vskqb5loQiD5ytQOxR8lTQRiQ4LyNA,647
|
|
8
|
+
lattifai/io/__init__.py,sha256=vHWRN7MvAch-GUeFqqO-gM57SM-4YOpGUjIxFJdjfPA,671
|
|
9
|
+
lattifai/io/reader.py,sha256=ErPnPMUvYQpjZ7Vd86EsHUkOcEfKdoI8iM3yKHRzSOQ,2576
|
|
10
|
+
lattifai/io/supervision.py,sha256=5UfSsgBhXoDU3-6drDtoD7y8HIiA4xRKZnbOKgeejwM,354
|
|
11
|
+
lattifai/io/writer.py,sha256=1eAEFLlL8kricxRDPFBtVmeC4IiFyFnjbWXvw0VU-q4,2036
|
|
12
|
+
lattifai/tokenizers/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBfs,72
|
|
13
|
+
lattifai/tokenizers/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
|
|
14
|
+
lattifai/tokenizers/tokenizer.py,sha256=u4lgS6-jN9cLuMNIojA4Swfsqb1EcyI7Bh_iw7tuL-s,5818
|
|
15
|
+
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
16
|
+
lattifai/workers/lattice1_alpha.py,sha256=1VFo59EcygEctTHOhkcII8v3_mrj8JEJ8Fcaqk_7LVo,5762
|
|
17
|
+
lattifai-0.2.0.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
18
|
+
lattifai-0.2.0.dist-info/METADATA,sha256=bXb6z5D1C-9YwHeycSFs8SAhUp8VNJbE9u-J9lvYMZ8,8997
|
|
19
|
+
lattifai-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
lattifai-0.2.0.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
|
|
21
|
+
lattifai-0.2.0.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
|
|
22
|
+
lattifai-0.2.0.dist-info/RECORD,,
|
|
@@ -1,467 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: lattifai
|
|
3
|
-
Version: 0.1.4
|
|
4
|
-
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
|
-
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
|
-
Maintainer-email: Lattice <tech@lattifai.com>
|
|
7
|
-
License: MIT License
|
|
8
|
-
|
|
9
|
-
Copyright (c) 2025 Lattifai.
|
|
10
|
-
|
|
11
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
-
in the Software without restriction, including without limitation the rights
|
|
14
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
-
furnished to do so, subject to the following conditions:
|
|
17
|
-
|
|
18
|
-
The above copyright notice and this permission notice shall be included in all
|
|
19
|
-
copies or substantial portions of the Software.
|
|
20
|
-
|
|
21
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
-
SOFTWARE.
|
|
28
|
-
Project-URL: Homepage, https://github.com/lattifai/lattifai-python
|
|
29
|
-
Project-URL: Documentation, https://github.com/lattifai/lattifai-python/api.md
|
|
30
|
-
Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
|
|
31
|
-
Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
|
|
32
|
-
Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG
|
|
33
|
-
Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
|
|
34
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
35
|
-
Classifier: Intended Audience :: Developers
|
|
36
|
-
Classifier: Intended Audience :: Science/Research
|
|
37
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
38
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
39
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
40
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
41
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
42
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
43
|
-
Classifier: Operating System :: MacOS :: MacOS X
|
|
44
|
-
Classifier: Operating System :: POSIX :: Linux
|
|
45
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
46
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
47
|
-
Classifier: Topic :: Multimedia :: Video
|
|
48
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
49
|
-
Requires-Python: >=3.9
|
|
50
|
-
Description-Content-Type: text/markdown
|
|
51
|
-
License-File: LICENSE
|
|
52
|
-
Requires-Dist: httpx
|
|
53
|
-
Requires-Dist: python-dotenv
|
|
54
|
-
Requires-Dist: lhotse>=1.26.0
|
|
55
|
-
Requires-Dist: colorful>=0.5.6
|
|
56
|
-
Requires-Dist: lattifai-core>=0.1.4
|
|
57
|
-
Provides-Extra: numpy
|
|
58
|
-
Requires-Dist: numpy; extra == "numpy"
|
|
59
|
-
Provides-Extra: test
|
|
60
|
-
Requires-Dist: pytest; extra == "test"
|
|
61
|
-
Requires-Dist: pytest-cov; extra == "test"
|
|
62
|
-
Requires-Dist: ruff; extra == "test"
|
|
63
|
-
Requires-Dist: numpy; extra == "test"
|
|
64
|
-
Provides-Extra: all
|
|
65
|
-
Requires-Dist: numpy; extra == "all"
|
|
66
|
-
Requires-Dist: pytest; extra == "all"
|
|
67
|
-
Requires-Dist: pytest-cov; extra == "all"
|
|
68
|
-
Requires-Dist: ruff; extra == "all"
|
|
69
|
-
Dynamic: license-file
|
|
70
|
-
|
|
71
|
-
# LattifAI Python
|
|
72
|
-
|
|
73
|
-
> ⚠️ **Under Active Development** - Official release scheduled for October 18, 2025
|
|
74
|
-
|
|
75
|
-
[![PyPI version](https://badge.fury.io/py/lattifai.svg)](https://badge.fury.io/py/lattifai)
|
|
76
|
-
|
|
77
|
-
The official Python library for the LattifAI API - Advanced forced alignment and subtitle generation powered by `Lattice-1-Alpha` model.
|
|
78
|
-
|
|
79
|
-
## Installation
|
|
80
|
-
|
|
81
|
-
```bash
|
|
82
|
-
pip install lattifai
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
## GPU Support Status
|
|
86
|
-
|
|
87
|
-
> **🚧 GPU Support Coming Soon**: CUDA/GPU acceleration is currently under active development. The current version supports CPU-only processing. GPU support will be available in an upcoming release, providing significant performance improvements for large audio files.
|
|
88
|
-
|
|
89
|
-
## Quick Start
|
|
90
|
-
|
|
91
|
-
### Python API
|
|
92
|
-
|
|
93
|
-
```python
|
|
94
|
-
import os
|
|
95
|
-
from lattifai import LattifAI
|
|
96
|
-
|
|
97
|
-
# Initialize client
|
|
98
|
-
client = LattifAI(
|
|
99
|
-
api_key=os.environ.get("LATTIFAI_API_KEY"), # Optional if set in environment
|
|
100
|
-
device='cpu', # Currently only CPU is supported, GPU support coming soon
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
# Perform alignment
|
|
104
|
-
result = client.alignment(
|
|
105
|
-
audio="path/to/audio.wav",
|
|
106
|
-
subtitle="path/to/subtitle.srt", # or .txt, .vtt, .ass
|
|
107
|
-
format="srt", # auto-detect if not specified
|
|
108
|
-
output_subtitle_path="output.srt"
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
print(f"Alignment complete! Output saved to: {result}")
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
### Command Line Interface
|
|
115
|
-
|
|
116
|
-
The library provides powerful command-line tools for batch processing and automation.
|
|
117
|
-
|
|
118
|
-
#### Basic Alignment
|
|
119
|
-
|
|
120
|
-
```bash
|
|
121
|
-
# Align audio with subtitle file
|
|
122
|
-
lattifai-align input_audio.wav input_subtitle.srt output_aligned.srt
|
|
123
|
-
|
|
124
|
-
# Specify input format explicitly
|
|
125
|
-
lattifai-align -F txt input_audio.wav transcript.txt output.srt
|
|
126
|
-
|
|
127
|
-
# Auto-detect input format (default)
|
|
128
|
-
lattifai-align input_audio.wav subtitle_file.vtt output.vtt
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
#### Using the Main CLI
|
|
132
|
-
|
|
133
|
-
```bash
|
|
134
|
-
# Show available commands
|
|
135
|
-
lattifai --help
|
|
136
|
-
|
|
137
|
-
# Perform alignment (same as lattifai-align)
|
|
138
|
-
lattifai align input_audio.wav input_subtitle.srt output.srt
|
|
139
|
-
|
|
140
|
-
# Subtitle format conversion
|
|
141
|
-
lattifai subtitle convert input.srt output.vtt
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
#### Supported Input Formats
|
|
145
|
-
|
|
146
|
-
- **SRT**: SubRip Subtitle format (`.srt`)
|
|
147
|
-
- **VTT**: WebVTT format (`.vtt`)
|
|
148
|
-
- **ASS**: Advanced SubStation Alpha format (`.ass`)
|
|
149
|
-
- **TXT**: Plain text format (`.txt`)
|
|
150
|
-
- **AUTO**: Automatic format detection (default)
|
|
151
|
-
|
|
152
|
-
#### Command Line Options
|
|
153
|
-
|
|
154
|
-
- `-F, --input_format`: Specify input subtitle format (`srt`, `vtt`, `ass`, `txt`, `auto`)
|
|
155
|
-
- `--help`: Show help message and available options
|
|
156
|
-
|
|
157
|
-
## API Reference
|
|
158
|
-
|
|
159
|
-
### LattifAI Class
|
|
160
|
-
|
|
161
|
-
#### Constructor Parameters
|
|
162
|
-
|
|
163
|
-
```python
|
|
164
|
-
LattifAI(
|
|
165
|
-
api_key: Optional[str] = None, # API key (or set LATTIFAI_API_KEY env var)
|
|
166
|
-
base_url: Optional[str] = None, # API base URL (or set LATTIFAI_BASE_URL env var)
|
|
167
|
-
device: str = 'cpu', # Device for processing (currently only 'cpu' supported)
|
|
168
|
-
)
|
|
169
|
-
```
|
|
170
|
-
|
|
171
|
-
> **Note**: The `device` parameter currently only supports `'cpu'`. GPU support (`'cuda'`) is under active development and will be available in future releases.
|
|
172
|
-
|
|
173
|
-
#### Methods
|
|
174
|
-
|
|
175
|
-
##### `alignment()`
|
|
176
|
-
|
|
177
|
-
Perform forced alignment between audio and text/subtitles.
|
|
178
|
-
|
|
179
|
-
```python
|
|
180
|
-
def alignment(
|
|
181
|
-
audio: Pathlike, # Path to audio file
|
|
182
|
-
subtitle: Pathlike, # Path to subtitle/text file
|
|
183
|
-
format: Optional[SubtitleFormat] = None, # Input format (auto-detect if None)
|
|
184
|
-
output_subtitle_path: Optional[Pathlike] = None # Output file path
|
|
185
|
-
) -> str:
|
|
186
|
-
```
|
|
187
|
-
|
|
188
|
-
**Parameters:**
|
|
189
|
-
- `audio`: Path to the audio file (WAV, MP3, etc.)
|
|
190
|
-
- `subtitle`: Path to subtitle or text file
|
|
191
|
-
- `format`: Input format (`'srt'`, `'vtt'`, `'ass'`, `'txt'`, or `None` for auto-detection)
|
|
192
|
-
- `output_subtitle_path`: Path where aligned subtitle will be saved
|
|
193
|
-
|
|
194
|
-
**Returns:**
|
|
195
|
-
- Path to output file (if `output_subtitle_path` specified) or alignment results
|
|
196
|
-
|
|
197
|
-
**Example:**
|
|
198
|
-
```python
|
|
199
|
-
# Basic usage
|
|
200
|
-
result = client.alignment("audio.wav", "subtitle.srt")
|
|
201
|
-
|
|
202
|
-
# With explicit format and output path
|
|
203
|
-
result = client.alignment(
|
|
204
|
-
audio="interview.mp3",
|
|
205
|
-
subtitle="transcript.txt",
|
|
206
|
-
format="txt",
|
|
207
|
-
output_subtitle_path="aligned_interview.srt"
|
|
208
|
-
)
|
|
209
|
-
```
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
## Configuration
|
|
213
|
-
|
|
214
|
-
### Environment Variables
|
|
215
|
-
|
|
216
|
-
- `LATTIFAI_API_KEY`: Your LattifAI API key (required)
|
|
217
|
-
- `LATTIFAI_BASE_URL`: Base URL for the API (default: `https://api.lattifai.com/v1`)
|
|
218
|
-
|
|
219
|
-
### Device Configuration
|
|
220
|
-
|
|
221
|
-
> **Note**: GPU/CUDA support is currently under development and will be available in a future release. Currently, only CPU processing is supported.
|
|
222
|
-
|
|
223
|
-
The library is designed to support both CPU and GPU processing:
|
|
224
|
-
|
|
225
|
-
```python
|
|
226
|
-
# CPU processing (currently supported)
|
|
227
|
-
client = LattifAI(device='cpu')
|
|
228
|
-
|
|
229
|
-
# GPU processing (coming soon)
|
|
230
|
-
# client = LattifAI(device='cuda') # Will be available in future releases
|
|
231
|
-
```
|
|
232
|
-
|
|
233
|
-
### Performance Tuning
|
|
234
|
-
|
|
235
|
-
For better performance with large files:
|
|
236
|
-
|
|
237
|
-
```python
|
|
238
|
-
client = LattifAI(
|
|
239
|
-
device='cpu', # Currently only CPU is supported
|
|
240
|
-
)
|
|
241
|
-
```
|
|
242
|
-
|
|
243
|
-
> **GPU Acceleration**: CUDA support is in active development and will significantly improve processing speed for large audio files. Expected in upcoming releases.
|
|
244
|
-
|
|
245
|
-
## Examples
|
|
246
|
-
|
|
247
|
-
### Basic Text Alignment
|
|
248
|
-
|
|
249
|
-
```python
|
|
250
|
-
from lattifai import LattifAI
|
|
251
|
-
|
|
252
|
-
client = LattifAI()
|
|
253
|
-
|
|
254
|
-
# Align plain text with audio
|
|
255
|
-
result = client.alignment(
|
|
256
|
-
audio="speech.wav",
|
|
257
|
-
subtitle="transcript.txt",  # path to a plain-text transcript, e.g. containing "Hello world. This is a test."
|
|
258
|
-
format="txt",
|
|
259
|
-
output_subtitle_path="aligned.srt"
|
|
260
|
-
)
|
|
261
|
-
```
|
|
262
|
-
|
|
263
|
-
### Subtitle Refinement
|
|
264
|
-
|
|
265
|
-
```python
|
|
266
|
-
# Improve existing subtitle timing
|
|
267
|
-
result = client.alignment(
|
|
268
|
-
audio="movie.mp4",
|
|
269
|
-
subtitle="rough_subtitles.srt",
|
|
270
|
-
output_subtitle_path="precise_subtitles.srt"
|
|
271
|
-
)
|
|
272
|
-
```
|
|
273
|
-
|
|
274
|
-
### Batch Processing
|
|
275
|
-
|
|
276
|
-
```python
|
|
277
|
-
from lattifai import LattifAI
|
|
278
|
-
from pathlib import Path
|
|
279
|
-
|
|
280
|
-
client = LattifAI()
|
|
281
|
-
|
|
282
|
-
# Process multiple files
|
|
283
|
-
audio_dir = Path("audio_files")
|
|
284
|
-
subtitle_dir = Path("subtitles")
|
|
285
|
-
output_dir = Path("aligned_output")
|
|
286
|
-
|
|
287
|
-
for audio_file in audio_dir.glob("*.wav"):
|
|
288
|
-
subtitle_file = subtitle_dir / f"{audio_file.stem}.srt"
|
|
289
|
-
output_file = output_dir / f"{audio_file.stem}_aligned.srt"
|
|
290
|
-
|
|
291
|
-
if subtitle_file.exists():
|
|
292
|
-
client.alignment(
|
|
293
|
-
audio=audio_file,
|
|
294
|
-
subtitle=subtitle_file,
|
|
295
|
-
output_subtitle_path=output_file
|
|
296
|
-
)
|
|
297
|
-
```
|
|
298
|
-
|
|
299
|
-
## Supported Formats
|
|
300
|
-
|
|
301
|
-
### Audio Formats
|
|
302
|
-
- **WAV**: Recommended for best quality
|
|
303
|
-
- **MP3**: Widely supported compressed format
|
|
304
|
-
- **FLAC**: Lossless compression
|
|
305
|
-
- **M4A**: Apple audio format
|
|
306
|
-
- **OGG**: Open source audio format
|
|
307
|
-
|
|
308
|
-
### Subtitle Formats
|
|
309
|
-
- **SRT**: SubRip Subtitle format (most common)
|
|
310
|
-
- **VTT**: WebVTT format (web standard)
|
|
311
|
-
- **ASS**: Advanced SubStation Alpha format (advanced styling)
|
|
312
|
-
- **TXT**: Plain text format (one sentence per line)
|
|
313
|
-
|
|
314
|
-
## Error Handling
|
|
315
|
-
|
|
316
|
-
```python
|
|
317
|
-
from lattifai import LattifAI, LattifAIError
|
|
318
|
-
|
|
319
|
-
try:
|
|
320
|
-
client = LattifAI()
|
|
321
|
-
result = client.alignment(
|
|
322
|
-
audio="audio.wav",
|
|
323
|
-
subtitle="subtitle.srt"
|
|
324
|
-
)
|
|
325
|
-
except LattifAIError as e:
|
|
326
|
-
print(f"LattifAI error: {e}")
|
|
327
|
-
except FileNotFoundError as e:
|
|
328
|
-
print(f"File not found: {e}")
|
|
329
|
-
except Exception as e:
|
|
330
|
-
print(f"Unexpected error: {e}")
|
|
331
|
-
```
|
|
332
|
-
|
|
333
|
-
### Common Error Scenarios
|
|
334
|
-
|
|
335
|
-
- **Missing API Key**: Set `LATTIFAI_API_KEY` environment variable
|
|
336
|
-
- **File Not Found**: Verify input file paths exist
|
|
337
|
-
- **Unsupported Format**: Check supported audio/subtitle formats
|
|
338
|
-
- **Network Issues**: Check internet connection and API endpoint
|
|
339
|
-
- **Device Configuration**: Currently only `device='cpu'` is supported (GPU support coming soon)
|
|
340
|
-
|
|
341
|
-
## Model Information
|
|
342
|
-
|
|
343
|
-
This library uses the **Lattice-1-Alpha** model for high-quality forced alignment and subtitle generation.
|
|
344
|
-
|
|
345
|
-
### Model Features
|
|
346
|
-
- **High Accuracy**: State-of-the-art alignment precision
|
|
347
|
-
- **Multi-language Support**: Supports various languages and accents
|
|
348
|
-
- **Robust Processing**: Handles noisy audio and imperfect transcripts
|
|
349
|
-
- **Fast Processing**: Optimized for CPU execution (GPU acceleration coming soon)
|
|
350
|
-
|
|
351
|
-
### Model Requirements
|
|
352
|
-
- **Minimum Python**: 3.9+
|
|
353
|
-
- **Memory**: 4GB RAM recommended
|
|
354
|
-
- **Storage**: ~2GB for model files
|
|
355
|
-
- **GPU**: CUDA support coming soon (currently CPU-only)
|
|
356
|
-
|
|
357
|
-
## Requirements
|
|
358
|
-
|
|
359
|
-
- **Python**: 3.9-3.13
|
|
360
|
-
- **Core Dependencies**:
|
|
361
|
-
- httpx (HTTP client)
|
|
362
|
-
- lhotse (audio processing)
|
|
363
|
-
- colorful (colored output)
|
|
364
|
-
- python-dotenv (environment management)
|
|
365
|
-
- click (command line interface)
|
|
366
|
-
|
|
367
|
-
## Development
|
|
368
|
-
|
|
369
|
-
### Setting Up Development Environment
|
|
370
|
-
|
|
371
|
-
1. **Clone the repository:**
|
|
372
|
-
```bash
|
|
373
|
-
git clone https://github.com/lattifai/lattifai-python.git
|
|
374
|
-
cd lattifai-python
|
|
375
|
-
```
|
|
376
|
-
|
|
377
|
-
2. **Install development dependencies:**
|
|
378
|
-
```bash
|
|
379
|
-
pip install -e ".[test]"
|
|
380
|
-
```
|
|
381
|
-
|
|
382
|
-
3. **Install Git hooks (recommended):**
|
|
383
|
-
```bash
|
|
384
|
-
./scripts/install-hooks.sh
|
|
385
|
-
```
|
|
386
|
-
|
|
387
|
-
This will install pre-commit hooks that automatically run `isort` and `ruff` checks before each commit to ensure code quality.
|
|
388
|
-
|
|
389
|
-
### Code Quality Tools
|
|
390
|
-
|
|
391
|
-
We use the following tools to maintain code quality:
|
|
392
|
-
|
|
393
|
-
- **isort**: Automatically sorts and organizes imports
|
|
394
|
-
- **ruff**: Fast Python linter and formatter
|
|
395
|
-
|
|
396
|
-
To manually run these tools:
|
|
397
|
-
|
|
398
|
-
```bash
|
|
399
|
-
# Sort imports
|
|
400
|
-
isort src/ tests/
|
|
401
|
-
|
|
402
|
-
# Run linter
|
|
403
|
-
ruff check src/ tests/
|
|
404
|
-
|
|
405
|
-
# Run formatter
|
|
406
|
-
ruff format src/ tests/
|
|
407
|
-
|
|
408
|
-
# Fix issues automatically
|
|
409
|
-
ruff check --fix src/ tests/
|
|
410
|
-
```
|
|
411
|
-
|
|
412
|
-
### Running Tests
|
|
413
|
-
|
|
414
|
-
```bash
|
|
415
|
-
# Run all tests
|
|
416
|
-
pytest
|
|
417
|
-
|
|
418
|
-
# Run with coverage
|
|
419
|
-
pytest --cov=src --cov-report=html
|
|
420
|
-
|
|
421
|
-
# Run specific test file
|
|
422
|
-
pytest tests/test_basic.py -v
|
|
423
|
-
```
|
|
424
|
-
|
|
425
|
-
### Building and Testing Locally
|
|
426
|
-
|
|
427
|
-
```bash
|
|
428
|
-
# Build package
|
|
429
|
-
python -m build
|
|
430
|
-
|
|
431
|
-
# Install locally
|
|
432
|
-
pip install -e .
|
|
433
|
-
|
|
434
|
-
# Test CLI commands
|
|
435
|
-
lattifai --help
|
|
436
|
-
lattifai-align --help
|
|
437
|
-
```
|
|
438
|
-
|
|
439
|
-
### Contributing
|
|
440
|
-
|
|
441
|
-
1. Fork the repository
|
|
442
|
-
2. Create a feature branch: `git checkout -b feature-name`
|
|
443
|
-
3. Make your changes and add tests
|
|
444
|
-
4. Run the test suite: `pytest`
|
|
445
|
-
5. Run code quality checks: `ruff check src/ tests/`
|
|
446
|
-
6. Commit your changes: `git commit -am 'Add some feature'`
|
|
447
|
-
7. Push to the branch: `git push origin feature-name`
|
|
448
|
-
8. Submit a pull request
|
|
449
|
-
|
|
450
|
-
### Bypassing Pre-commit Hooks
|
|
451
|
-
|
|
452
|
-
If you need to commit without running the hooks (not recommended):
|
|
453
|
-
|
|
454
|
-
```bash
|
|
455
|
-
git commit --no-verify
|
|
456
|
-
```
|
|
457
|
-
|
|
458
|
-
## License
|
|
459
|
-
|
|
460
|
-
This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
|
|
461
|
-
|
|
462
|
-
## Support
|
|
463
|
-
|
|
464
|
-
- **Documentation**: [API Documentation](https://github.com/lattifai/lattifai-python/api.md)
|
|
465
|
-
- **Issues**: [GitHub Issues](https://github.com/lattifai/lattifai-python/issues)
|
|
466
|
-
- **Discussions**: [GitHub Discussions](https://github.com/lattifai/lattifai-python/discussions)
|
|
467
|
-
- **Changelog**: [CHANGELOG.md](https://github.com/lattifai/lattifai-python/CHANGELOG)
|
lattifai-0.1.4.dist-info/RECORD
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
lattifai/__init__.py,sha256=8vZF9_yObaoDZ6sUaJBuQqUp5mGBRfD4z0SdTY_8BcQ,672
|
|
2
|
-
lattifai/base_client.py,sha256=ktFtATjL9pLSJUD-VqeJKA1FHkrsGHX7Uq_x00H7gO8,3322
|
|
3
|
-
lattifai/client.py,sha256=szz9aT_GTa5JKA2ZGsd7XHr56aG_H-nf83yqRcPUVxs,4664
|
|
4
|
-
lattifai/bin/__init__.py,sha256=7YhmtEM8kbxJtz2-KIskvpLKBZAvkMSceVx8z4fkgQ4,61
|
|
5
|
-
lattifai/bin/align.py,sha256=uX8VaATzn8CgdHUtry1ZGhXiz0Jr89ELdfRK6GWC1t8,989
|
|
6
|
-
lattifai/bin/cli_base.py,sha256=4xlN4cnJZh54ErhHUHgJpyVsrcB-ftwniFzRQL_7SlU,289
|
|
7
|
-
lattifai/bin/subtitle.py,sha256=bUWImAHpvyY59Vskqb5loQiD5ytQOxR8lTQRiQ4LyNA,647
|
|
8
|
-
lattifai/io/__init__.py,sha256=vHWRN7MvAch-GUeFqqO-gM57SM-4YOpGUjIxFJdjfPA,671
|
|
9
|
-
lattifai/io/reader.py,sha256=ErPnPMUvYQpjZ7Vd86EsHUkOcEfKdoI8iM3yKHRzSOQ,2576
|
|
10
|
-
lattifai/io/supervision.py,sha256=5UfSsgBhXoDU3-6drDtoD7y8HIiA4xRKZnbOKgeejwM,354
|
|
11
|
-
lattifai/io/writer.py,sha256=1eAEFLlL8kricxRDPFBtVmeC4IiFyFnjbWXvw0VU-q4,2036
|
|
12
|
-
lattifai/tokenizers/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBfs,72
|
|
13
|
-
lattifai/tokenizers/phonemizer.py,sha256=Q5Z-4rbT3AjAPLNPnyvWGcEaJuKXRudgeIK6tUhVsJs,1741
|
|
14
|
-
lattifai/tokenizers/tokenizer.py,sha256=Qqg12zihl192Tlax6plVyxthrnzBciGLSRuzFQRgOdc,5663
|
|
15
|
-
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
16
|
-
lattifai/workers/lattice1_alpha.py,sha256=kR5wNLMn1qN14PvRA6RlWjQUGblYrJ636ILC-XkvS0s,4770
|
|
17
|
-
lattifai-0.1.4.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
18
|
-
lattifai-0.1.4.dist-info/METADATA,sha256=HEsoKRRfjRoeNwUff7Cx44sYP9trid_bdtX3j5WqK6o,13628
|
|
19
|
-
lattifai-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
-
lattifai-0.1.4.dist-info/entry_points.txt,sha256=BaixG8HYzE3Ff0QDAmsbX8cjsxZSLwUv5u4cnIpunr0,88
|
|
21
|
-
lattifai-0.1.4.dist-info/top_level.txt,sha256=tHSoXF26r-IGfbIP_JoYATqbmf14h5NrnNJGH4j5reI,9
|
|
22
|
-
lattifai-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|