lattifai 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +12 -47
- lattifai/bin/align.py +14 -2
- lattifai/bin/cli_base.py +5 -0
- lattifai/client.py +22 -10
- lattifai/workers/lattice1_alpha.py +33 -11
- lattifai-0.2.0.dist-info/METADATA +294 -0
- {lattifai-0.1.5.dist-info → lattifai-0.2.0.dist-info}/RECORD +11 -13
- lattifai-0.1.5.dist-info/METADATA +0 -444
- scripts/__init__.py +0 -1
- scripts/install_k2.py +0 -520
- {lattifai-0.1.5.dist-info → lattifai-0.2.0.dist-info}/WHEEL +0 -0
- {lattifai-0.1.5.dist-info → lattifai-0.2.0.dist-info}/entry_points.txt +0 -0
- {lattifai-0.1.5.dist-info → lattifai-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.1.5.dist-info → lattifai-0.2.0.dist-info}/top_level.txt +0 -0
lattifai/__init__.py
CHANGED
|
@@ -22,54 +22,19 @@ def _check_and_install_k2():
|
|
|
22
22
|
"""Check if k2 is installed and attempt to install it if not."""
|
|
23
23
|
try:
|
|
24
24
|
import k2
|
|
25
|
-
|
|
26
|
-
return True
|
|
27
25
|
except ImportError:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
'
|
|
37
|
-
|
|
38
|
-
'
|
|
39
|
-
|
|
40
|
-
RuntimeWarning,
|
|
41
|
-
stacklevel=2,
|
|
42
|
-
)
|
|
43
|
-
return False
|
|
44
|
-
|
|
45
|
-
print('\n' + '=' * 70)
|
|
46
|
-
print(' k2 is not installed. Attempting to install it now...')
|
|
47
|
-
print(' This is a one-time setup and may take a few minutes.')
|
|
48
|
-
print('=' * 70 + '\n')
|
|
49
|
-
|
|
50
|
-
try:
|
|
51
|
-
# Import and run the installation script
|
|
52
|
-
from scripts.install_k2 import install_k2_main
|
|
53
|
-
|
|
54
|
-
install_k2_main(dry_run=False)
|
|
55
|
-
|
|
56
|
-
print('\n' + '=' * 70)
|
|
57
|
-
print(' k2 has been installed successfully!')
|
|
58
|
-
print('=' * 70 + '\n')
|
|
59
|
-
return True
|
|
60
|
-
except Exception as e:
|
|
61
|
-
warnings.warn(
|
|
62
|
-
'\n' + '=' * 70 + '\n'
|
|
63
|
-
f' Failed to auto-install k2: {e}\n'
|
|
64
|
-
' \n'
|
|
65
|
-
' Please install k2 manually by running:\n'
|
|
66
|
-
' \n'
|
|
67
|
-
' install-k2\n'
|
|
68
|
-
' \n' + '=' * 70,
|
|
69
|
-
RuntimeWarning,
|
|
70
|
-
stacklevel=2,
|
|
71
|
-
)
|
|
72
|
-
return False
|
|
26
|
+
import subprocess
|
|
27
|
+
|
|
28
|
+
print('k2 is not installed. Attempting to install k2...')
|
|
29
|
+
try:
|
|
30
|
+
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'install-k2'])
|
|
31
|
+
subprocess.check_call([sys.executable, '-m', 'install_k2'])
|
|
32
|
+
import k2 # Try importing again after installation
|
|
33
|
+
|
|
34
|
+
print('k2 installed successfully.')
|
|
35
|
+
except Exception as e:
|
|
36
|
+
warnings.warn(f'Failed to install k2 automatically. Please install it manually. Error: {e}')
|
|
37
|
+
return True
|
|
73
38
|
|
|
74
39
|
|
|
75
40
|
# Auto-install k2 on first import
|
lattifai/bin/align.py
CHANGED
|
@@ -13,6 +13,16 @@ from lattifai.bin.cli_base import cli
|
|
|
13
13
|
default='auto',
|
|
14
14
|
help='Input Subtitle format.',
|
|
15
15
|
)
|
|
16
|
+
@click.option(
|
|
17
|
+
'-D',
|
|
18
|
+
'--device',
|
|
19
|
+
type=click.Choice(['cpu', 'cuda', 'mps'], case_sensitive=False),
|
|
20
|
+
default='cpu',
|
|
21
|
+
help='Device to use for inference.',
|
|
22
|
+
)
|
|
23
|
+
@click.option(
|
|
24
|
+
'-M', '--model_name_or_path', type=str, default='Lattifai/Lattice-1-Alpha', help='Lattifai model name or path'
|
|
25
|
+
)
|
|
16
26
|
@click.argument(
|
|
17
27
|
'input_audio_path',
|
|
18
28
|
type=click.Path(exists=True, dir_okay=False),
|
|
@@ -30,13 +40,15 @@ def align(
|
|
|
30
40
|
input_subtitle_path: Pathlike,
|
|
31
41
|
output_subtitle_path: Pathlike,
|
|
32
42
|
input_format: str = 'auto',
|
|
43
|
+
device: str = 'cpu',
|
|
44
|
+
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
33
45
|
):
|
|
34
46
|
"""
|
|
35
47
|
Command used to align audio with subtitles
|
|
36
48
|
"""
|
|
37
49
|
from lattifai import LattifAI
|
|
38
50
|
|
|
39
|
-
client = LattifAI()
|
|
51
|
+
client = LattifAI(model_name_or_path=model_name_or_path, device=device)
|
|
40
52
|
client.alignment(
|
|
41
|
-
input_audio_path, input_subtitle_path, format=input_format, output_subtitle_path=output_subtitle_path
|
|
53
|
+
input_audio_path, input_subtitle_path, format=input_format.lower(), output_subtitle_path=output_subtitle_path
|
|
42
54
|
)
|
lattifai/bin/cli_base.py
CHANGED
|
@@ -8,6 +8,11 @@ def cli():
|
|
|
8
8
|
"""
|
|
9
9
|
The shell entry point to Lattifai, a tool for audio data manipulation.
|
|
10
10
|
"""
|
|
11
|
+
# Load environment variables from .env file
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
|
|
14
|
+
load_dotenv()
|
|
15
|
+
|
|
11
16
|
logging.basicConfig(
|
|
12
17
|
format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s',
|
|
13
18
|
level=logging.INFO,
|
lattifai/client.py
CHANGED
|
@@ -25,9 +25,9 @@ class LattifAI(SyncAPIClient):
|
|
|
25
25
|
*,
|
|
26
26
|
api_key: Optional[str] = None,
|
|
27
27
|
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
28
|
-
device: str =
|
|
28
|
+
device: Optional[str] = None,
|
|
29
29
|
base_url: Optional[str] = None,
|
|
30
|
-
timeout: Union[float, int] =
|
|
30
|
+
timeout: Union[float, int] = 120.0,
|
|
31
31
|
max_retries: int = 2,
|
|
32
32
|
default_headers: Optional[Dict[str, str]] = None,
|
|
33
33
|
) -> None:
|
|
@@ -55,11 +55,26 @@ class LattifAI(SyncAPIClient):
|
|
|
55
55
|
# Initialize components
|
|
56
56
|
if not Path(model_name_or_path).exists():
|
|
57
57
|
from huggingface_hub import snapshot_download
|
|
58
|
+
from huggingface_hub.errors import LocalEntryNotFoundError
|
|
58
59
|
|
|
59
|
-
|
|
60
|
+
try:
|
|
61
|
+
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
62
|
+
except LocalEntryNotFoundError:
|
|
63
|
+
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
|
64
|
+
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
60
65
|
else:
|
|
61
66
|
model_path = model_name_or_path
|
|
62
67
|
|
|
68
|
+
# device setup
|
|
69
|
+
if device is None:
|
|
70
|
+
import torch
|
|
71
|
+
|
|
72
|
+
device = 'cpu'
|
|
73
|
+
if torch.backends.mps.is_available():
|
|
74
|
+
device = 'mps'
|
|
75
|
+
elif torch.cuda.is_available():
|
|
76
|
+
device = 'cuda'
|
|
77
|
+
|
|
63
78
|
self.tokenizer = LatticeTokenizer.from_pretrained(
|
|
64
79
|
client_wrapper=self,
|
|
65
80
|
model_path=model_path,
|
|
@@ -117,13 +132,10 @@ if __name__ == '__main__':
|
|
|
117
132
|
import sys
|
|
118
133
|
|
|
119
134
|
if len(sys.argv) == 4:
|
|
120
|
-
|
|
135
|
+
audio, subtitle, output = sys.argv[1:]
|
|
121
136
|
else:
|
|
122
137
|
audio = 'tests/data/SA1.wav'
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
alignments = client.alignment(audio, text)
|
|
126
|
-
print(alignments)
|
|
138
|
+
subtitle = 'tests/data/SA1.TXT'
|
|
139
|
+
output = None
|
|
127
140
|
|
|
128
|
-
alignments = client.alignment(audio,
|
|
129
|
-
print(alignments)
|
|
141
|
+
alignments = client.alignment(audio, subtitle, output_subtitle_path=output)
|
|
@@ -17,8 +17,6 @@ class Lattice1AlphaWorker:
|
|
|
17
17
|
"""Worker for processing audio with LatticeGraph."""
|
|
18
18
|
|
|
19
19
|
def __init__(self, model_path: Pathlike, device: str = 'cpu', num_threads: int = 8) -> None:
|
|
20
|
-
if device != 'cpu':
|
|
21
|
-
raise NotImplementedError(f'Only cpu is supported for now, got device={device}.')
|
|
22
20
|
self.config = json.load(open(f'{model_path}/config.json'))
|
|
23
21
|
|
|
24
22
|
# SessionOptions
|
|
@@ -29,8 +27,11 @@ class Lattice1AlphaWorker:
|
|
|
29
27
|
sess_options.add_session_config_entry('session.intra_op.allow_spinning', '0')
|
|
30
28
|
|
|
31
29
|
providers = []
|
|
32
|
-
if device.startswith('cuda')
|
|
30
|
+
if device.startswith('cuda') and ort.get_all_providers().count('CUDAExecutionProvider') > 0:
|
|
33
31
|
providers.append('CUDAExecutionProvider')
|
|
32
|
+
elif device.startswith('mps') and ort.get_all_providers().count('MPSExecutionProvider') > 0:
|
|
33
|
+
providers.append('MPSExecutionProvider')
|
|
34
|
+
|
|
34
35
|
self.acoustic_ort = ort.InferenceSession(
|
|
35
36
|
f'{model_path}/acoustic_opt.onnx',
|
|
36
37
|
sess_options,
|
|
@@ -49,13 +50,29 @@ class Lattice1AlphaWorker:
|
|
|
49
50
|
_start = time.time()
|
|
50
51
|
# audio -> features -> emission
|
|
51
52
|
features = self.extractor(audio) # (1, T, D)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
if features.shape[1] > 6000:
|
|
54
|
+
features_list = torch.split(features, 6000, dim=1)
|
|
55
|
+
emissions = []
|
|
56
|
+
for features in features_list:
|
|
57
|
+
ort_inputs = {
|
|
58
|
+
'features': features.cpu().numpy(),
|
|
59
|
+
'feature_lengths': np.array([features.size(1)], dtype=np.int64),
|
|
60
|
+
}
|
|
61
|
+
emission = self.acoustic_ort.run(None, ort_inputs)[0] # (1, T, vocab_size) numpy
|
|
62
|
+
emissions.append(emission)
|
|
63
|
+
emission = torch.cat(
|
|
64
|
+
[torch.from_numpy(emission).to(self.device) for emission in emissions], dim=1
|
|
65
|
+
) # (1, T, vocab_size)
|
|
66
|
+
else:
|
|
67
|
+
ort_inputs = {
|
|
68
|
+
'features': features.cpu().numpy(),
|
|
69
|
+
'feature_lengths': np.array([features.size(1)], dtype=np.int64),
|
|
70
|
+
}
|
|
71
|
+
emission = self.acoustic_ort.run(None, ort_inputs)[0] # (1, T, vocab_size) numpy
|
|
72
|
+
emission = torch.from_numpy(emission).to(self.device)
|
|
73
|
+
|
|
57
74
|
self.timings['emission'] += time.time() - _start
|
|
58
|
-
return
|
|
75
|
+
return emission # (1, T, vocab_size) torch
|
|
59
76
|
|
|
60
77
|
def load_audio(self, audio: Union[Pathlike, BinaryIO]) -> Tuple[torch.Tensor, int]:
|
|
61
78
|
# load audio
|
|
@@ -104,9 +121,14 @@ class Lattice1AlphaWorker:
|
|
|
104
121
|
self.timings['decoding_graph'] += time.time() - _start
|
|
105
122
|
|
|
106
123
|
_start = time.time()
|
|
124
|
+
if self.device.type == 'mps':
|
|
125
|
+
device = 'cpu' # k2 does not support mps yet
|
|
126
|
+
else:
|
|
127
|
+
device = self.device
|
|
128
|
+
|
|
107
129
|
results, labels = align_segments(
|
|
108
|
-
emission.to(
|
|
109
|
-
decoding_graph.to(
|
|
130
|
+
emission.to(device) * acoustic_scale,
|
|
131
|
+
decoding_graph.to(device),
|
|
110
132
|
torch.tensor([emission.shape[1]], dtype=torch.int32),
|
|
111
133
|
search_beam=100,
|
|
112
134
|
output_beam=40,
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lattifai
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
|
+
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
|
+
Maintainer-email: Lattice <tech@lattifai.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2025 Lattifai.
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
Project-URL: Homepage, https://github.com/lattifai/lattifai-python
|
|
29
|
+
Project-URL: Documentation, https://github.com/lattifai/lattifai-python/README.md
|
|
30
|
+
Project-URL: Bug Tracker, https://github.com/lattifai/lattifai-python/issues
|
|
31
|
+
Project-URL: Discussions, https://github.com/lattifai/lattifai-python/discussions
|
|
32
|
+
Project-URL: Changelog, https://github.com/lattifai/lattifai-python/CHANGELOG.md
|
|
33
|
+
Keywords: lattifai,speech recognition,video analysis,ai,sdk,api client
|
|
34
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: Intended Audience :: Science/Research
|
|
37
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
43
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
44
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
45
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
46
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
47
|
+
Classifier: Topic :: Multimedia :: Video
|
|
48
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
49
|
+
Requires-Python: >=3.9
|
|
50
|
+
Description-Content-Type: text/markdown
|
|
51
|
+
License-File: LICENSE
|
|
52
|
+
Requires-Dist: lattifai-core>=0.2.0
|
|
53
|
+
Requires-Dist: httpx
|
|
54
|
+
Requires-Dist: python-dotenv
|
|
55
|
+
Requires-Dist: lhotse>=1.26.0
|
|
56
|
+
Requires-Dist: colorful>=0.5.6
|
|
57
|
+
Requires-Dist: pysubs2
|
|
58
|
+
Requires-Dist: praatio
|
|
59
|
+
Requires-Dist: tgt
|
|
60
|
+
Requires-Dist: onnxruntime
|
|
61
|
+
Requires-Dist: resampy
|
|
62
|
+
Requires-Dist: g2p-phonemizer==0.1.1
|
|
63
|
+
Provides-Extra: numpy
|
|
64
|
+
Requires-Dist: numpy; extra == "numpy"
|
|
65
|
+
Provides-Extra: test
|
|
66
|
+
Requires-Dist: pytest; extra == "test"
|
|
67
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
68
|
+
Requires-Dist: ruff; extra == "test"
|
|
69
|
+
Requires-Dist: numpy; extra == "test"
|
|
70
|
+
Provides-Extra: all
|
|
71
|
+
Requires-Dist: numpy; extra == "all"
|
|
72
|
+
Requires-Dist: pytest; extra == "all"
|
|
73
|
+
Requires-Dist: pytest-cov; extra == "all"
|
|
74
|
+
Requires-Dist: ruff; extra == "all"
|
|
75
|
+
Dynamic: license-file
|
|
76
|
+
|
|
77
|
+
# LattifAI Python
|
|
78
|
+
|
|
79
|
+
[![PyPI version](https://badge.fury.io/py/lattifai.svg)](https://badge.fury.io/py/lattifai)
|
|
80
|
+
|
|
81
|
+
<p align="center">
|
|
82
|
+
🌐 <a href="https://lattifai.com"><b>Official Website</b></a>    |    🖥️ <a href="https://github.com/lattifai/lattifai-python">GitHub</a>    |    🤗 <a href="https://huggingface.co/Lattifai/Lattice-1-Alpha">Model</a>    |    📑 <a href="https://lattifai.com/blogs">Blog</a>    |    <a href="https://discord.gg/gTZqdaBJ"><img src="https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white" alt="Discord" style="vertical-align: middle;"></a>
|
|
83
|
+
</p>
|
|
84
|
+
|
|
85
|
+
Advanced forced alignment and subtitle generation powered by [Lattice-1-Alpha](https://huggingface.co/Lattifai/Lattice-1-Alpha) model.
|
|
86
|
+
|
|
87
|
+
## Installation
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install install-k2
|
|
91
|
+
# The installation will automatically detect and use your already installed PyTorch version.
|
|
92
|
+
install-k2 # Install k2
|
|
93
|
+
|
|
94
|
+
pip install lattifai
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
> **⚠️ Important**: You must run `install-k2` before using the lattifai library.
|
|
98
|
+
|
|
99
|
+
## Quick Start
|
|
100
|
+
|
|
101
|
+
### Command Line
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Align audio with subtitle
|
|
105
|
+
lattifai align audio.wav subtitle.srt output.srt
|
|
106
|
+
|
|
107
|
+
# Convert subtitle format
|
|
108
|
+
lattifai subtitle convert input.srt output.vtt
|
|
109
|
+
```
|
|
110
|
+
#### lattifai align options
|
|
111
|
+
```
|
|
112
|
+
> lattifai align --help
|
|
113
|
+
Usage: lattifai align [OPTIONS] INPUT_AUDIO_PATH INPUT_SUBTITLE_PATH OUTPUT_SUBTITLE_PATH
|
|
114
|
+
|
|
115
|
+
Command used to align audio with subtitles
|
|
116
|
+
|
|
117
|
+
Options:
|
|
118
|
+
-F, --input_format [srt|vtt|ass|txt|auto] Input Subtitle format.
|
|
119
|
+
-D, --device [cpu|cuda|mps] Device to use for inference.
|
|
120
|
+
--help Show this message and exit.
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Python API
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from lattifai import LattifAI
|
|
127
|
+
|
|
128
|
+
# Initialize client
|
|
129
|
+
client = LattifAI(
|
|
130
|
+
api_key=None,
|
|
131
|
+
model_name_or_path='Lattifai/Lattice-1-Alpha',
|
|
132
|
+
device='cpu', # 'cpu', 'cuda', or 'mps'
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Perform alignment
|
|
136
|
+
result = client.alignment(
|
|
137
|
+
audio="audio.wav",
|
|
138
|
+
subtitle="subtitle.srt",
|
|
139
|
+
output_subtitle_path="output.srt"
|
|
140
|
+
)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Supported Formats
|
|
144
|
+
|
|
145
|
+
**Audio**: WAV, MP3, FLAC, M4A, OGG
|
|
146
|
+
**Subtitle**: SRT, VTT, ASS, TXT (plain text)
|
|
147
|
+
|
|
148
|
+
## API Reference
|
|
149
|
+
|
|
150
|
+
### LattifAI
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
LattifAI(
|
|
154
|
+
api_key: Optional[str] = None,
|
|
155
|
+
model_name_or_path: str = 'Lattifai/Lattice-1-Alpha',
|
|
156
|
+
device: Optional[str] = None  # 'cpu', 'cuda', or 'mps'; when None, auto-selects 'mps' if available, else 'cuda' if available, else 'cpu'
|
|
157
|
+
)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### alignment()
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
client.alignment(
|
|
164
|
+
audio: str, # Path to audio file
|
|
165
|
+
subtitle: str, # Path to subtitle/text file
|
|
166
|
+
format: Optional[str] = None, # 'srt', 'vtt', 'ass', 'txt' (auto-detect if None)
|
|
167
|
+
output_subtitle_path: Optional[str] = None
|
|
168
|
+
) -> str
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Examples
|
|
172
|
+
|
|
173
|
+
### Basic Text Alignment
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
client = LattifAI()
|
|
177
|
+
client.alignment(
|
|
178
|
+
audio="speech.wav",
|
|
179
|
+
subtitle="transcript.txt",
|
|
180
|
+
format="txt",
|
|
181
|
+
output_subtitle_path="output.srt"
|
|
182
|
+
)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Batch Processing
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
from pathlib import Path
|
|
189
|
+
|
|
190
|
+
client = LattifAI()
|
|
191
|
+
audio_dir = Path("audio_files")
|
|
192
|
+
subtitle_dir = Path("subtitles")
|
|
193
|
+
output_dir = Path("aligned")
|
|
194
|
+
|
|
195
|
+
for audio in audio_dir.glob("*.wav"):
|
|
196
|
+
subtitle = subtitle_dir / f"{audio.stem}.srt"
|
|
197
|
+
if subtitle.exists():
|
|
198
|
+
client.alignment(
|
|
199
|
+
audio=audio,
|
|
200
|
+
subtitle=subtitle,
|
|
201
|
+
output_subtitle_path=output_dir / f"{audio.stem}_aligned.srt"
|
|
202
|
+
)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### GPU Acceleration
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
# NVIDIA GPU
|
|
209
|
+
client = LattifAI(device='cuda')
|
|
210
|
+
|
|
211
|
+
# Apple Silicon
|
|
212
|
+
client = LattifAI(device='mps')
|
|
213
|
+
|
|
214
|
+
# CLI
|
|
215
|
+
lattifai align --device mps audio.wav subtitle.srt output.srt
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Configuration
|
|
219
|
+
|
|
220
|
+
### API Key Setup
|
|
221
|
+
|
|
222
|
+
First, create your API key at [https://lattifai.com/dashboard/api-keys](https://lattifai.com/dashboard/api-keys)
|
|
223
|
+
|
|
224
|
+
**Recommended: Using .env file**
|
|
225
|
+
|
|
226
|
+
Create a `.env` file in your project root:
|
|
227
|
+
```bash
|
|
228
|
+
LATTIFAI_API_KEY=your-api-key
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
The library automatically loads the `.env` file (python-dotenv is included as a dependency).
|
|
232
|
+
|
|
233
|
+
**Alternative: Environment variable**
|
|
234
|
+
```bash
|
|
235
|
+
export LATTIFAI_API_KEY="your-api-key"
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Model Information
|
|
239
|
+
|
|
240
|
+
**[Lattice-1-Alpha](https://huggingface.co/Lattifai/Lattice-1-Alpha)** features:
|
|
241
|
+
- State-of-the-art alignment precision
|
|
242
|
+
- **Language Support**: Currently supports English only. The upcoming **Lattice-1** release will support English, Chinese, and mixed English-Chinese content.
|
|
243
|
+
- Handles noisy audio and imperfect transcripts
|
|
244
|
+
- Optimized for CPU and GPU (CUDA/MPS)
|
|
245
|
+
|
|
246
|
+
**Requirements**:
|
|
247
|
+
- Python 3.9+
|
|
248
|
+
- 4GB RAM recommended
|
|
249
|
+
- ~2GB storage for model files
|
|
250
|
+
|
|
251
|
+
## Development
|
|
252
|
+
|
|
253
|
+
### Setup
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
git clone https://github.com/lattifai/lattifai-python.git
|
|
257
|
+
cd lattifai-python
|
|
258
|
+
pip install -e ".[test]"
|
|
259
|
+
./scripts/install-hooks.sh # Optional: install pre-commit hooks
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Testing
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
pytest # Run all tests
|
|
266
|
+
pytest --cov=src # With coverage
|
|
267
|
+
pytest tests/test_basic.py # Specific test
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Code Quality
|
|
271
|
+
|
|
272
|
+
```bash
|
|
273
|
+
ruff check src/ tests/ # Lint
|
|
274
|
+
ruff format src/ tests/ # Format
|
|
275
|
+
isort src/ tests/ # Sort imports
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
## Contributing
|
|
279
|
+
|
|
280
|
+
1. Fork the repository
|
|
281
|
+
2. Create a feature branch
|
|
282
|
+
3. Make changes and add tests
|
|
283
|
+
4. Run `pytest` and `ruff check`
|
|
284
|
+
5. Submit a pull request
|
|
285
|
+
|
|
286
|
+
## License
|
|
287
|
+
|
|
288
|
+
MIT License
|
|
289
|
+
|
|
290
|
+
## Support
|
|
291
|
+
|
|
292
|
+
- **Issues**: [GitHub Issues](https://github.com/lattifai/lattifai-python/issues)
|
|
293
|
+
- **Discussions**: [GitHub Discussions](https://github.com/lattifai/lattifai-python/discussions)
|
|
294
|
+
- **Discord**: [Join our community](https://discord.gg/gTZqdaBJ)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
lattifai/__init__.py,sha256=
|
|
1
|
+
lattifai/__init__.py,sha256=JXUg0dT74UyAtKOjewRs9ijr5sl9SYsc6oU_WItY314,1497
|
|
2
2
|
lattifai/base_client.py,sha256=ktFtATjL9pLSJUD-VqeJKA1FHkrsGHX7Uq_x00H7gO8,3322
|
|
3
|
-
lattifai/client.py,sha256=
|
|
3
|
+
lattifai/client.py,sha256=NjHUqMZFoRxuyxdzBNEcn5kU57gJl31FSb6i9DDcoCw,5059
|
|
4
4
|
lattifai/bin/__init__.py,sha256=7YhmtEM8kbxJtz2-KIskvpLKBZAvkMSceVx8z4fkgQ4,61
|
|
5
|
-
lattifai/bin/align.py,sha256=
|
|
6
|
-
lattifai/bin/cli_base.py,sha256=
|
|
5
|
+
lattifai/bin/align.py,sha256=nw-wABsNyxC8zN3siiqgNi1Foou3cYhVzIAomuVrFOY,1436
|
|
6
|
+
lattifai/bin/cli_base.py,sha256=y535WXDRX8StloFn9icpfw7nQt0JxuWBIuPMnRxAYy8,392
|
|
7
7
|
lattifai/bin/subtitle.py,sha256=bUWImAHpvyY59Vskqb5loQiD5ytQOxR8lTQRiQ4LyNA,647
|
|
8
8
|
lattifai/io/__init__.py,sha256=vHWRN7MvAch-GUeFqqO-gM57SM-4YOpGUjIxFJdjfPA,671
|
|
9
9
|
lattifai/io/reader.py,sha256=ErPnPMUvYQpjZ7Vd86EsHUkOcEfKdoI8iM3yKHRzSOQ,2576
|
|
@@ -13,12 +13,10 @@ lattifai/tokenizers/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvB
|
|
|
13
13
|
lattifai/tokenizers/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
|
|
14
14
|
lattifai/tokenizers/tokenizer.py,sha256=u4lgS6-jN9cLuMNIojA4Swfsqb1EcyI7Bh_iw7tuL-s,5818
|
|
15
15
|
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
16
|
-
lattifai/workers/lattice1_alpha.py,sha256=
|
|
17
|
-
lattifai-0.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
lattifai-0.
|
|
21
|
-
lattifai-0.
|
|
22
|
-
lattifai-0.
|
|
23
|
-
lattifai-0.1.5.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
|
|
24
|
-
lattifai-0.1.5.dist-info/RECORD,,
|
|
16
|
+
lattifai/workers/lattice1_alpha.py,sha256=1VFo59EcygEctTHOhkcII8v3_mrj8JEJ8Fcaqk_7LVo,5762
|
|
17
|
+
lattifai-0.2.0.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
18
|
+
lattifai-0.2.0.dist-info/METADATA,sha256=bXb6z5D1C-9YwHeycSFs8SAhUp8VNJbE9u-J9lvYMZ8,8997
|
|
19
|
+
lattifai-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
lattifai-0.2.0.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
|
|
21
|
+
lattifai-0.2.0.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
|
|
22
|
+
lattifai-0.2.0.dist-info/RECORD,,
|