lattifai 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +27 -1
- lattifai/base_client.py +3 -6
- lattifai/client.py +110 -37
- lattifai/errors.py +219 -0
- lattifai/tokenizer/tokenizer.py +2 -0
- lattifai/workers/lattice1_alpha.py +130 -66
- {lattifai-0.2.4.dist-info → lattifai-0.2.5.dist-info}/METADATA +1 -1
- {lattifai-0.2.4.dist-info → lattifai-0.2.5.dist-info}/RECORD +12 -11
- {lattifai-0.2.4.dist-info → lattifai-0.2.5.dist-info}/WHEEL +0 -0
- {lattifai-0.2.4.dist-info → lattifai-0.2.5.dist-info}/entry_points.txt +0 -0
- {lattifai-0.2.4.dist-info → lattifai-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.2.4.dist-info → lattifai-0.2.5.dist-info}/top_level.txt +0 -0
lattifai/__init__.py
CHANGED
|
@@ -2,7 +2,21 @@ import os
|
|
|
2
2
|
import sys
|
|
3
3
|
import warnings
|
|
4
4
|
|
|
5
|
-
from .
|
|
5
|
+
from .errors import (
|
|
6
|
+
AlignmentError,
|
|
7
|
+
APIError,
|
|
8
|
+
AudioFormatError,
|
|
9
|
+
AudioLoadError,
|
|
10
|
+
AudioProcessingError,
|
|
11
|
+
ConfigurationError,
|
|
12
|
+
DependencyError,
|
|
13
|
+
LatticeDecodingError,
|
|
14
|
+
LatticeEncodingError,
|
|
15
|
+
LattifAIError,
|
|
16
|
+
ModelLoadError,
|
|
17
|
+
SubtitleParseError,
|
|
18
|
+
SubtitleProcessingError,
|
|
19
|
+
)
|
|
6
20
|
from .io import SubtitleIO
|
|
7
21
|
|
|
8
22
|
try:
|
|
@@ -53,6 +67,18 @@ def __getattr__(name):
|
|
|
53
67
|
__all__ = [
|
|
54
68
|
'LattifAI', # noqa: F822
|
|
55
69
|
'LattifAIError',
|
|
70
|
+
'AudioProcessingError',
|
|
71
|
+
'AudioLoadError',
|
|
72
|
+
'AudioFormatError',
|
|
73
|
+
'SubtitleProcessingError',
|
|
74
|
+
'SubtitleParseError',
|
|
75
|
+
'AlignmentError',
|
|
76
|
+
'LatticeEncodingError',
|
|
77
|
+
'LatticeDecodingError',
|
|
78
|
+
'ModelLoadError',
|
|
79
|
+
'DependencyError',
|
|
80
|
+
'APIError',
|
|
81
|
+
'ConfigurationError',
|
|
56
82
|
'SubtitleIO',
|
|
57
83
|
'__version__',
|
|
58
84
|
]
|
lattifai/base_client.py
CHANGED
|
@@ -6,11 +6,8 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Union # noqa: F401
|
|
|
6
6
|
|
|
7
7
|
import httpx
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
"""Base exception for LattifAI errors."""
|
|
12
|
-
|
|
13
|
-
pass
|
|
9
|
+
# Import from errors module for consistency
|
|
10
|
+
from .errors import APIError, ConfigurationError, LattifAIError
|
|
14
11
|
|
|
15
12
|
|
|
16
13
|
class BaseAPIClient(ABC):
|
|
@@ -28,7 +25,7 @@ class BaseAPIClient(ABC):
|
|
|
28
25
|
if api_key is None:
|
|
29
26
|
api_key = os.environ.get('LATTIFAI_API_KEY')
|
|
30
27
|
if api_key is None:
|
|
31
|
-
raise
|
|
28
|
+
raise ConfigurationError(
|
|
32
29
|
'The api_key client option must be set either by passing api_key to the client '
|
|
33
30
|
'or by setting the LATTIFAI_API_KEY environment variable'
|
|
34
31
|
)
|
lattifai/client.py
CHANGED
|
@@ -9,7 +9,17 @@ import colorful
|
|
|
9
9
|
from dotenv import load_dotenv
|
|
10
10
|
from lhotse.utils import Pathlike
|
|
11
11
|
|
|
12
|
-
from lattifai.base_client import AsyncAPIClient,
|
|
12
|
+
from lattifai.base_client import AsyncAPIClient, SyncAPIClient
|
|
13
|
+
from lattifai.errors import (
|
|
14
|
+
AlignmentError,
|
|
15
|
+
ConfigurationError,
|
|
16
|
+
LatticeDecodingError,
|
|
17
|
+
LatticeEncodingError,
|
|
18
|
+
LattifAIError,
|
|
19
|
+
ModelLoadError,
|
|
20
|
+
SubtitleProcessingError,
|
|
21
|
+
handle_exception,
|
|
22
|
+
)
|
|
13
23
|
from lattifai.io import SubtitleFormat, SubtitleIO
|
|
14
24
|
from lattifai.tokenizer import LatticeTokenizer
|
|
15
25
|
from lattifai.workers import Lattice1AlphaWorker
|
|
@@ -34,7 +44,7 @@ class LattifAI(SyncAPIClient):
|
|
|
34
44
|
if api_key is None:
|
|
35
45
|
api_key = os.environ.get('LATTIFAI_API_KEY')
|
|
36
46
|
if api_key is None:
|
|
37
|
-
raise
|
|
47
|
+
raise ConfigurationError(
|
|
38
48
|
'The api_key client option must be set either by passing api_key to the client '
|
|
39
49
|
'or by setting the LATTIFAI_API_KEY environment variable'
|
|
40
50
|
)
|
|
@@ -60,8 +70,13 @@ class LattifAI(SyncAPIClient):
|
|
|
60
70
|
try:
|
|
61
71
|
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
62
72
|
except LocalEntryNotFoundError:
|
|
63
|
-
|
|
64
|
-
|
|
73
|
+
try:
|
|
74
|
+
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
|
75
|
+
model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
|
|
76
|
+
except Exception as e:
|
|
77
|
+
raise ModelLoadError(model_name_or_path, original_error=e)
|
|
78
|
+
except Exception as e:
|
|
79
|
+
raise ModelLoadError(model_name_or_path, original_error=e)
|
|
65
80
|
else:
|
|
66
81
|
model_path = model_name_or_path
|
|
67
82
|
|
|
@@ -75,12 +90,19 @@ class LattifAI(SyncAPIClient):
|
|
|
75
90
|
elif torch.cuda.is_available():
|
|
76
91
|
device = 'cuda'
|
|
77
92
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
93
|
+
try:
|
|
94
|
+
self.tokenizer = LatticeTokenizer.from_pretrained(
|
|
95
|
+
client_wrapper=self,
|
|
96
|
+
model_path=model_path,
|
|
97
|
+
device=device,
|
|
98
|
+
)
|
|
99
|
+
except Exception as e:
|
|
100
|
+
raise ModelLoadError(f'tokenizer from {model_path}', original_error=e)
|
|
101
|
+
|
|
102
|
+
try:
|
|
103
|
+
self.worker = Lattice1AlphaWorker(model_path, device=device, num_threads=8)
|
|
104
|
+
except Exception as e:
|
|
105
|
+
raise ModelLoadError(f'worker from {model_path}', original_error=e)
|
|
84
106
|
|
|
85
107
|
def alignment(
|
|
86
108
|
self,
|
|
@@ -95,37 +117,88 @@ class LattifAI(SyncAPIClient):
|
|
|
95
117
|
Args:
|
|
96
118
|
audio: Audio file path
|
|
97
119
|
subtitle: Subtitle/Text to align with audio
|
|
98
|
-
|
|
120
|
+
format: Output format (srt, vtt, ass, txt)
|
|
121
|
+
split_sentence: Whether to split sentences during processing
|
|
122
|
+
output_subtitle_path: Path to save output file
|
|
99
123
|
|
|
100
124
|
Returns:
|
|
101
125
|
Aligned subtitles in specified format
|
|
126
|
+
|
|
127
|
+
Raises:
|
|
128
|
+
SubtitleProcessingError: If subtitle file cannot be parsed
|
|
129
|
+
LatticeEncodingError: If lattice graph generation fails
|
|
130
|
+
AlignmentError: If audio alignment fails
|
|
131
|
+
LatticeDecodingError: If lattice decoding fails
|
|
102
132
|
"""
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
print(colorful.
|
|
127
|
-
|
|
128
|
-
|
|
133
|
+
try:
|
|
134
|
+
# step1: parse text or subtitles
|
|
135
|
+
print(colorful.cyan(f'📖 Step 1: Reading subtitle file from {subtitle}'))
|
|
136
|
+
try:
|
|
137
|
+
supervisions = SubtitleIO.read(subtitle, format=format)
|
|
138
|
+
print(colorful.green(f' ✓ Parsed {len(supervisions)} subtitle segments'))
|
|
139
|
+
except Exception as e:
|
|
140
|
+
raise SubtitleProcessingError(
|
|
141
|
+
f'Failed to parse subtitle file: {subtitle}',
|
|
142
|
+
subtitle_path=str(subtitle),
|
|
143
|
+
context={'original_error': str(e)},
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
# step2: make lattice by call Lattifai API
|
|
147
|
+
print(colorful.cyan('🔗 Step 2: Creating lattice graph from text'))
|
|
148
|
+
try:
|
|
149
|
+
lattice_id, lattice_graph = self.tokenizer.tokenize(supervisions, split_sentence=split_sentence)
|
|
150
|
+
print(colorful.green(f' ✓ Generated lattice graph with ID: {lattice_id}'))
|
|
151
|
+
except Exception as e:
|
|
152
|
+
text_content = ' '.join([sup.text for sup in supervisions]) if supervisions else ''
|
|
153
|
+
raise LatticeEncodingError(text_content, original_error=e)
|
|
154
|
+
|
|
155
|
+
# step3: align audio with text
|
|
156
|
+
print(colorful.cyan(f'🎵 Step 3: Performing alignment on audio file: {audio}'))
|
|
157
|
+
try:
|
|
158
|
+
lattice_results = self.worker.alignment(audio, lattice_graph)
|
|
159
|
+
print(colorful.green(' ✓ Alignment completed successfully'))
|
|
160
|
+
except Exception as e:
|
|
161
|
+
raise AlignmentError(
|
|
162
|
+
f'Audio alignment failed for {audio}',
|
|
163
|
+
audio_path=str(audio),
|
|
164
|
+
subtitle_path=str(subtitle),
|
|
165
|
+
context={'original_error': str(e)},
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
# step4: decode the lattice paths
|
|
169
|
+
print(colorful.cyan('🔍 Step 4: Decoding lattice paths to final alignments'))
|
|
170
|
+
try:
|
|
171
|
+
alignments = self.tokenizer.detokenize(lattice_id, lattice_results)
|
|
172
|
+
print(colorful.green(f' ✓ Decoded {len(alignments)} aligned segments'))
|
|
173
|
+
except Exception as e:
|
|
174
|
+
print(colorful.red(' x Failed to decode lattice alignment results'))
|
|
175
|
+
raise LatticeDecodingError(lattice_id, original_error=e)
|
|
176
|
+
|
|
177
|
+
# step5: export alignments to target format
|
|
178
|
+
if output_subtitle_path:
|
|
179
|
+
try:
|
|
180
|
+
SubtitleIO.write(alignments, output_path=output_subtitle_path)
|
|
181
|
+
print(colorful.green(f'🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}'))
|
|
182
|
+
except Exception as e:
|
|
183
|
+
raise SubtitleProcessingError(
|
|
184
|
+
f'Failed to write output file: {output_subtitle_path}',
|
|
185
|
+
subtitle_path=str(output_subtitle_path),
|
|
186
|
+
context={'original_error': str(e)},
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
return output_subtitle_path or alignments
|
|
190
|
+
|
|
191
|
+
except (SubtitleProcessingError, LatticeEncodingError, AlignmentError, LatticeDecodingError):
|
|
192
|
+
# Re-raise our specific errors as-is
|
|
193
|
+
raise
|
|
194
|
+
except Exception as e:
|
|
195
|
+
# Catch any unexpected errors and wrap them
|
|
196
|
+
raise AlignmentError(
|
|
197
|
+
'Unexpected error during alignment process',
|
|
198
|
+
audio_path=str(audio),
|
|
199
|
+
subtitle_path=str(subtitle),
|
|
200
|
+
context={'original_error': str(e), 'error_type': e.__class__.__name__},
|
|
201
|
+
)
|
|
129
202
|
|
|
130
203
|
|
|
131
204
|
if __name__ == '__main__':
|
lattifai/errors.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""Error handling and exception classes for LattifAI SDK."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import traceback
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
import colorful
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LattifAIError(Exception):
    """Base exception for LattifAI errors.

    Every instance carries three attributes set in ``__init__``:
    ``message`` (the raw message), ``error_code`` (defaults to the concrete
    class name) and ``context`` (a dict of extra diagnostic key/value pairs).
    ``str(error)`` renders all three followed by the support information.
    """

    def __init__(self, message: str, error_code: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
        """Initialize LattifAI error.

        Args:
            message: Error message
            error_code: Optional error code for categorization; the class name is used when omitted
            context: Optional context information about the error
        """
        super().__init__(message)
        self.message = message
        self.error_code = error_code or self.__class__.__name__
        # Shallow-copy so that later changes to the error's context never leak
        # into the dict the caller passed in (and vice versa).
        self.context = dict(context) if context else {}

    def get_support_info(self) -> str:
        """Get support information for users."""
        return (
            f'\n\n{colorful.green("🔧 Need help? Here are two ways to get support:")}\n'
            f' 1. 📝 Create a GitHub issue: {colorful.green("https://github.com/lattifai/lattifai-python/issues")}\n'
            ' Please include:\n'
            ' - Your audio file format and duration\n'
            " - The text/subtitle content you're trying to align\n"
            ' - This error message and stack trace\n'
            f' 2. 💬 Join our Discord community: {colorful.green("https://discord.gg/vzmTzzZgNu")}\n'
            ' Our team and community can help you troubleshoot\n'
        )

    def __str__(self) -> str:
        """Return formatted error message with support information."""
        base_message = f'{colorful.red(f"[{self.error_code}] {self.message}")}'
        if self.context:
            # Context entries are appended as comma-separated "key=value" pairs.
            context_str = f'\n{colorful.yellow("Context:")} ' + ', '.join(f'{k}={v}' for k, v in self.context.items())
            base_message += context_str
        return base_message + self.get_support_info()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AudioProcessingError(LattifAIError):
    """Error during audio processing operations."""

    def __init__(self, message: str, audio_path: Optional[str] = None, **kwargs):
        """Initialize the error, recording ``audio_path`` in the context when given.

        Args:
            message: Error message
            audio_path: Optional path of the audio being processed
            **kwargs: Forwarded to ``LattifAIError`` (e.g. ``error_code``, ``context``)
        """
        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        if audio_path:
            context['audio_path'] = audio_path
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AudioLoadError(AudioProcessingError):
    """Error loading or reading audio file."""

    def __init__(self, audio_path: str, original_error: Optional[Exception] = None, **kwargs):
        """Build the message from the audio path and, if present, the underlying error.

        Args:
            audio_path: Path of the audio file that could not be loaded
            original_error: Optional exception that triggered the failure
            **kwargs: Forwarded to ``AudioProcessingError`` (e.g. ``error_code``, ``context``)
        """
        message = f'Failed to load audio file: {colorful.red(audio_path)}'
        if original_error:
            message += f' - {colorful.red(str(original_error))}'

        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update({'audio_path': audio_path, 'original_error': str(original_error) if original_error else None})
        kwargs['context'] = context

        super().__init__(message, audio_path=audio_path, **kwargs)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class AudioFormatError(AudioProcessingError):
    """Error with audio format or codec."""

    def __init__(self, audio_path: str, format_issue: str, **kwargs):
        """Build the message from the audio path and a description of the format problem.

        Args:
            audio_path: Path of the offending audio file
            format_issue: Human-readable description of what is wrong with the format
            **kwargs: Forwarded to ``AudioProcessingError`` (e.g. ``error_code``, ``context``)
        """
        message = f'Audio format error for {colorful.red(audio_path)}: {colorful.red(format_issue)}'
        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update({'audio_path': audio_path, 'format_issue': format_issue})
        kwargs['context'] = context
        super().__init__(message, audio_path=audio_path, **kwargs)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SubtitleProcessingError(LattifAIError):
    """Error during subtitle/text processing operations."""

    def __init__(self, message: str, subtitle_path: Optional[str] = None, **kwargs):
        """Initialize the error, recording ``subtitle_path`` in the context when given.

        Args:
            message: Error message
            subtitle_path: Optional path of the subtitle/text file involved
            **kwargs: Forwarded to ``LattifAIError`` (e.g. ``error_code``, ``context``)
        """
        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        if subtitle_path:
            context['subtitle_path'] = subtitle_path
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class SubtitleParseError(SubtitleProcessingError):
    """Error parsing subtitle or text file."""

    def __init__(self, subtitle_path: str, parse_issue: str, **kwargs):
        """Build the message from the subtitle path and a description of the parse problem.

        Args:
            subtitle_path: Path of the subtitle file that failed to parse
            parse_issue: Human-readable description of the parse failure
            **kwargs: Forwarded to ``SubtitleProcessingError`` (e.g. ``error_code``, ``context``)
        """
        message = f'Failed to parse subtitle file {subtitle_path}: {parse_issue}'
        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update({'subtitle_path': subtitle_path, 'parse_issue': parse_issue})
        kwargs['context'] = context
        super().__init__(message, subtitle_path=subtitle_path, **kwargs)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class AlignmentError(LattifAIError):
    """Error during audio-text alignment process."""

    def __init__(self, message: str, audio_path: Optional[str] = None, subtitle_path: Optional[str] = None, **kwargs):
        """Initialize the error, recording whichever of the two paths were supplied.

        Args:
            message: Error message
            audio_path: Optional path of the audio being aligned
            subtitle_path: Optional path of the subtitle/text being aligned
            **kwargs: Forwarded to ``LattifAIError`` (e.g. ``error_code``, ``context``)
        """
        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        if audio_path:
            context['audio_path'] = audio_path
        if subtitle_path:
            context['subtitle_path'] = subtitle_path
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class LatticeEncodingError(AlignmentError):
    """Error generating lattice graph from text."""

    def __init__(self, text_content: str, original_error: Optional[Exception] = None, **kwargs):
        """Build the message and record the text length plus a short preview in the context.

        Args:
            text_content: The text from which the lattice graph could not be generated
            original_error: Optional exception that triggered the failure
            **kwargs: Forwarded to ``AlignmentError`` (e.g. ``error_code``, ``context``)
        """
        message = 'Failed to generate lattice graph from text'
        if original_error:
            message += f': {colorful.red(str(original_error))}'

        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update(
            {
                'text_content_length': len(text_content),
                # Only the first 100 characters are kept, with an ellipsis when truncated.
                'text_preview': (text_content[:100] + '...') if len(text_content) > 100 else text_content,
                'original_error': str(original_error) if original_error else None,
            }
        )
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class LatticeDecodingError(AlignmentError):
    """Error decoding lattice alignment results."""

    def __init__(self, lattice_id: str, original_error: Optional[Exception] = None, **kwargs):
        """Build the message from the lattice ID and, if present, the underlying error.

        Args:
            lattice_id: Identifier of the lattice whose results could not be decoded
            original_error: Optional exception that triggered the failure
            **kwargs: Forwarded to ``AlignmentError`` (e.g. ``error_code``, ``context``)
        """
        message = f'Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}'
        if original_error:
            message += f' - {colorful.red(str(original_error))}'

        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update({'lattice_id': lattice_id, 'original_error': str(original_error) if original_error else None})
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class ModelLoadError(LattifAIError):
    """Error loading AI model."""

    def __init__(self, model_name: str, original_error: Optional[Exception] = None, **kwargs):
        """Build the message from the model name and, if present, the underlying error.

        Args:
            model_name: Name or description of the model (or model component) that failed to load
            original_error: Optional exception that triggered the failure
            **kwargs: Forwarded to ``LattifAIError`` (e.g. ``error_code``, ``context``)
        """
        message = f'Failed to load model: {colorful.red(model_name)}'
        if original_error:
            message += f' - {colorful.red(str(original_error))}'

        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update({'model_name': model_name, 'original_error': str(original_error) if original_error else None})
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class DependencyError(LattifAIError):
    """Error with required dependencies."""

    def __init__(self, dependency_name: str, install_command: Optional[str] = None, **kwargs):
        """Build the message from the dependency name and, if given, an install hint.

        Args:
            dependency_name: Name of the missing dependency
            install_command: Optional command (or instruction) shown to the user for installing it
            **kwargs: Forwarded to ``LattifAIError`` (e.g. ``error_code``, ``context``)
        """
        message = f'Missing required dependency: {colorful.red(dependency_name)}'
        if install_command:
            message += f'\nPlease install it using: {colorful.yellow(install_command)}'

        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        context.update({'dependency_name': dependency_name, 'install_command': install_command})
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class APIError(LattifAIError):
    """Error communicating with LattifAI API."""

    def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None, **kwargs):
        """Initialize the error, always recording status code and response text in the context.

        Args:
            message: Error message
            status_code: Optional HTTP status code of the failed response
            response_text: Optional body text of the failed response
            **kwargs: Forwarded to ``LattifAIError`` (e.g. ``error_code``, ``context``)
        """
        # Work on a copy: the caller's dict is never mutated, and an explicit
        # context=None is treated the same as a missing context.
        context = dict(kwargs.get('context') or {})
        # Both keys are stored even when their values are None.
        context.update({'status_code': status_code, 'response_text': response_text})
        kwargs['context'] = context
        super().__init__(message, **kwargs)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class ConfigurationError(LattifAIError):
    """Raised when the client is configured incorrectly."""

    def __init__(self, config_issue: str, **kwargs):
        """Prefix ``config_issue`` with a fixed label and hand it to the base class.

        Args:
            config_issue: Description of the configuration problem
            **kwargs: Forwarded unchanged to ``LattifAIError``
        """
        super().__init__(f'Configuration error: {config_issue}', **kwargs)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def handle_exception(func):
    """Decorator to handle exceptions and convert them to LattifAI errors.

    ``LattifAIError`` instances raised by ``func`` propagate unchanged. Any
    other ``Exception`` is re-raised as a ``LattifAIError`` (chained with
    ``from``) whose context holds the function name, the original exception
    class name and the formatted traceback.

    Args:
        func: The callable to wrap

    Returns:
        A wrapper with the same call signature that preserves ``func``'s metadata
    """
    # Imported locally so the decorator does not rely on a module-level import.
    from functools import wraps

    # Not every callable has __name__ (e.g. functools.partial objects).
    func_name = getattr(func, '__name__', repr(func))

    @wraps(func)  # keep __name__/__doc__ of the wrapped function for introspection
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except LattifAIError:
            # Re-raise LattifAI errors as-is
            raise
        except Exception as e:
            # Convert other exceptions to LattifAI errors
            error_msg = f'Unexpected error in {func_name}: {str(e)}'
            context = {
                'function': func_name,
                'original_exception': e.__class__.__name__,
                'traceback': traceback.format_exc(),
            }
            raise LattifAIError(error_msg, context=context) from e

    return wrapper
|
lattifai/tokenizer/tokenizer.py
CHANGED
|
@@ -271,6 +271,8 @@ class LatticeTokenizer:
|
|
|
271
271
|
if response.status_code != 200:
|
|
272
272
|
raise Exception(f'Failed to detokenize lattice: {response.text}')
|
|
273
273
|
result = response.json()
|
|
274
|
+
if not result.get('success'):
|
|
275
|
+
# Raise (not return) so an unsuccessful result surfaces as an error instead of being handed back as a value.
raise Exception('Failed to detokenize the alignment results.')
|
|
274
276
|
# if return_details:
|
|
275
277
|
# raise NotImplementedError("return_details is not implemented yet")
|
|
276
278
|
return [Supervision.from_dict(s) for s in result['supervisions']]
|
|
@@ -13,12 +13,23 @@ from lhotse.audio import read_audio
|
|
|
13
13
|
from lhotse.features.kaldi.layers import Wav2LogFilterBank
|
|
14
14
|
from lhotse.utils import Pathlike
|
|
15
15
|
|
|
16
|
+
from lattifai.errors import (
|
|
17
|
+
AlignmentError,
|
|
18
|
+
AudioFormatError,
|
|
19
|
+
AudioLoadError,
|
|
20
|
+
DependencyError,
|
|
21
|
+
ModelLoadError,
|
|
22
|
+
)
|
|
23
|
+
|
|
16
24
|
|
|
17
25
|
class Lattice1AlphaWorker:
|
|
18
26
|
"""Worker for processing audio with LatticeGraph."""
|
|
19
27
|
|
|
20
28
|
def __init__(self, model_path: Pathlike, device: str = 'cpu', num_threads: int = 8) -> None:
|
|
21
|
-
|
|
29
|
+
try:
|
|
30
|
+
self.config = json.load(open(f'{model_path}/config.json'))
|
|
31
|
+
except Exception as e:
|
|
32
|
+
raise ModelLoadError(f'config from {model_path}', original_error=e)
|
|
22
33
|
|
|
23
34
|
# SessionOptions
|
|
24
35
|
sess_options = ort.SessionOptions()
|
|
@@ -33,15 +44,22 @@ class Lattice1AlphaWorker:
|
|
|
33
44
|
elif device.startswith('mps') and ort.get_all_providers().count('MPSExecutionProvider') > 0:
|
|
34
45
|
providers.append('MPSExecutionProvider')
|
|
35
46
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
47
|
+
try:
|
|
48
|
+
self.acoustic_ort = ort.InferenceSession(
|
|
49
|
+
f'{model_path}/acoustic_opt.onnx',
|
|
50
|
+
sess_options,
|
|
51
|
+
providers=providers + ['CoreMLExecutionProvider', 'CPUExecutionProvider'],
|
|
52
|
+
)
|
|
53
|
+
except Exception as e:
|
|
54
|
+
raise ModelLoadError(f'acoustic model from {model_path}', original_error=e)
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
config = FbankConfig(num_mel_bins=80, device=device, snip_edges=False)
|
|
58
|
+
config_dict = config.to_dict()
|
|
59
|
+
config_dict.pop('device')
|
|
60
|
+
self.extractor = Wav2LogFilterBank(**config_dict).to(device).eval()
|
|
61
|
+
except Exception as e:
|
|
62
|
+
raise ModelLoadError(f'feature extractor for device {device}', original_error=e)
|
|
45
63
|
|
|
46
64
|
self.device = torch.device(device)
|
|
47
65
|
self.timings = defaultdict(lambda: 0.0)
|
|
@@ -86,45 +104,59 @@ class Lattice1AlphaWorker:
|
|
|
86
104
|
waveform = waveform.transpose(0, 1)
|
|
87
105
|
# average multiple channels
|
|
88
106
|
waveform = np.mean(waveform, axis=0, keepdims=True) # (1, L)
|
|
89
|
-
except Exception:
|
|
107
|
+
except Exception as primary_error:
|
|
90
108
|
# Fallback to PyAV for formats not supported by soundfile
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
109
|
+
try:
|
|
110
|
+
import av
|
|
111
|
+
except ImportError:
|
|
112
|
+
raise DependencyError(
|
|
113
|
+
'av (PyAV)', install_command='pip install av', context={'primary_error': str(primary_error)}
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
try:
|
|
117
|
+
container = av.open(audio)
|
|
118
|
+
audio_stream = next((s for s in container.streams if s.type == 'audio'), None)
|
|
119
|
+
|
|
120
|
+
if audio_stream is None:
|
|
121
|
+
raise AudioFormatError(str(audio), 'No audio stream found in file')
|
|
122
|
+
|
|
123
|
+
# Resample to target sample rate during decoding
|
|
124
|
+
audio_stream.codec_context.format = av.AudioFormat('flt') # 32-bit float
|
|
125
|
+
|
|
126
|
+
frames = []
|
|
127
|
+
for frame in container.decode(audio_stream):
|
|
128
|
+
# Convert frame to numpy array
|
|
129
|
+
array = frame.to_ndarray()
|
|
130
|
+
# Ensure shape is (channels, samples)
|
|
131
|
+
if array.ndim == 1:
|
|
132
|
+
array = array.reshape(1, -1)
|
|
133
|
+
elif array.ndim == 2 and array.shape[0] > array.shape[1]:
|
|
134
|
+
array = array.T
|
|
135
|
+
frames.append(array)
|
|
136
|
+
|
|
137
|
+
container.close()
|
|
138
|
+
|
|
139
|
+
if not frames:
|
|
140
|
+
raise AudioFormatError(str(audio), 'No audio data found in file')
|
|
141
|
+
|
|
142
|
+
# Concatenate all frames
|
|
143
|
+
waveform = np.concatenate(frames, axis=1)
|
|
144
|
+
# Average multiple channels to mono
|
|
145
|
+
if waveform.shape[0] > 1:
|
|
146
|
+
waveform = np.mean(waveform, axis=0, keepdims=True)
|
|
147
|
+
|
|
148
|
+
sample_rate = audio_stream.codec_context.sample_rate
|
|
149
|
+
except Exception as e:
|
|
150
|
+
raise AudioLoadError(str(audio), original_error=e)
|
|
114
151
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
waveform = np.mean(waveform, axis=0, keepdims=True)
|
|
123
|
-
|
|
124
|
-
sample_rate = audio_stream.codec_context.sample_rate
|
|
152
|
+
try:
|
|
153
|
+
if sample_rate != self.config['sample_rate']:
|
|
154
|
+
waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=1)
|
|
155
|
+
except Exception:
|
|
156
|
+
raise AudioFormatError(
|
|
157
|
+
str(audio), f'Failed to resample from {sample_rate}Hz to {self.config["sample_rate"]}Hz'
|
|
158
|
+
)
|
|
125
159
|
|
|
126
|
-
if sample_rate != self.config['sample_rate']:
|
|
127
|
-
waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=1)
|
|
128
160
|
return torch.from_numpy(waveform).to(self.device) # (1, L)
|
|
129
161
|
|
|
130
162
|
def alignment(
|
|
@@ -138,6 +170,11 @@ class Lattice1AlphaWorker:
|
|
|
138
170
|
|
|
139
171
|
Returns:
|
|
140
172
|
Processed LatticeGraph
|
|
173
|
+
|
|
174
|
+
Raises:
|
|
175
|
+
AudioLoadError: If audio cannot be loaded
|
|
176
|
+
DependencyError: If required dependencies are missing
|
|
177
|
+
AlignmentError: If alignment process fails
|
|
141
178
|
"""
|
|
142
179
|
# load audio
|
|
143
180
|
if isinstance(audio, torch.Tensor):
|
|
@@ -146,21 +183,41 @@ class Lattice1AlphaWorker:
|
|
|
146
183
|
waveform = self.load_audio(audio) # (1, L)
|
|
147
184
|
|
|
148
185
|
_start = time.time()
|
|
149
|
-
|
|
186
|
+
try:
|
|
187
|
+
emission = self.emission(waveform.to(self.device)) # (1, T, vocab_size)
|
|
188
|
+
except Exception as e:
|
|
189
|
+
raise AlignmentError(
|
|
190
|
+
'Failed to compute acoustic features from audio',
|
|
191
|
+
audio_path=str(audio) if not isinstance(audio, torch.Tensor) else 'tensor',
|
|
192
|
+
context={'original_error': str(e)},
|
|
193
|
+
)
|
|
150
194
|
self.timings['emission'] += time.time() - _start
|
|
151
195
|
|
|
152
|
-
|
|
153
|
-
|
|
196
|
+
try:
|
|
197
|
+
import k2
|
|
198
|
+
except ImportError:
|
|
199
|
+
raise DependencyError('k2', install_command='pip install install-k2 && python -m install_k2')
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
from lattifai_core.lattice.decode import align_segments
|
|
203
|
+
except ImportError:
|
|
204
|
+
raise DependencyError('lattifai_core', install_command='Contact support for lattifai_core installation')
|
|
154
205
|
|
|
155
206
|
lattice_graph_str, final_state, acoustic_scale = lattice_graph
|
|
156
207
|
|
|
157
208
|
_start = time.time()
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
209
|
+
try:
|
|
210
|
+
# graph
|
|
211
|
+
decoding_graph = k2.Fsa.from_str(lattice_graph_str, acceptor=False)
|
|
212
|
+
decoding_graph.requires_grad_(False)
|
|
213
|
+
decoding_graph = k2.arc_sort(decoding_graph)
|
|
214
|
+
decoding_graph.skip_id = int(final_state)
|
|
215
|
+
decoding_graph.return_id = int(final_state + 1)
|
|
216
|
+
except Exception as e:
|
|
217
|
+
raise AlignmentError(
|
|
218
|
+
'Failed to create decoding graph from lattice',
|
|
219
|
+
context={'original_error': str(e), 'lattice_graph_length': len(lattice_graph_str)},
|
|
220
|
+
)
|
|
164
221
|
self.timings['decoding_graph'] += time.time() - _start
|
|
165
222
|
|
|
166
223
|
_start = time.time()
|
|
@@ -169,17 +226,24 @@ class Lattice1AlphaWorker:
|
|
|
169
226
|
else:
|
|
170
227
|
device = self.device
|
|
171
228
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
229
|
+
try:
|
|
230
|
+
results, labels = align_segments(
|
|
231
|
+
emission.to(device) * acoustic_scale,
|
|
232
|
+
decoding_graph.to(device),
|
|
233
|
+
torch.tensor([emission.shape[1]], dtype=torch.int32),
|
|
234
|
+
search_beam=100,
|
|
235
|
+
output_beam=40,
|
|
236
|
+
min_active_states=200,
|
|
237
|
+
max_active_states=10000,
|
|
238
|
+
subsampling_factor=1,
|
|
239
|
+
reject_low_confidence=False,
|
|
240
|
+
)
|
|
241
|
+
except Exception as e:
|
|
242
|
+
raise AlignmentError(
|
|
243
|
+
'Failed to perform forced alignment',
|
|
244
|
+
audio_path=str(audio) if not isinstance(audio, torch.Tensor) else 'tensor',
|
|
245
|
+
context={'original_error': str(e), 'emission_shape': list(emission.shape), 'device': str(device)},
|
|
246
|
+
)
|
|
183
247
|
self.timings['align_segments'] += time.time() - _start
|
|
184
248
|
|
|
185
249
|
channel = 0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lattifai
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
|
|
5
5
|
Author-email: Lattifai Technologies <tech@lattifai.com>
|
|
6
6
|
Maintainer-email: Lattice <tech@lattifai.com>
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
lattifai/__init__.py,sha256=
|
|
2
|
-
lattifai/base_client.py,sha256=
|
|
3
|
-
lattifai/client.py,sha256=
|
|
1
|
+
lattifai/__init__.py,sha256=moXL8kuk_Xmis2xK5JKpTx8hvcWb__JI75OI73Yf20I,2069
|
|
2
|
+
lattifai/base_client.py,sha256=q0l-hiAQKTWQiRXiwaHsCpSpieK_PrWRk7kmDtEfTig,3346
|
|
3
|
+
lattifai/client.py,sha256=XJAaNI02swLdiHG6lRAgh3SHso-FI_tJAPFENttzO10,8341
|
|
4
|
+
lattifai/errors.py,sha256=BAN57q_PE6HTQJW4mM6X0k-CZOzemsIQBGo5xmJDSsE,8735
|
|
4
5
|
lattifai/bin/__init__.py,sha256=7YhmtEM8kbxJtz2-KIskvpLKBZAvkMSceVx8z4fkgQ4,61
|
|
5
6
|
lattifai/bin/align.py,sha256=nQs901SDYmxyH2AXBtjgZGzrpwLaxANQRYP49Bd1AWo,1669
|
|
6
7
|
lattifai/bin/cli_base.py,sha256=y535WXDRX8StloFn9icpfw7nQt0JxuWBIuPMnRxAYy8,392
|
|
@@ -11,12 +12,12 @@ lattifai/io/supervision.py,sha256=5UfSsgBhXoDU3-6drDtoD7y8HIiA4xRKZnbOKgeejwM,35
|
|
|
11
12
|
lattifai/io/writer.py,sha256=1eAEFLlL8kricxRDPFBtVmeC4IiFyFnjbWXvw0VU-q4,2036
|
|
12
13
|
lattifai/tokenizer/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBfs,72
|
|
13
14
|
lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
|
|
14
|
-
lattifai/tokenizer/tokenizer.py,sha256=
|
|
15
|
+
lattifai/tokenizer/tokenizer.py,sha256=0UxlEIkOq9u6e8RMtlYOTVRjUxExo5r8LXpkzedlJNE,11606
|
|
15
16
|
lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
|
|
16
|
-
lattifai/workers/lattice1_alpha.py,sha256=
|
|
17
|
-
lattifai-0.2.
|
|
18
|
-
lattifai-0.2.
|
|
19
|
-
lattifai-0.2.
|
|
20
|
-
lattifai-0.2.
|
|
21
|
-
lattifai-0.2.
|
|
22
|
-
lattifai-0.2.
|
|
17
|
+
lattifai/workers/lattice1_alpha.py,sha256=1lCq0-bgWMXvYslAbCTFgHC0p6UWPto1y0wkTw9WrmQ,10177
|
|
18
|
+
lattifai-0.2.5.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
|
|
19
|
+
lattifai-0.2.5.dist-info/METADATA,sha256=-EEq4g932BbO1YlMKWc-rz4tpy-rr5cHb0GSGdKdfSI,10989
|
|
20
|
+
lattifai-0.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
21
|
+
lattifai-0.2.5.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
|
|
22
|
+
lattifai-0.2.5.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
|
|
23
|
+
lattifai-0.2.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|