lattifai 0.2.2__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lattifai/__init__.py CHANGED
@@ -2,7 +2,21 @@ import os
2
2
  import sys
3
3
  import warnings
4
4
 
5
- from .base_client import LattifAIError
5
+ from .errors import (
6
+ AlignmentError,
7
+ APIError,
8
+ AudioFormatError,
9
+ AudioLoadError,
10
+ AudioProcessingError,
11
+ ConfigurationError,
12
+ DependencyError,
13
+ LatticeDecodingError,
14
+ LatticeEncodingError,
15
+ LattifAIError,
16
+ ModelLoadError,
17
+ SubtitleParseError,
18
+ SubtitleProcessingError,
19
+ )
6
20
  from .io import SubtitleIO
7
21
 
8
22
  try:
@@ -53,6 +67,18 @@ def __getattr__(name):
53
67
  __all__ = [
54
68
  'LattifAI', # noqa: F822
55
69
  'LattifAIError',
70
+ 'AudioProcessingError',
71
+ 'AudioLoadError',
72
+ 'AudioFormatError',
73
+ 'SubtitleProcessingError',
74
+ 'SubtitleParseError',
75
+ 'AlignmentError',
76
+ 'LatticeEncodingError',
77
+ 'LatticeDecodingError',
78
+ 'ModelLoadError',
79
+ 'DependencyError',
80
+ 'APIError',
81
+ 'ConfigurationError',
56
82
  'SubtitleIO',
57
83
  '__version__',
58
84
  ]
lattifai/base_client.py CHANGED
@@ -6,11 +6,8 @@ from typing import Any, Awaitable, Callable, Dict, Optional, Union # noqa: F401
6
6
 
7
7
  import httpx
8
8
 
9
-
10
- class LattifAIError(Exception):
11
- """Base exception for LattifAI errors."""
12
-
13
- pass
9
+ # Import from errors module for consistency
10
+ from .errors import APIError, ConfigurationError, LattifAIError
14
11
 
15
12
 
16
13
  class BaseAPIClient(ABC):
@@ -28,7 +25,7 @@ class BaseAPIClient(ABC):
28
25
  if api_key is None:
29
26
  api_key = os.environ.get('LATTIFAI_API_KEY')
30
27
  if api_key is None:
31
- raise LattifAIError(
28
+ raise ConfigurationError(
32
29
  'The api_key client option must be set either by passing api_key to the client '
33
30
  'or by setting the LATTIFAI_API_KEY environment variable'
34
31
  )
lattifai/client.py CHANGED
@@ -9,7 +9,17 @@ import colorful
9
9
  from dotenv import load_dotenv
10
10
  from lhotse.utils import Pathlike
11
11
 
12
- from lattifai.base_client import AsyncAPIClient, LattifAIError, SyncAPIClient
12
+ from lattifai.base_client import AsyncAPIClient, SyncAPIClient
13
+ from lattifai.errors import (
14
+ AlignmentError,
15
+ ConfigurationError,
16
+ LatticeDecodingError,
17
+ LatticeEncodingError,
18
+ LattifAIError,
19
+ ModelLoadError,
20
+ SubtitleProcessingError,
21
+ handle_exception,
22
+ )
13
23
  from lattifai.io import SubtitleFormat, SubtitleIO
14
24
  from lattifai.tokenizer import LatticeTokenizer
15
25
  from lattifai.workers import Lattice1AlphaWorker
@@ -34,7 +44,7 @@ class LattifAI(SyncAPIClient):
34
44
  if api_key is None:
35
45
  api_key = os.environ.get('LATTIFAI_API_KEY')
36
46
  if api_key is None:
37
- raise LattifAIError(
47
+ raise ConfigurationError(
38
48
  'The api_key client option must be set either by passing api_key to the client '
39
49
  'or by setting the LATTIFAI_API_KEY environment variable'
40
50
  )
@@ -60,8 +70,13 @@ class LattifAI(SyncAPIClient):
60
70
  try:
61
71
  model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
62
72
  except LocalEntryNotFoundError:
63
- os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
64
- model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
73
+ try:
74
+ os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
75
+ model_path = snapshot_download(repo_id=model_name_or_path, repo_type='model')
76
+ except Exception as e:
77
+ raise ModelLoadError(model_name_or_path, original_error=e)
78
+ except Exception as e:
79
+ raise ModelLoadError(model_name_or_path, original_error=e)
65
80
  else:
66
81
  model_path = model_name_or_path
67
82
 
@@ -75,12 +90,19 @@ class LattifAI(SyncAPIClient):
75
90
  elif torch.cuda.is_available():
76
91
  device = 'cuda'
77
92
 
78
- self.tokenizer = LatticeTokenizer.from_pretrained(
79
- client_wrapper=self,
80
- model_path=model_path,
81
- device=device,
82
- )
83
- self.worker = Lattice1AlphaWorker(model_path, device=device, num_threads=8)
93
+ try:
94
+ self.tokenizer = LatticeTokenizer.from_pretrained(
95
+ client_wrapper=self,
96
+ model_path=model_path,
97
+ device=device,
98
+ )
99
+ except Exception as e:
100
+ raise ModelLoadError(f'tokenizer from {model_path}', original_error=e)
101
+
102
+ try:
103
+ self.worker = Lattice1AlphaWorker(model_path, device=device, num_threads=8)
104
+ except Exception as e:
105
+ raise ModelLoadError(f'worker from {model_path}', original_error=e)
84
106
 
85
107
  def alignment(
86
108
  self,
@@ -95,37 +117,88 @@ class LattifAI(SyncAPIClient):
95
117
  Args:
96
118
  audio: Audio file path
97
119
  subtitle: Subtitle/Text to align with audio
98
- export_format: Output format (srt, vtt, ass, txt)
120
+ format: Output format (srt, vtt, ass, txt)
121
+ split_sentence: Whether to split sentences during processing
122
+ output_subtitle_path: Path to save output file
99
123
 
100
124
  Returns:
101
125
  Aligned subtitles in specified format
126
+
127
+ Raises:
128
+ SubtitleProcessingError: If subtitle file cannot be parsed
129
+ LatticeEncodingError: If lattice graph generation fails
130
+ AlignmentError: If audio alignment fails
131
+ LatticeDecodingError: If lattice decoding fails
102
132
  """
103
- # step1: parse text or subtitles
104
- print(colorful.cyan(f'📖 Step 1: Reading subtitle file from {subtitle}'))
105
- supervisions = SubtitleIO.read(subtitle, format=format)
106
- print(colorful.green(f' ✓ Parsed {len(supervisions)} subtitle segments'))
107
-
108
- # step2: make lattice by call Lattifai API
109
- print(colorful.cyan('🔗 Step 2: Creating lattice graph from text'))
110
- lattice_id, lattice_graph = self.tokenizer.tokenize(supervisions, split_sentence=split_sentence)
111
- print(colorful.green(f' Generated lattice graph with ID: {lattice_id}'))
112
-
113
- # step3: align audio with text
114
- print(colorful.cyan(f'🎵 Step 3: Performing alignment on audio file: {audio}'))
115
- lattice_results = self.worker.alignment(audio, lattice_graph)
116
- print(colorful.green(' ✓ Alignment completed successfully'))
117
-
118
- # step4: decode the lattice paths
119
- print(colorful.cyan('🔍 Step 4: Decoding lattice paths to final alignments'))
120
- alignments = self.tokenizer.detokenize(lattice_id, lattice_results)
121
- print(colorful.green(f' ✓ Decoded {len(alignments)} aligned segments'))
122
-
123
- # step5: export alignments to target format
124
- if output_subtitle_path:
125
- SubtitleIO.write(alignments, output_path=output_subtitle_path)
126
- print(colorful.green(f'🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}'))
127
-
128
- return output_subtitle_path or alignments
133
+ try:
134
+ # step1: parse text or subtitles
135
+ print(colorful.cyan(f'📖 Step 1: Reading subtitle file from {subtitle}'))
136
+ try:
137
+ supervisions = SubtitleIO.read(subtitle, format=format)
138
+ print(colorful.green(f' ✓ Parsed {len(supervisions)} subtitle segments'))
139
+ except Exception as e:
140
+ raise SubtitleProcessingError(
141
+ f'Failed to parse subtitle file: {subtitle}',
142
+ subtitle_path=str(subtitle),
143
+ context={'original_error': str(e)},
144
+ )
145
+
146
+ # step2: make lattice by call Lattifai API
147
+ print(colorful.cyan('🔗 Step 2: Creating lattice graph from text'))
148
+ try:
149
+ lattice_id, lattice_graph = self.tokenizer.tokenize(supervisions, split_sentence=split_sentence)
150
+ print(colorful.green(f' ✓ Generated lattice graph with ID: {lattice_id}'))
151
+ except Exception as e:
152
+ text_content = ' '.join([sup.text for sup in supervisions]) if supervisions else ''
153
+ raise LatticeEncodingError(text_content, original_error=e)
154
+
155
+ # step3: align audio with text
156
+ print(colorful.cyan(f'🎵 Step 3: Performing alignment on audio file: {audio}'))
157
+ try:
158
+ lattice_results = self.worker.alignment(audio, lattice_graph)
159
+ print(colorful.green(' ✓ Alignment completed successfully'))
160
+ except Exception as e:
161
+ raise AlignmentError(
162
+ f'Audio alignment failed for {audio}',
163
+ audio_path=str(audio),
164
+ subtitle_path=str(subtitle),
165
+ context={'original_error': str(e)},
166
+ )
167
+
168
+ # step4: decode the lattice paths
169
+ print(colorful.cyan('🔍 Step 4: Decoding lattice paths to final alignments'))
170
+ try:
171
+ alignments = self.tokenizer.detokenize(lattice_id, lattice_results)
172
+ print(colorful.green(f' ✓ Decoded {len(alignments)} aligned segments'))
173
+ except Exception as e:
174
+ print(colorful.red(' x Failed to decode lattice alignment results'))
175
+ raise LatticeDecodingError(lattice_id, original_error=e)
176
+
177
+ # step5: export alignments to target format
178
+ if output_subtitle_path:
179
+ try:
180
+ SubtitleIO.write(alignments, output_path=output_subtitle_path)
181
+ print(colorful.green(f'🎉🎉🎉🎉🎉 Subtitle file written to: {output_subtitle_path}'))
182
+ except Exception as e:
183
+ raise SubtitleProcessingError(
184
+ f'Failed to write output file: {output_subtitle_path}',
185
+ subtitle_path=str(output_subtitle_path),
186
+ context={'original_error': str(e)},
187
+ )
188
+
189
+ return output_subtitle_path or alignments
190
+
191
+ except (SubtitleProcessingError, LatticeEncodingError, AlignmentError, LatticeDecodingError):
192
+ # Re-raise our specific errors as-is
193
+ raise
194
+ except Exception as e:
195
+ # Catch any unexpected errors and wrap them
196
+ raise AlignmentError(
197
+ 'Unexpected error during alignment process',
198
+ audio_path=str(audio),
199
+ subtitle_path=str(subtitle),
200
+ context={'original_error': str(e), 'error_type': e.__class__.__name__},
201
+ )
129
202
 
130
203
 
131
204
  if __name__ == '__main__':
lattifai/errors.py ADDED
@@ -0,0 +1,219 @@
1
+ """Error handling and exception classes for LattifAI SDK."""
2
+
3
+ import sys
4
+ import traceback
5
+ from typing import Any, Dict, Optional
6
+
7
+ import colorful
8
+
9
+
10
class LattifAIError(Exception):
    """Base exception for LattifAI errors."""

    def __init__(self, message: str, error_code: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
        """Initialize LattifAI error.

        Args:
            message: Error message
            error_code: Optional error code for categorization; defaults to the
                concrete exception class name
            context: Optional context information about the error
        """
        super().__init__(message)
        self.message = message
        # Fall back to the subclass name so every error is categorized.
        self.error_code = error_code if error_code else self.__class__.__name__
        self.context = context if context else {}

    def get_support_info(self) -> str:
        """Get support information for users."""
        # Assemble the support footer from its individual lines.
        parts = (
            f'\n\n{colorful.green("🔧 Need help? Here are two ways to get support:")}\n',
            f' 1. 📝 Create a GitHub issue: {colorful.green("https://github.com/lattifai/lattifai-python/issues")}\n',
            ' Please include:\n',
            ' - Your audio file format and duration\n',
            " - The text/subtitle content you're trying to align\n",
            ' - This error message and stack trace\n',
            f' 2. 💬 Join our Discord community: {colorful.green("https://discord.gg/vzmTzzZgNu")}\n',
            ' Our team and community can help you troubleshoot\n',
        )
        return ''.join(parts)

    def __str__(self) -> str:
        """Return formatted error message with support information."""
        text = f'{colorful.red(f"[{self.error_code}] {self.message}")}'
        if self.context:
            # Render context as "k=v" pairs on their own highlighted line.
            pairs = ', '.join(f'{k}={v}' for k, v in self.context.items())
            text += f'\n{colorful.yellow("Context:")} ' + pairs
        return text + self.get_support_info()
46
+
47
+
48
class AudioProcessingError(LattifAIError):
    """Error during audio processing operations."""

    def __init__(self, message: str, audio_path: Optional[str] = None, **kwargs):
        # Fold the audio path into the error context without discarding
        # any context the caller already supplied.
        ctx = kwargs.get('context', {})
        if audio_path:
            ctx['audio_path'] = audio_path
        kwargs['context'] = ctx
        super().__init__(message, **kwargs)
57
+
58
+
59
class AudioLoadError(AudioProcessingError):
    """Error loading or reading audio file."""

    def __init__(self, audio_path: str, original_error: Optional[Exception] = None, **kwargs):
        # User-facing message; append the underlying cause when known.
        msg = f'Failed to load audio file: {colorful.red(audio_path)}'
        if original_error:
            msg = f'{msg} - {colorful.red(str(original_error))}'

        ctx = kwargs.get('context', {})
        ctx['audio_path'] = audio_path
        ctx['original_error'] = str(original_error) if original_error else None
        kwargs['context'] = ctx

        super().__init__(msg, audio_path=audio_path, **kwargs)
72
+
73
+
74
class AudioFormatError(AudioProcessingError):
    """Error with audio format or codec."""

    def __init__(self, audio_path: str, format_issue: str, **kwargs):
        # Record both the file and the specific format problem in context.
        ctx = kwargs.get('context', {})
        ctx.update(audio_path=audio_path, format_issue=format_issue)
        kwargs['context'] = ctx
        super().__init__(
            f'Audio format error for {colorful.red(audio_path)}: {colorful.red(format_issue)}',
            audio_path=audio_path,
            **kwargs,
        )
83
+
84
+
85
class SubtitleProcessingError(LattifAIError):
    """Error during subtitle/text processing operations."""

    def __init__(self, message: str, subtitle_path: Optional[str] = None, **kwargs):
        # Attach the subtitle path to the context, preserving caller context.
        ctx = kwargs.get('context', {})
        if subtitle_path:
            ctx['subtitle_path'] = subtitle_path
        kwargs['context'] = ctx
        super().__init__(message, **kwargs)
94
+
95
+
96
class SubtitleParseError(SubtitleProcessingError):
    """Error parsing subtitle or text file."""

    def __init__(self, subtitle_path: str, parse_issue: str, **kwargs):
        # Plain (uncolored) message; context carries the structured details.
        ctx = kwargs.get('context', {})
        ctx.update(subtitle_path=subtitle_path, parse_issue=parse_issue)
        kwargs['context'] = ctx
        super().__init__(
            f'Failed to parse subtitle file {subtitle_path}: {parse_issue}',
            subtitle_path=subtitle_path,
            **kwargs,
        )
105
+
106
+
107
class AlignmentError(LattifAIError):
    """Error during audio-text alignment process."""

    def __init__(self, message: str, audio_path: Optional[str] = None, subtitle_path: Optional[str] = None, **kwargs):
        # Only record the paths that were actually provided.
        ctx = kwargs.get('context', {})
        for key, value in (('audio_path', audio_path), ('subtitle_path', subtitle_path)):
            if value:
                ctx[key] = value
        kwargs['context'] = ctx
        super().__init__(message, **kwargs)
118
+
119
+
120
class LatticeEncodingError(AlignmentError):
    """Error generating lattice graph from text."""

    def __init__(self, text_content: str, original_error: Optional[Exception] = None, **kwargs):
        msg = 'Failed to generate lattice graph from text'
        if original_error:
            msg = f'{msg}: {colorful.red(str(original_error))}'

        # Keep only a short preview of the text so the error stays readable.
        preview = text_content[:100] + '...' if len(text_content) > 100 else text_content
        ctx = kwargs.get('context', {})
        ctx['text_content_length'] = len(text_content)
        ctx['text_preview'] = preview
        ctx['original_error'] = str(original_error) if original_error else None
        kwargs['context'] = ctx

        super().__init__(msg, **kwargs)
138
+
139
+
140
class LatticeDecodingError(AlignmentError):
    """Error decoding lattice alignment results."""

    def __init__(self, lattice_id: str, original_error: Optional[Exception] = None, **kwargs):
        msg = f'Failed to decode lattice alignment results for lattice ID: {colorful.red(lattice_id)}'
        if original_error:
            msg = f'{msg} - {colorful.red(str(original_error))}'

        ctx = kwargs.get('context', {})
        ctx['lattice_id'] = lattice_id
        ctx['original_error'] = str(original_error) if original_error else None
        kwargs['context'] = ctx

        super().__init__(msg, **kwargs)
152
+
153
+
154
class ModelLoadError(LattifAIError):
    """Error loading AI model."""

    def __init__(self, model_name: str, original_error: Optional[Exception] = None, **kwargs):
        msg = f'Failed to load model: {colorful.red(model_name)}'
        if original_error:
            msg = f'{msg} - {colorful.red(str(original_error))}'

        ctx = kwargs.get('context', {})
        ctx['model_name'] = model_name
        ctx['original_error'] = str(original_error) if original_error else None
        kwargs['context'] = ctx

        super().__init__(msg, **kwargs)
166
+
167
+
168
class DependencyError(LattifAIError):
    """Error with required dependencies."""

    def __init__(self, dependency_name: str, install_command: Optional[str] = None, **kwargs):
        msg = f'Missing required dependency: {colorful.red(dependency_name)}'
        if install_command:
            # Tell the user exactly how to resolve the problem.
            msg += f'\nPlease install it using: {colorful.yellow(install_command)}'

        ctx = kwargs.get('context', {})
        ctx.update(dependency_name=dependency_name, install_command=install_command)
        kwargs['context'] = ctx

        super().__init__(msg, **kwargs)
180
+
181
+
182
class APIError(LattifAIError):
    """Error communicating with LattifAI API."""

    def __init__(self, message: str, status_code: Optional[int] = None, response_text: Optional[str] = None, **kwargs):
        # Always record status/response (even when None) for debugging.
        ctx = kwargs.get('context', {})
        ctx['status_code'] = status_code
        ctx['response_text'] = response_text
        kwargs['context'] = ctx
        super().__init__(message, **kwargs)
190
+
191
+
192
class ConfigurationError(LattifAIError):
    """Error with client configuration."""

    def __init__(self, config_issue: str, **kwargs):
        # Prefix so configuration problems are recognizable at a glance.
        super().__init__(f'Configuration error: {config_issue}', **kwargs)
198
+
199
+
200
def handle_exception(func):
    """Decorator to handle exceptions and convert them to LattifAI errors.

    Wraps ``func`` so that any non-LattifAI exception is re-raised as a
    ``LattifAIError`` whose context records the function name, the original
    exception class, and the formatted traceback. ``LattifAIError`` (and
    subclasses) propagate unchanged.

    Args:
        func: Callable to wrap.

    Returns:
        The wrapped callable.

    Raises:
        LattifAIError: For any unexpected exception raised by ``func``.
    """
    # Local import so the module-level import block stays unchanged.
    from functools import wraps

    @wraps(func)  # preserve func's __name__/__doc__ for debugging and introspection
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except LattifAIError:
            # Re-raise LattifAI errors as-is
            raise
        except Exception as e:
            # Convert other exceptions to LattifAI errors, keeping the cause chain
            error_msg = f'Unexpected error in {func.__name__}: {str(e)}'
            context = {
                'function': func.__name__,
                'original_exception': e.__class__.__name__,
                'traceback': traceback.format_exc(),
            }
            raise LattifAIError(error_msg, context=context) from e

    return wrapper
@@ -271,6 +271,8 @@ class LatticeTokenizer:
271
271
  if response.status_code != 200:
272
272
  raise Exception(f'Failed to detokenize lattice: {response.text}')
273
273
  result = response.json()
274
+ if not result.get('success'):
275
+ raise Exception('Failed to detokenize the alignment results.')
274
276
  # if return_details:
275
277
  # raise NotImplementedError("return_details is not implemented yet")
276
278
  return [Supervision.from_dict(s) for s in result['supervisions']]
@@ -9,15 +9,27 @@ import resampy
9
9
  import soundfile as sf
10
10
  import torch
11
11
  from lhotse import FbankConfig
12
+ from lhotse.audio import read_audio
12
13
  from lhotse.features.kaldi.layers import Wav2LogFilterBank
13
14
  from lhotse.utils import Pathlike
14
15
 
16
+ from lattifai.errors import (
17
+ AlignmentError,
18
+ AudioFormatError,
19
+ AudioLoadError,
20
+ DependencyError,
21
+ ModelLoadError,
22
+ )
23
+
15
24
 
16
25
  class Lattice1AlphaWorker:
17
26
  """Worker for processing audio with LatticeGraph."""
18
27
 
19
28
  def __init__(self, model_path: Pathlike, device: str = 'cpu', num_threads: int = 8) -> None:
20
- self.config = json.load(open(f'{model_path}/config.json'))
29
+ try:
30
+ self.config = json.load(open(f'{model_path}/config.json'))
31
+ except Exception as e:
32
+ raise ModelLoadError(f'config from {model_path}', original_error=e)
21
33
 
22
34
  # SessionOptions
23
35
  sess_options = ort.SessionOptions()
@@ -32,15 +44,22 @@ class Lattice1AlphaWorker:
32
44
  elif device.startswith('mps') and ort.get_all_providers().count('MPSExecutionProvider') > 0:
33
45
  providers.append('MPSExecutionProvider')
34
46
 
35
- self.acoustic_ort = ort.InferenceSession(
36
- f'{model_path}/acoustic_opt.onnx',
37
- sess_options,
38
- providers=providers + ['CoreMLExecutionProvider', 'CPUExecutionProvider'],
39
- )
40
- config = FbankConfig(num_mel_bins=80, device=device, snip_edges=False)
41
- config_dict = config.to_dict()
42
- config_dict.pop('device')
43
- self.extractor = Wav2LogFilterBank(**config_dict).to(device).eval()
47
+ try:
48
+ self.acoustic_ort = ort.InferenceSession(
49
+ f'{model_path}/acoustic_opt.onnx',
50
+ sess_options,
51
+ providers=providers + ['CoreMLExecutionProvider', 'CPUExecutionProvider'],
52
+ )
53
+ except Exception as e:
54
+ raise ModelLoadError(f'acoustic model from {model_path}', original_error=e)
55
+
56
+ try:
57
+ config = FbankConfig(num_mel_bins=80, device=device, snip_edges=False)
58
+ config_dict = config.to_dict()
59
+ config_dict.pop('device')
60
+ self.extractor = Wav2LogFilterBank(**config_dict).to(device).eval()
61
+ except Exception as e:
62
+ raise ModelLoadError(f'feature extractor for device {device}', original_error=e)
44
63
 
45
64
  self.device = torch.device(device)
46
65
  self.timings = defaultdict(lambda: 0.0)
@@ -76,13 +95,69 @@ class Lattice1AlphaWorker:
76
95
 
77
96
  def load_audio(self, audio: Union[Pathlike, BinaryIO]) -> Tuple[torch.Tensor, int]:
78
97
  # load audio
79
- waveform, sample_rate = sf.read(audio, always_2d=True, dtype='float32')
80
- if waveform.shape[1] > 1: # TODO: support choose channel
81
- waveform = np.mean(waveform, axis=1, keepdims=True)
82
- if sample_rate != self.config['sample_rate']:
83
- waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=0)
84
- waveform = torch.from_numpy(waveform.T).to(self.device) # (1, L)
85
- return waveform
98
+ try:
99
+ waveform, sample_rate = read_audio(audio) # numpy array
100
+ if len(waveform.shape) == 1:
101
+ waveform = waveform.reshape([1, -1]) # (1, L)
102
+ else: # make sure channel first
103
+ if waveform.shape[0] > waveform.shape[1]:
104
+ waveform = waveform.transpose(0, 1)
105
+ # average multiple channels
106
+ waveform = np.mean(waveform, axis=0, keepdims=True) # (1, L)
107
+ except Exception as primary_error:
108
+ # Fallback to PyAV for formats not supported by soundfile
109
+ try:
110
+ import av
111
+ except ImportError:
112
+ raise DependencyError(
113
+ 'av (PyAV)', install_command='pip install av', context={'primary_error': str(primary_error)}
114
+ )
115
+
116
+ try:
117
+ container = av.open(audio)
118
+ audio_stream = next((s for s in container.streams if s.type == 'audio'), None)
119
+
120
+ if audio_stream is None:
121
+ raise AudioFormatError(str(audio), 'No audio stream found in file')
122
+
123
+ # Resample to target sample rate during decoding
124
+ audio_stream.codec_context.format = av.AudioFormat('flt') # 32-bit float
125
+
126
+ frames = []
127
+ for frame in container.decode(audio_stream):
128
+ # Convert frame to numpy array
129
+ array = frame.to_ndarray()
130
+ # Ensure shape is (channels, samples)
131
+ if array.ndim == 1:
132
+ array = array.reshape(1, -1)
133
+ elif array.ndim == 2 and array.shape[0] > array.shape[1]:
134
+ array = array.T
135
+ frames.append(array)
136
+
137
+ container.close()
138
+
139
+ if not frames:
140
+ raise AudioFormatError(str(audio), 'No audio data found in file')
141
+
142
+ # Concatenate all frames
143
+ waveform = np.concatenate(frames, axis=1)
144
+ # Average multiple channels to mono
145
+ if waveform.shape[0] > 1:
146
+ waveform = np.mean(waveform, axis=0, keepdims=True)
147
+
148
+ sample_rate = audio_stream.codec_context.sample_rate
149
+ except Exception as e:
150
+ raise AudioLoadError(str(audio), original_error=e)
151
+
152
+ try:
153
+ if sample_rate != self.config['sample_rate']:
154
+ waveform = resampy.resample(waveform, sample_rate, self.config['sample_rate'], axis=1)
155
+ except Exception:
156
+ raise AudioFormatError(
157
+ str(audio), f'Failed to resample from {sample_rate}Hz to {self.config["sample_rate"]}Hz'
158
+ )
159
+
160
+ return torch.from_numpy(waveform).to(self.device) # (1, L)
86
161
 
87
162
  def alignment(
88
163
  self, audio: Union[Union[Pathlike, BinaryIO], torch.tensor], lattice_graph: Tuple[str, int, float]
@@ -95,6 +170,11 @@ class Lattice1AlphaWorker:
95
170
 
96
171
  Returns:
97
172
  Processed LatticeGraph
173
+
174
+ Raises:
175
+ AudioLoadError: If audio cannot be loaded
176
+ DependencyError: If required dependencies are missing
177
+ AlignmentError: If alignment process fails
98
178
  """
99
179
  # load audio
100
180
  if isinstance(audio, torch.Tensor):
@@ -103,21 +183,41 @@ class Lattice1AlphaWorker:
103
183
  waveform = self.load_audio(audio) # (1, L)
104
184
 
105
185
  _start = time.time()
106
- emission = self.emission(waveform.to(self.device)) # (1, T, vocab_size)
186
+ try:
187
+ emission = self.emission(waveform.to(self.device)) # (1, T, vocab_size)
188
+ except Exception as e:
189
+ raise AlignmentError(
190
+ 'Failed to compute acoustic features from audio',
191
+ audio_path=str(audio) if not isinstance(audio, torch.Tensor) else 'tensor',
192
+ context={'original_error': str(e)},
193
+ )
107
194
  self.timings['emission'] += time.time() - _start
108
195
 
109
- import k2
110
- from lattifai_core.lattice.decode import align_segments
196
+ try:
197
+ import k2
198
+ except ImportError:
199
+ raise DependencyError('k2', install_command='pip install install-k2 && python -m install_k2')
200
+
201
+ try:
202
+ from lattifai_core.lattice.decode import align_segments
203
+ except ImportError:
204
+ raise DependencyError('lattifai_core', install_command='Contact support for lattifai_core installation')
111
205
 
112
206
  lattice_graph_str, final_state, acoustic_scale = lattice_graph
113
207
 
114
208
  _start = time.time()
115
- # graph
116
- decoding_graph = k2.Fsa.from_str(lattice_graph_str, acceptor=False)
117
- decoding_graph.requires_grad_(False)
118
- decoding_graph = k2.arc_sort(decoding_graph)
119
- decoding_graph.skip_id = int(final_state)
120
- decoding_graph.return_id = int(final_state + 1)
209
+ try:
210
+ # graph
211
+ decoding_graph = k2.Fsa.from_str(lattice_graph_str, acceptor=False)
212
+ decoding_graph.requires_grad_(False)
213
+ decoding_graph = k2.arc_sort(decoding_graph)
214
+ decoding_graph.skip_id = int(final_state)
215
+ decoding_graph.return_id = int(final_state + 1)
216
+ except Exception as e:
217
+ raise AlignmentError(
218
+ 'Failed to create decoding graph from lattice',
219
+ context={'original_error': str(e), 'lattice_graph_length': len(lattice_graph_str)},
220
+ )
121
221
  self.timings['decoding_graph'] += time.time() - _start
122
222
 
123
223
  _start = time.time()
@@ -126,17 +226,24 @@ class Lattice1AlphaWorker:
126
226
  else:
127
227
  device = self.device
128
228
 
129
- results, labels = align_segments(
130
- emission.to(device) * acoustic_scale,
131
- decoding_graph.to(device),
132
- torch.tensor([emission.shape[1]], dtype=torch.int32),
133
- search_beam=100,
134
- output_beam=40,
135
- min_active_states=200,
136
- max_active_states=10000,
137
- subsampling_factor=1,
138
- reject_low_confidence=False,
139
- )
229
+ try:
230
+ results, labels = align_segments(
231
+ emission.to(device) * acoustic_scale,
232
+ decoding_graph.to(device),
233
+ torch.tensor([emission.shape[1]], dtype=torch.int32),
234
+ search_beam=100,
235
+ output_beam=40,
236
+ min_active_states=200,
237
+ max_active_states=10000,
238
+ subsampling_factor=1,
239
+ reject_low_confidence=False,
240
+ )
241
+ except Exception as e:
242
+ raise AlignmentError(
243
+ 'Failed to perform forced alignment',
244
+ audio_path=str(audio) if not isinstance(audio, torch.Tensor) else 'tensor',
245
+ context={'original_error': str(e), 'emission_shape': list(emission.shape), 'device': str(device)},
246
+ )
140
247
  self.timings['align_segments'] += time.time() - _start
141
248
 
142
249
  channel = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lattifai
3
- Version: 0.2.2
3
+ Version: 0.2.5
4
4
  Summary: Lattifai Python SDK: Seamless Integration with Lattifai's Speech and Video AI Services
5
5
  Author-email: Lattifai Technologies <tech@lattifai.com>
6
6
  Maintainer-email: Lattice <tech@lattifai.com>
@@ -61,6 +61,7 @@ Requires-Dist: onnxruntime
61
61
  Requires-Dist: resampy
62
62
  Requires-Dist: g2p-phonemizer==0.1.1
63
63
  Requires-Dist: wtpsplit>=2.1.6
64
+ Requires-Dist: av
64
65
  Provides-Extra: numpy
65
66
  Requires-Dist: numpy; extra == "numpy"
66
67
  Provides-Extra: test
@@ -1,6 +1,7 @@
1
- lattifai/__init__.py,sha256=JXUg0dT74UyAtKOjewRs9ijr5sl9SYsc6oU_WItY314,1497
2
- lattifai/base_client.py,sha256=ktFtATjL9pLSJUD-VqeJKA1FHkrsGHX7Uq_x00H7gO8,3322
3
- lattifai/client.py,sha256=QXbdTuDA5Aap2udu4iig7CVxlgwOIrydpuLlVASs0aA,5145
1
+ lattifai/__init__.py,sha256=moXL8kuk_Xmis2xK5JKpTx8hvcWb__JI75OI73Yf20I,2069
2
+ lattifai/base_client.py,sha256=q0l-hiAQKTWQiRXiwaHsCpSpieK_PrWRk7kmDtEfTig,3346
3
+ lattifai/client.py,sha256=XJAaNI02swLdiHG6lRAgh3SHso-FI_tJAPFENttzO10,8341
4
+ lattifai/errors.py,sha256=BAN57q_PE6HTQJW4mM6X0k-CZOzemsIQBGo5xmJDSsE,8735
4
5
  lattifai/bin/__init__.py,sha256=7YhmtEM8kbxJtz2-KIskvpLKBZAvkMSceVx8z4fkgQ4,61
5
6
  lattifai/bin/align.py,sha256=nQs901SDYmxyH2AXBtjgZGzrpwLaxANQRYP49Bd1AWo,1669
6
7
  lattifai/bin/cli_base.py,sha256=y535WXDRX8StloFn9icpfw7nQt0JxuWBIuPMnRxAYy8,392
@@ -11,12 +12,12 @@ lattifai/io/supervision.py,sha256=5UfSsgBhXoDU3-6drDtoD7y8HIiA4xRKZnbOKgeejwM,35
11
12
  lattifai/io/writer.py,sha256=1eAEFLlL8kricxRDPFBtVmeC4IiFyFnjbWXvw0VU-q4,2036
12
13
  lattifai/tokenizer/__init__.py,sha256=aqv44PDtq6g3oFFKW_l4HSR5ywT5W8eP1dHHywIvBfs,72
13
14
  lattifai/tokenizer/phonemizer.py,sha256=SfRi1KIMpmaao6OVmR1h_I_3QU-vrE6D5bh72Afg5XM,1759
14
- lattifai/tokenizer/tokenizer.py,sha256=Yuo0pLPQnF2uX0Fm5g8i5vtcADn7GeLpSqdGpMJgTww,11492
15
+ lattifai/tokenizer/tokenizer.py,sha256=0UxlEIkOq9u6e8RMtlYOTVRjUxExo5r8LXpkzedlJNE,11606
15
16
  lattifai/workers/__init__.py,sha256=s6YfkIq4FDIAzY9sPjRpXnJfszj2repqnMTqydRM5Zw,83
16
- lattifai/workers/lattice1_alpha.py,sha256=1VFo59EcygEctTHOhkcII8v3_mrj8JEJ8Fcaqk_7LVo,5762
17
- lattifai-0.2.2.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
18
- lattifai-0.2.2.dist-info/METADATA,sha256=4vmPOYKsIlvADiw0zUDQ2dbDpe-vOV-o5A0Hs1p7xfg,10971
19
- lattifai-0.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
20
- lattifai-0.2.2.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
21
- lattifai-0.2.2.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
22
- lattifai-0.2.2.dist-info/RECORD,,
17
+ lattifai/workers/lattice1_alpha.py,sha256=1lCq0-bgWMXvYslAbCTFgHC0p6UWPto1y0wkTw9WrmQ,10177
18
+ lattifai-0.2.5.dist-info/licenses/LICENSE,sha256=LNuoH5jpXXNKgjQ3XLwztFq8D3O7kZI-LSg81o4ym2M,1065
19
+ lattifai-0.2.5.dist-info/METADATA,sha256=-EEq4g932BbO1YlMKWc-rz4tpy-rr5cHb0GSGdKdfSI,10989
20
+ lattifai-0.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ lattifai-0.2.5.dist-info/entry_points.txt,sha256=CwTI2NbJvF9msIHboAfTA99cmDr_HOWoODjS8R64JOw,131
22
+ lattifai-0.2.5.dist-info/top_level.txt,sha256=-OVWZ68YYFcTN13ARkLasp2OUappe9wEVq-CKes7jM4,17
23
+ lattifai-0.2.5.dist-info/RECORD,,