lattifai 1.0.2__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,8 @@ import subprocess
 import sys
 from pathlib import Path
 
+from lattifai.utils import safe_print
+
 
 def check_command_exists(cmd: str) -> bool:
     """Check if a command exists in PATH."""
@@ -19,17 +21,17 @@ def install_nodejs():
     """Install Node.js based on the operating system."""
     system = platform.system().lower()
 
-    print("📦 Node.js not found. Installing Node.js...\n")
+    safe_print("📦 Node.js not found. Installing Node.js...\n")
 
     try:
         if system == "darwin":  # macOS
             # Check if Homebrew is installed
             if check_command_exists("brew"):
-                print("🍺 Using Homebrew to install Node.js...")
+                safe_print("🍺 Using Homebrew to install Node.js...")
                 subprocess.run(["brew", "install", "node"], check=True)
-                print("✓ Node.js installed via Homebrew\n")
+                safe_print("✓ Node.js installed via Homebrew\n")
             else:
-                print("❌ Homebrew not found.")
+                safe_print("❌ Homebrew not found.")
                 print("   Please install Homebrew first:")
                 print(
                     '  /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
@@ -40,46 +42,46 @@ def install_nodejs():
         elif system == "linux":
             # Try common package managers
             if check_command_exists("apt"):
-                print("🐧 Using apt to install Node.js...")
+                safe_print("🐧 Using apt to install Node.js...")
                 subprocess.run(["sudo", "apt", "update"], check=True)
                 subprocess.run(["sudo", "apt", "install", "-y", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via apt\n")
+                safe_print("✓ Node.js installed via apt\n")
             elif check_command_exists("yum"):
-                print("🐧 Using yum to install Node.js...")
+                safe_print("🐧 Using yum to install Node.js...")
                 subprocess.run(["sudo", "yum", "install", "-y", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via yum\n")
+                safe_print("✓ Node.js installed via yum\n")
             elif check_command_exists("dnf"):
-                print("🐧 Using dnf to install Node.js...")
+                safe_print("🐧 Using dnf to install Node.js...")
                 subprocess.run(["sudo", "dnf", "install", "-y", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via dnf\n")
+                safe_print("✓ Node.js installed via dnf\n")
             elif check_command_exists("pacman"):
-                print("🐧 Using pacman to install Node.js...")
+                safe_print("🐧 Using pacman to install Node.js...")
                 subprocess.run(["sudo", "pacman", "-S", "--noconfirm", "nodejs", "npm"], check=True)
-                print("✓ Node.js installed via pacman\n")
+                safe_print("✓ Node.js installed via pacman\n")
             else:
-                print("❌ No supported package manager found (apt/yum/dnf/pacman).")
+                safe_print("❌ No supported package manager found (apt/yum/dnf/pacman).")
                 print("   Please install Node.js manually from: https://nodejs.org/")
                 sys.exit(1)
 
         elif system == "windows":
-            print("❌ Automatic installation on Windows is not supported.")
+            safe_print("❌ Automatic installation on Windows is not supported.")
             print("   Please download and install Node.js from: https://nodejs.org/")
             print("   Then run this command again.")
             sys.exit(1)
 
         else:
-            print(f"❌ Unsupported operating system: {system}")
+            safe_print(f"❌ Unsupported operating system: {system}")
            print("   Please install Node.js manually from: https://nodejs.org/")
            sys.exit(1)
 
         # Verify installation
         if not check_command_exists("npm"):
-            print("❌ Node.js installation verification failed.")
+            safe_print("❌ Node.js installation verification failed.")
             print("   Please restart your terminal and try again.")
             sys.exit(1)
 
     except subprocess.CalledProcessError as e:
-        print(f"\n❌ Error during Node.js installation: {e}")
+        safe_print(f"\n❌ Error during Node.js installation: {e}")
         print("   Please install Node.js manually from: https://nodejs.org/")
         sys.exit(1)
 
@@ -90,49 +92,49 @@ def main():
     app_dir = Path(__file__).parent.parent.parent.parent / "app"
 
     if not app_dir.exists():
-        print(f"❌ Error: app directory not found at {app_dir}")
+        safe_print(f"❌ Error: app directory not found at {app_dir}")
         print("   Make sure you're in the lattifai-python repository.")
         sys.exit(1)
 
-    print("🚀 Installing lai-app (LattifAI Web Application)...\n")
+    safe_print("🚀 Installing lai-app (LattifAI Web Application)...\n")
 
     # Check if npm is installed, if not, install Node.js
     if not check_command_exists("npm"):
         install_nodejs()
     else:
         npm_version = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=True).stdout.strip()
-        print(f"✓ npm is already installed (v{npm_version})\n")
+        safe_print(f"✓ npm is already installed (v{npm_version})\n")
 
     # Change to app directory and run installation
     try:
-        print(f"📁 Working directory: {app_dir}\n")
+        safe_print(f"📁 Working directory: {app_dir}\n")
 
         # Install dependencies
-        print("📦 Installing dependencies...")
+        safe_print("📦 Installing dependencies...")
         subprocess.run(["npm", "install"], cwd=app_dir, check=True)
-        print("✓ Dependencies installed\n")
+        safe_print("✓ Dependencies installed\n")
 
         # Build the application
-        print("🔨 Building application...")
+        safe_print("🔨 Building application...")
         subprocess.run(["npm", "run", "build"], cwd=app_dir, check=True)
-        print("✓ Application built\n")
+        safe_print("✓ Application built\n")
 
         # Link globally
-        print("🔗 Linking lai-app command globally...")
+        safe_print("🔗 Linking lai-app command globally...")
         subprocess.run(["npm", "link"], cwd=app_dir, check=True)
-        print("✓ lai-app command linked globally\n")
+        safe_print("✓ lai-app command linked globally\n")
 
-        print("=" * 60)
-        print("✅ lai-app installed successfully!")
-        print("=" * 60)
-        print("\n🎉 You can now run:")
+        safe_print("=" * 60)
+        safe_print("✅ lai-app installed successfully!")
+        safe_print("=" * 60)
+        safe_print("\n🎉 You can now run:")
         print("   lai-app              # Start the web application")
         print("   lai-app --help       # Show help")
         print("   lai-app --port 8080  # Use custom port")
-        print("\n📖 For more information, see app/CLI_USAGE.md\n")
+        safe_print("\n📖 For more information, see app/CLI_USAGE.md\n")
 
     except subprocess.CalledProcessError as e:
-        print(f"\n❌ Error during installation: {e}")
+        safe_print(f"\n❌ Error during installation: {e}")
         sys.exit(1)
 
 
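Every hunk in this file swaps bare `print` for `safe_print` on lines containing emoji, which suggests the motivation is consoles whose encoding cannot represent those characters (e.g. cp1252 on Windows). The diff does not show `lattifai.utils.safe_print` itself; below is a minimal sketch of what such a helper could look like, assuming it simply degrades to a lossy re-encode on UnicodeEncodeError rather than crashing.

    import sys


    def safe_print(*args, **kwargs):
        """Print, degrading gracefully if stdout cannot encode the text (sketch, not the real implementation)."""
        try:
            print(*args, **kwargs)
        except UnicodeEncodeError:
            encoding = getattr(sys.stdout, "encoding", None) or "ascii"
            # Replace unencodable characters (e.g. emoji) with '?' instead of raising.
            degraded = [str(a).encode(encoding, errors="replace").decode(encoding) for a in args]
            print(*degraded, **kwargs)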
lattifai/cli/caption.py CHANGED
@@ -7,6 +7,7 @@ from lhotse.utils import Pathlike
 from typing_extensions import Annotated
 
 from lattifai.config import CaptionConfig
+from lattifai.utils import safe_print
 
 
 @run.cli.entrypoint(name="convert", namespace="caption")
@@ -55,7 +56,7 @@ def convert(
     caption = Caption.read(input_path, normalize_text=normalize_text)
     caption.write(output_path, include_speaker_in_text=include_speaker_in_text)
 
-    print(f"✅ Converted {input_path} -> {output_path}")
+    safe_print(f"✅ Converted {input_path} -> {output_path}")
     return output_path
 
 
@@ -63,7 +64,6 @@ def convert(
 def normalize(
     input_path: Pathlike,
     output_path: Pathlike,
-    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
 ):
     """
     Normalize caption text by cleaning HTML entities and whitespace.
@@ -81,9 +81,6 @@ def normalize(
     Args:
         input_path: Path to input caption file to normalize
        output_path: Path to output caption file (defaults to overwriting input file)
-        caption: Caption configuration for text normalization.
-            Fields: input_format, output_format, normalize_text (automatically enabled),
-            encoding
 
     Examples:
         # Normalize and save to new file (positional arguments)
@@ -92,13 +89,9 @@ def normalize(
         # Normalize with format conversion
         lai caption normalize input.vtt output.srt
 
-        # Normalize with custom caption config
-        lai caption normalize input.srt output.srt \\
-            caption.encoding=utf-8
-
         # Using keyword arguments (traditional syntax)
-        lai caption normalize \\
-            input_path=input.srt \\
+        lai caption normalize \
+            input_path=input.srt \
            output_path=output.srt
     """
     from pathlib import Path
@@ -112,9 +105,9 @@ def normalize(
     caption_obj.write(output_path, include_speaker_in_text=True)
 
     if output_path == input_path:
-        print(f"✅ Normalized {input_path} (in-place)")
+        safe_print(f"✅ Normalized {input_path} (in-place)")
     else:
-        print(f"✅ Normalized {input_path} -> {output_path}")
+        safe_print(f"✅ Normalized {input_path} -> {output_path}")
 
     return output_path
 
@@ -124,7 +117,6 @@ def shift(
     input_path: Pathlike,
     output_path: Pathlike,
     seconds: float,
-    caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
 ):
     """
     Shift caption timestamps by a specified number of seconds.
@@ -140,8 +132,6 @@ def shift(
         output_path: Path to output caption file (can be same as input for in-place modification)
         seconds: Number of seconds to shift timestamps. Positive values delay captions,
             negative values advance them earlier.
-        caption: Caption configuration for reading/writing.
-            Fields: input_format, output_format, encoding
 
     Examples:
         # Delay captions by 2 seconds (positional arguments)
@@ -181,9 +171,9 @@ def shift(
         direction = f"advanced by {abs(seconds)}s"
 
     if output_path == input_path:
-        print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
+        safe_print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
     else:
-        print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
+        safe_print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
 
     return output_path
 
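For reference, the simplified entrypoints above boil down to a read/write round-trip. A rough programmatic equivalent, using only the `Caption.read`/`Caption.write` calls visible in this diff (the import path for `Caption` is assumed, not shown here):

    from lattifai.caption import Caption  # assumed import path

    caption = Caption.read("input.srt", normalize_text=True)   # clean HTML entities/whitespace
    caption.write("output.srt", include_speaker_in_text=True)  # same call the CLI makes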
lattifai/cli/server.py CHANGED
@@ -4,6 +4,8 @@ import os
 import colorful
 import uvicorn
 
+from lattifai.utils import safe_print
+
 
 def main():
     """Launch the LattifAI Web Interface."""
@@ -29,7 +31,7 @@ def main():
 
     args = parser.parse_args()
 
-    print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
+    safe_print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
     print(colorful.cyan(f"Server running at http://localhost:{args.port}"))
     print(colorful.yellow(f"Host: {args.host}"))
     print(colorful.yellow(f"Auto-reload: {'disabled' if args.no_reload else 'enabled'}"))
@@ -3,10 +3,8 @@
 from typing import Optional
 
 import nemo_run as run
-from lhotse.utils import Pathlike
 from typing_extensions import Annotated
 
-from lattifai.audio2 import AudioLoader, ChannelSelectorType
 from lattifai.cli.alignment import align as alignment_align
 from lattifai.config import (
     AlignmentConfig,
@@ -23,9 +21,8 @@ from lattifai.utils import _resolve_model_path
 def transcribe(
     input: Optional[str] = None,
     output_caption: Optional[str] = None,
-    output_dir: Optional[Pathlike] = None,
-    media_format: str = "mp3",
-    channel_selector: Optional[ChannelSelectorType] = "average",
+    media: Annotated[Optional[MediaConfig], run.Config[MediaConfig]] = None,
+    client: Annotated[Optional[ClientConfig], run.Config[ClientConfig]] = None,
     transcription: Annotated[Optional[TranscriptionConfig], run.Config[TranscriptionConfig]] = None,
 ):
     """
@@ -39,11 +36,8 @@ def transcribe(
     Args:
         input: Path to input audio/video file or YouTube URL (can be provided as positional argument)
         output_caption: Path for output caption file (can be provided as positional argument)
-        output_dir: Directory for output files when using YouTube URL
-        media_format: Media format for YouTube downloads (default: mp3)
-        channel_selector: Audio channel selection strategy (default: average)
-            Options: average, left, right, or an integer channel index.
-            Note: Ignored when input is a URL and Gemini transcriber is used.
+        media: Media configuration for input/output handling.
+            Fields: input_path, output_dir, media_format, channel_selector, streaming_chunk_secs
         transcription: Transcription service configuration.
             Fields: model_name, device, language, gemini_api_key
 
@@ -67,6 +61,11 @@ def transcribe(
         lai transcribe run audio.wav output.srt \\
             transcription.language=zh
 
+        # With MediaConfig settings
+        lai transcribe run audio.wav output.srt \\
+            media.channel_selector=left \\
+            media.streaming_chunk_secs=30.0
+
         # Full configuration with keyword arguments
         lai transcribe run \\
             input=audio.wav \\
@@ -78,68 +77,84 @@ def transcribe(
     from pathlib import Path
 
     import colorful
+    from lattifai_core.client import SyncAPIClient
 
+    from lattifai.audio2 import AudioLoader
     from lattifai.transcription import create_transcriber
+    from lattifai.utils import safe_print
 
-    # Initialize transcription config with defaults
+    # Initialize configs with defaults
+    client_config = client or ClientConfig()
     transcription_config = transcription or TranscriptionConfig()
+    media_config = media or MediaConfig()
+
+    # Initialize client wrapper to properly set client_wrapper
+    client_wrapper = SyncAPIClient(config=client_config)
+    transcription_config.client_wrapper = client_wrapper
+
+    # Initialize client wrapper to properly set client_wrapper
+    client_wrapper = SyncAPIClient(config=client_config)
+    transcription_config.client_wrapper = client_wrapper
 
     # Validate input is required
-    if not input:
-        raise ValueError("Input is required. Provide input as positional argument (file path or URL).")
+    if not input and not media_config.input_path:
+        raise ValueError("Input is required. Provide input as positional argument or media.input_path.")
+
+    # Assign input to media_config if provided
+    if input:
+        media_config.set_input_path(input)
 
     # Detect if input is a URL
-    is_url = input.startswith(("http://", "https://"))
+    is_url = media_config.is_input_remote()
 
     # Prepare output paths
     if is_url:
-        # For URLs, use output_dir
-        if output_dir:
-            output_path = Path(str(output_dir)).expanduser()
-            output_path.mkdir(parents=True, exist_ok=True)
-        else:
-            output_path = Path.cwd()
+        # For URLs, use output_dir from media_config or current directory
+        output_path = media_config.output_dir
     else:
         # For files, use input path directory
-        input_path = Path(str(input))
-        output_path = input_path.parent
+        output_path = Path(media_config.input_path).parent
 
     # Create transcriber
     if not transcription_config.lattice_model_path:
-        transcription_config.lattice_model_path = _resolve_model_path("Lattifai/Lattice-1")
+        transcription_config.lattice_model_path = _resolve_model_path("LattifAI/Lattice-1")
     transcriber = create_transcriber(transcription_config=transcription_config)
 
-    print(colorful.cyan(f"🎤 Starting transcription with {transcriber.name}..."))
-    print(colorful.cyan(f"   Input: {input}"))
+    safe_print(colorful.cyan(f"🎤 Starting transcription with {transcriber.name}..."))
+    safe_print(colorful.cyan(f"   Input: {media_config.input_path}"))
 
     # Perform transcription
     if is_url and transcriber.supports_url:
         # Check if transcriber supports URL directly
-        print(colorful.cyan("   Transcribing from URL directly..."))
-        transcript = asyncio.run(transcriber.transcribe(input))
+        safe_print(colorful.cyan("   Transcribing from URL directly..."))
+        transcript = asyncio.run(transcriber.transcribe(media_config.input_path))
     else:
         if is_url:
             # Download media first, then transcribe
-            print(colorful.cyan("   Downloading media from URL..."))
+            safe_print(colorful.cyan("   Downloading media from URL..."))
             from lattifai.workflow.youtube import YouTubeDownloader
 
             downloader = YouTubeDownloader()
             input_path = asyncio.run(
                 downloader.download_media(
-                    url=input,
+                    url=media_config.input_path,
                     output_dir=str(output_path),
-                    media_format=media_format,
-                    force_overwrite=False,
+                    media_format=media_config.normalize_format(),
+                    force_overwrite=media_config.force_overwrite,
                 )
             )
-            print(colorful.cyan(f"   Media downloaded to: {input_path}"))
+            safe_print(colorful.cyan(f"   Media downloaded to: {input_path}"))
        else:
-            input_path = Path(str(input))
+            input_path = Path(media_config.input_path)
 
-        print(colorful.cyan("   Loading audio..."))
+        safe_print(colorful.cyan("   Loading audio..."))
         # For files, load audio first
         audio_loader = AudioLoader(device=transcription_config.device)
-        media_audio = audio_loader(input_path, channel_selector=channel_selector)
+        media_audio = audio_loader(
+            input_path,
+            channel_selector=media_config.channel_selector,
+            streaming_chunk_secs=media_config.streaming_chunk_secs,
+        )
         transcript = asyncio.run(transcriber.transcribe(media_audio))
 
     # Determine output caption path
@@ -153,14 +168,14 @@ def transcribe(
         final_output = output_path / f"youtube_LattifAI_{transcriber.name}.{output_format}"
     else:
         # For files, use input filename with suffix
-        final_output = Path(str(input)).with_suffix(".LattifAI.srt")
+        final_output = Path(media_config.input_path).with_suffix(".LattifAI.srt")
 
-    print(colorful.cyan(f"   Output: {final_output}"))
+    safe_print(colorful.cyan(f"   Output: {final_output}"))
 
     # Write output
     transcriber.write(transcript, final_output, encoding="utf-8", cache_audio_events=False)
 
-    print(colorful.green(f"🎉 Transcription completed: {final_output}"))
+    safe_print(colorful.green(f"🎉 Transcription completed: {final_output}"))
 
     return transcript
 
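The refactor above funnels all input handling through `MediaConfig`: the positional `input` is stored with `set_input_path`, URL detection moves to `is_input_remote()`, and download/loader options come from config fields instead of loose parameters. A condensed sketch of that flow, using only the methods and fields visible in this diff (the `from lattifai.config import MediaConfig` path is assumed):

    from lattifai.config import MediaConfig

    media_config = MediaConfig()
    media_config.set_input_path("https://example.com/talk.mp4")  # positional input lands here

    if media_config.is_input_remote():
        output_path = media_config.output_dir   # replaces the old output_dir argument
        fmt = media_config.normalize_format()   # replaces the old media_format argument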
lattifai/cli/youtube.py CHANGED
@@ -117,6 +117,7 @@ def youtube(
         force_overwrite=media_config.force_overwrite,
         split_sentence=caption_config.split_sentence,
         channel_selector=media_config.channel_selector,
+        streaming_chunk_secs=media_config.streaming_chunk_secs,
     )
 
 
lattifai/client.py CHANGED
@@ -18,6 +18,7 @@ from lattifai.errors import (
     LatticeEncodingError,
 )
 from lattifai.mixin import LattifAIClientMixin
+from lattifai.utils import safe_print
 
 if TYPE_CHECKING:
     from lattifai.diarization import LattifAIDiarizer  # noqa: F401
@@ -91,6 +92,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         input_caption_format: Optional[InputCaptionFormat] = None,
         split_sentence: Optional[bool] = None,
         channel_selector: Optional[str | int] = "average",
+        streaming_chunk_secs: Optional[float] = None,
     ) -> Caption:
         try:
             # Step 1: Get caption
@@ -100,6 +102,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
             media_audio = self.audio_loader(
                 input_media,
                 channel_selector=channel_selector,
+                streaming_chunk_secs=streaming_chunk_secs,
             )
 
             if not input_caption:
@@ -113,7 +116,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
             alignment_strategy = self.aligner.config.strategy
 
             if alignment_strategy != "entire" or caption.transcription:
-                print(colorful.cyan(f"🔄 Using segmented alignment strategy: {alignment_strategy}"))
+                safe_print(colorful.cyan(f"🔄 Using segmented alignment strategy: {alignment_strategy}"))
 
                 if caption.supervisions and alignment_strategy == "transcription":
                     # raise NotImplementedError("Transcription-based alignment is not yet implemented.")
@@ -126,7 +129,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
                 if not caption.transcription:
                     import asyncio
 
-                    print(colorful.cyan("📝 Transcribing media for alignment..."))
+                    safe_print(colorful.cyan("📝 Transcribing media for alignment..."))
                     if output_caption_path:
                         transcript_file = (
                             Path(str(output_caption_path)).parent
@@ -223,11 +226,11 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
                     continue
 
                 offset = round(start, 4)
-                emission = self.aligner.emission(
-                    media_audio.tensor[
-                        :, int(start * media_audio.sampling_rate) : int(end * media_audio.sampling_rate)
-                    ]
-                )
+                # Extract audio slice
+                audio_slice_ndarray = media_audio.ndarray[
+                    :, int(start * media_audio.sampling_rate) : int(end * media_audio.sampling_rate)
+                ]
+                emission = self.aligner.emission(audio_slice_ndarray)
 
                 # Align segment
                 _supervisions, _alignments = self.aligner.alignment(
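The switch from `media_audio.tensor` to `media_audio.ndarray` keeps the same indexing scheme: segment boundaries in seconds are converted to sample indices by multiplying by the sampling rate, then used to slice a `(channels, samples)` array. A standalone illustration of the arithmetic with numpy:

    import numpy as np

    sampling_rate = 16_000
    audio = np.zeros((1, 60 * sampling_rate), dtype=np.float32)  # 60 s of mono audio

    start, end = 2.5, 7.25  # segment boundaries in seconds
    segment = audio[:, int(start * sampling_rate) : int(end * sampling_rate)]
    print(segment.shape)  # (1, 76000) -> 4.75 s worth of samples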
@@ -259,7 +262,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
 
             # Step 5: Speaker diarization
             if self.diarization_config.enabled and self.diarizer:
-                print(colorful.cyan("🗣️ Performing speaker diarization..."))
+                safe_print(colorful.cyan("🗣️ Performing speaker diarization..."))
                 caption = self.speaker_diarization(
                     input_media=media_audio,
                     caption=caption,
@@ -308,7 +311,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         if output_caption_path:
             diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
             if diarization_file.exists():
-                print(colorful.cyan(f"Reading existing speaker diarization from {diarization_file}"))
+                safe_print(colorful.cyan(f"Reading existing speaker diarization from {diarization_file}"))
                 caption.read_speaker_diarization(diarization_file)
 
         diarization, alignments = self.diarizer.diarize_with_alignments(
@@ -433,12 +436,13 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         split_sentence: Optional[bool] = None,
         use_transcription: bool = False,
         channel_selector: Optional[str | int] = "average",
+        streaming_chunk_secs: Optional[float] = None,
     ) -> Caption:
         # Prepare output directory and media format
         output_dir = self._prepare_youtube_output_dir(output_dir)
         media_format = self._determine_media_format(media_format)
 
-        print(colorful.cyan(f"🎬 Starting YouTube workflow for: {url}"))
+        safe_print(colorful.cyan(f"🎬 Starting YouTube workflow for: {url}"))
 
         # Step 1: Download media
         media_file = self._download_media_sync(url, output_dir, media_format, force_overwrite)
@@ -460,7 +464,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
         output_caption_path = self._generate_output_caption_path(output_caption_path, media_file, output_dir)
 
         # Step 4: Perform alignment
-        print(colorful.cyan("🔗 Performing forced alignment..."))
+        safe_print(colorful.cyan("🔗 Performing forced alignment..."))
 
         caption: Caption = self.alignment(
             input_media=media_audio,
@@ -468,6 +472,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
             output_caption_path=output_caption_path,
             split_sentence=split_sentence,
             channel_selector=channel_selector,
+            streaming_chunk_secs=streaming_chunk_secs,
         )
 
         return caption
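Both `alignment` and the YouTube workflow now accept `streaming_chunk_secs` and hand it through to the audio loader, so long inputs can be aligned without holding every sample in memory at once. A hedged usage sketch; `LattifAI()` construction details are not shown in this diff, and only parameters visible in the hunks above are used:

    from lattifai.client import LattifAI

    client = LattifAI()  # constructor arguments assumed
    caption = client.alignment(
        input_media="lecture.mp3",
        input_caption="lecture.srt",
        channel_selector="average",
        streaming_chunk_secs=600.0,  # stream 10-minute chunks instead of one full load
    )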
@@ -18,8 +18,8 @@ class AlignmentConfig:
     """
 
     # Alignment configuration
-    model_name: str = "Lattifai/Lattice-1"
-    """Model identifier or path to local model directory (e.g., 'Lattifai/Lattice-1')."""
+    model_name: str = "LattifAI/Lattice-1"
+    """Model identifier or path to local model directory (e.g., 'LattifAI/Lattice-1')."""
 
     device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
     """Computation device: 'cpu' for CPU, 'cuda' for NVIDIA GPU, 'mps' for Apple Silicon."""
@@ -58,6 +58,27 @@ class AlignmentConfig:
     Default: 4.0 seconds. Useful for detecting scene changes or natural breaks in content.
     """
 
+    # Beam search parameters for forced alignment
+    search_beam: int = 200
+    """Search beam size for beam search decoding. Larger values explore more hypotheses but are slower.
+    Default: 200. Typical range: 20-500.
+    """
+
+    output_beam: int = 80
+    """Output beam size for keeping top hypotheses. Should be smaller than search_beam.
+    Default: 80. Typical range: 10-200.
+    """
+
+    min_active_states: int = 400
+    """Minimum number of active states during decoding. Controls memory and search space.
+    Default: 400. Typical range: 30-1000.
+    """
+
+    max_active_states: int = 10000
+    """Maximum number of active states during decoding. Prevents excessive memory usage.
+    Default: 10000. Typical range: 1000-20000.
+    """
+
     client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
     """Reference to the SyncAPIClient instance. Auto-set during client initialization."""
 
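The four new fields expose the usual beam-search trade-off: wider beams and more active states keep more alignment hypotheses alive (better recovery on difficult audio) at higher compute and memory cost. A construction sketch with values inside the documented ranges, assuming dataclass-style keyword construction:

    from lattifai.config import AlignmentConfig

    config = AlignmentConfig(
        model_name="LattifAI/Lattice-1",
        search_beam=300,          # wider search for noisy recordings (range 20-500)
        output_beam=100,          # keep fewer final hypotheses than search_beam (range 10-200)
        min_active_states=400,    # floor on the active search space
        max_active_states=10000,  # ceiling to bound memory
    )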
@@ -48,7 +48,7 @@ class CaptionConfig:
     include_speaker_in_text: bool = True
     """Preserve speaker labels in caption text content."""
 
-    normalize_text: bool = False
+    normalize_text: bool = True
     """Clean HTML entities and normalize whitespace in caption text."""
 
     split_sentence: bool = False
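Flipping `normalize_text` to `True` is a behavior change for anyone constructing `CaptionConfig` with defaults; callers that need raw, unnormalized caption text must now opt out explicitly (assuming dataclass-style construction):

    from lattifai.config import CaptionConfig

    config = CaptionConfig()                      # 1.0.5: normalize_text is True by default
    legacy = CaptionConfig(normalize_text=False)  # restores the 1.0.2 default behavior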
lattifai/config/media.py CHANGED
@@ -52,12 +52,23 @@ class MediaConfig:
     sample_rate: Optional[int] = None
     """Audio sample rate in Hz (e.g., 16000, 44100)."""
 
-    channels: Optional[int] = None
-    """Number of audio channels (1=mono, 2=stereo)."""
-
     channel_selector: Optional[str | int] = "average"
     """Audio channel selection strategy: 'average', 'left', 'right', or channel index."""
 
+    # Audio Streaming Configuration
+    streaming_chunk_secs: Optional[float] = 600.0
+    """Duration in seconds of each audio chunk for streaming mode.
+    When set to a value (e.g., 600.0), enables streaming mode for processing very long audio files (>1 hour).
+    Audio is processed in chunks to keep memory usage low (<4GB peak), suitable for 20+ hour files.
+    When None, disables streaming and loads entire audio into memory.
+    Valid range: 1-1800 seconds (minimum 1 second, maximum 30 minutes).
+    Default: 600 seconds (10 minutes).
+    Recommended: Use 60 seconds or larger for optimal performance.
+    - Smaller chunks: Lower memory usage, more frequent I/O
+    - Larger chunks: Better alignment context, higher memory usage
+    Note: Streaming may add slight processing overhead but enables handling arbitrarily long files.
+    """
+
     # Output / download configuration
     output_dir: Path = field(default_factory=lambda: Path.cwd())
     """Directory for output files (default: current working directory)."""
@@ -87,12 +98,21 @@ class MediaConfig:
         self._normalize_media_format()
         self._process_input_path()
         self._process_output_path()
+        self._validate_streaming_config()
 
     def _setup_output_directory(self) -> None:
         """Ensure output directory exists and is valid."""
         resolved_output_dir = self._ensure_dir(self.output_dir)
         self.output_dir = resolved_output_dir
 
+    def _validate_streaming_config(self) -> None:
+        """Validate streaming configuration parameters."""
+        if self.streaming_chunk_secs is not None:
+            if not 1.0 <= self.streaming_chunk_secs <= 1800.0:
+                raise ValueError(
+                    f"streaming_chunk_secs must be between 1 and 1800 seconds (1 second to 30 minutes), got {self.streaming_chunk_secs}. Recommended: 60 seconds or larger."
+                )
+
     def _validate_default_formats(self) -> None:
         """Validate default audio and video formats."""
         self.default_audio_format = self._normalize_format(self.default_audio_format)
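The new validator turns out-of-range chunk sizes into construction-time errors instead of failures deep inside the audio loader. A quick check against the documented bounds, assuming `MediaConfig` runs its validators from `__post_init__` as the hunk above suggests:

    from lattifai.config import MediaConfig

    MediaConfig(streaming_chunk_secs=600.0)  # OK: default 10-minute chunks
    MediaConfig(streaming_chunk_secs=None)   # OK: streaming disabled, whole file in memory

    try:
        MediaConfig(streaming_chunk_secs=0.5)  # below the 1-second minimum
    except ValueError as err:
        print(err)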