lattifai 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- lattifai/__init__.py +10 -0
- lattifai/alignment/lattice1_aligner.py +33 -13
- lattifai/alignment/lattice1_worker.py +121 -50
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/tokenizer.py +3 -3
- lattifai/audio2.py +269 -70
- lattifai/caption/caption.py +161 -3
- lattifai/cli/alignment.py +2 -1
- lattifai/cli/app_installer.py +35 -33
- lattifai/cli/caption.py +8 -18
- lattifai/cli/server.py +3 -1
- lattifai/cli/transcribe.py +53 -38
- lattifai/cli/youtube.py +1 -0
- lattifai/client.py +16 -11
- lattifai/config/alignment.py +23 -2
- lattifai/config/caption.py +1 -1
- lattifai/config/media.py +23 -3
- lattifai/errors.py +7 -3
- lattifai/mixin.py +26 -15
- lattifai/server/app.py +2 -1
- lattifai/utils.py +37 -0
- lattifai/workflow/file_manager.py +15 -13
- lattifai/workflow/youtube.py +16 -1
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/METADATA +65 -15
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/RECORD +29 -29
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/licenses/LICENSE +1 -1
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/WHEEL +0 -0
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/entry_points.txt +0 -0
- {lattifai-1.0.4.dist-info → lattifai-1.0.5.dist-info}/top_level.txt +0 -0
lattifai/cli/app_installer.py
CHANGED
|
@@ -5,6 +5,8 @@ import subprocess
|
|
|
5
5
|
import sys
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
|
|
8
|
+
from lattifai.utils import safe_print
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
def check_command_exists(cmd: str) -> bool:
|
|
10
12
|
"""Check if a command exists in PATH."""
|
|
@@ -19,17 +21,17 @@ def install_nodejs():
|
|
|
19
21
|
"""Install Node.js based on the operating system."""
|
|
20
22
|
system = platform.system().lower()
|
|
21
23
|
|
|
22
|
-
|
|
24
|
+
safe_print("📦 Node.js not found. Installing Node.js...\n")
|
|
23
25
|
|
|
24
26
|
try:
|
|
25
27
|
if system == "darwin": # macOS
|
|
26
28
|
# Check if Homebrew is installed
|
|
27
29
|
if check_command_exists("brew"):
|
|
28
|
-
|
|
30
|
+
safe_print("🍺 Using Homebrew to install Node.js...")
|
|
29
31
|
subprocess.run(["brew", "install", "node"], check=True)
|
|
30
|
-
|
|
32
|
+
safe_print("✓ Node.js installed via Homebrew\n")
|
|
31
33
|
else:
|
|
32
|
-
|
|
34
|
+
safe_print("❌ Homebrew not found.")
|
|
33
35
|
print(" Please install Homebrew first:")
|
|
34
36
|
print(
|
|
35
37
|
' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
|
|
@@ -40,46 +42,46 @@ def install_nodejs():
|
|
|
40
42
|
elif system == "linux":
|
|
41
43
|
# Try common package managers
|
|
42
44
|
if check_command_exists("apt"):
|
|
43
|
-
|
|
45
|
+
safe_print("🐧 Using apt to install Node.js...")
|
|
44
46
|
subprocess.run(["sudo", "apt", "update"], check=True)
|
|
45
47
|
subprocess.run(["sudo", "apt", "install", "-y", "nodejs", "npm"], check=True)
|
|
46
|
-
|
|
48
|
+
safe_print("✓ Node.js installed via apt\n")
|
|
47
49
|
elif check_command_exists("yum"):
|
|
48
|
-
|
|
50
|
+
safe_print("🐧 Using yum to install Node.js...")
|
|
49
51
|
subprocess.run(["sudo", "yum", "install", "-y", "nodejs", "npm"], check=True)
|
|
50
|
-
|
|
52
|
+
safe_print("✓ Node.js installed via yum\n")
|
|
51
53
|
elif check_command_exists("dnf"):
|
|
52
|
-
|
|
54
|
+
safe_print("🐧 Using dnf to install Node.js...")
|
|
53
55
|
subprocess.run(["sudo", "dnf", "install", "-y", "nodejs", "npm"], check=True)
|
|
54
|
-
|
|
56
|
+
safe_print("✓ Node.js installed via dnf\n")
|
|
55
57
|
elif check_command_exists("pacman"):
|
|
56
|
-
|
|
58
|
+
safe_print("🐧 Using pacman to install Node.js...")
|
|
57
59
|
subprocess.run(["sudo", "pacman", "-S", "--noconfirm", "nodejs", "npm"], check=True)
|
|
58
|
-
|
|
60
|
+
safe_print("✓ Node.js installed via pacman\n")
|
|
59
61
|
else:
|
|
60
|
-
|
|
62
|
+
safe_print("❌ No supported package manager found (apt/yum/dnf/pacman).")
|
|
61
63
|
print(" Please install Node.js manually from: https://nodejs.org/")
|
|
62
64
|
sys.exit(1)
|
|
63
65
|
|
|
64
66
|
elif system == "windows":
|
|
65
|
-
|
|
67
|
+
safe_print("❌ Automatic installation on Windows is not supported.")
|
|
66
68
|
print(" Please download and install Node.js from: https://nodejs.org/")
|
|
67
69
|
print(" Then run this command again.")
|
|
68
70
|
sys.exit(1)
|
|
69
71
|
|
|
70
72
|
else:
|
|
71
|
-
|
|
73
|
+
safe_print(f"❌ Unsupported operating system: {system}")
|
|
72
74
|
print(" Please install Node.js manually from: https://nodejs.org/")
|
|
73
75
|
sys.exit(1)
|
|
74
76
|
|
|
75
77
|
# Verify installation
|
|
76
78
|
if not check_command_exists("npm"):
|
|
77
|
-
|
|
79
|
+
safe_print("❌ Node.js installation verification failed.")
|
|
78
80
|
print(" Please restart your terminal and try again.")
|
|
79
81
|
sys.exit(1)
|
|
80
82
|
|
|
81
83
|
except subprocess.CalledProcessError as e:
|
|
82
|
-
|
|
84
|
+
safe_print(f"\n❌ Error during Node.js installation: {e}")
|
|
83
85
|
print(" Please install Node.js manually from: https://nodejs.org/")
|
|
84
86
|
sys.exit(1)
|
|
85
87
|
|
|
@@ -90,49 +92,49 @@ def main():
|
|
|
90
92
|
app_dir = Path(__file__).parent.parent.parent.parent / "app"
|
|
91
93
|
|
|
92
94
|
if not app_dir.exists():
|
|
93
|
-
|
|
95
|
+
safe_print(f"❌ Error: app directory not found at {app_dir}")
|
|
94
96
|
print(" Make sure you're in the lattifai-python repository.")
|
|
95
97
|
sys.exit(1)
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
safe_print("🚀 Installing lai-app (LattifAI Web Application)...\n")
|
|
98
100
|
|
|
99
101
|
# Check if npm is installed, if not, install Node.js
|
|
100
102
|
if not check_command_exists("npm"):
|
|
101
103
|
install_nodejs()
|
|
102
104
|
else:
|
|
103
105
|
npm_version = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=True).stdout.strip()
|
|
104
|
-
|
|
106
|
+
safe_print(f"✓ npm is already installed (v{npm_version})\n")
|
|
105
107
|
|
|
106
108
|
# Change to app directory and run installation
|
|
107
109
|
try:
|
|
108
|
-
|
|
110
|
+
safe_print(f"📁 Working directory: {app_dir}\n")
|
|
109
111
|
|
|
110
112
|
# Install dependencies
|
|
111
|
-
|
|
113
|
+
safe_print("📦 Installing dependencies...")
|
|
112
114
|
subprocess.run(["npm", "install"], cwd=app_dir, check=True)
|
|
113
|
-
|
|
115
|
+
safe_print("✓ Dependencies installed\n")
|
|
114
116
|
|
|
115
117
|
# Build the application
|
|
116
|
-
|
|
118
|
+
safe_print("🔨 Building application...")
|
|
117
119
|
subprocess.run(["npm", "run", "build"], cwd=app_dir, check=True)
|
|
118
|
-
|
|
120
|
+
safe_print("✓ Application built\n")
|
|
119
121
|
|
|
120
122
|
# Link globally
|
|
121
|
-
|
|
123
|
+
safe_print("🔗 Linking lai-app command globally...")
|
|
122
124
|
subprocess.run(["npm", "link"], cwd=app_dir, check=True)
|
|
123
|
-
|
|
125
|
+
safe_print("✓ lai-app command linked globally\n")
|
|
124
126
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
127
|
+
safe_print("=" * 60)
|
|
128
|
+
safe_print("✅ lai-app installed successfully!")
|
|
129
|
+
safe_print("=" * 60)
|
|
130
|
+
safe_print("\n🎉 You can now run:")
|
|
129
131
|
print(" lai-app # Start the web application")
|
|
130
132
|
print(" lai-app --help # Show help")
|
|
131
133
|
print(" lai-app --port 8080 # Use custom port")
|
|
132
|
-
|
|
134
|
+
safe_print("\n📖 For more information, see app/CLI_USAGE.md\n")
|
|
133
135
|
|
|
134
136
|
except subprocess.CalledProcessError as e:
|
|
135
|
-
|
|
137
|
+
safe_print(f"\n❌ Error during installation: {e}")
|
|
136
138
|
sys.exit(1)
|
|
137
139
|
|
|
138
140
|
|
lattifai/cli/caption.py
CHANGED
|
@@ -7,6 +7,7 @@ from lhotse.utils import Pathlike
|
|
|
7
7
|
from typing_extensions import Annotated
|
|
8
8
|
|
|
9
9
|
from lattifai.config import CaptionConfig
|
|
10
|
+
from lattifai.utils import safe_print
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
@run.cli.entrypoint(name="convert", namespace="caption")
|
|
@@ -55,7 +56,7 @@ def convert(
|
|
|
55
56
|
caption = Caption.read(input_path, normalize_text=normalize_text)
|
|
56
57
|
caption.write(output_path, include_speaker_in_text=include_speaker_in_text)
|
|
57
58
|
|
|
58
|
-
|
|
59
|
+
safe_print(f"✅ Converted {input_path} -> {output_path}")
|
|
59
60
|
return output_path
|
|
60
61
|
|
|
61
62
|
|
|
@@ -63,7 +64,6 @@ def convert(
|
|
|
63
64
|
def normalize(
|
|
64
65
|
input_path: Pathlike,
|
|
65
66
|
output_path: Pathlike,
|
|
66
|
-
caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
|
|
67
67
|
):
|
|
68
68
|
"""
|
|
69
69
|
Normalize caption text by cleaning HTML entities and whitespace.
|
|
@@ -81,9 +81,6 @@ def normalize(
|
|
|
81
81
|
Args:
|
|
82
82
|
input_path: Path to input caption file to normalize
|
|
83
83
|
output_path: Path to output caption file (defaults to overwriting input file)
|
|
84
|
-
caption: Caption configuration for text normalization.
|
|
85
|
-
Fields: input_format, output_format, normalize_text (automatically enabled),
|
|
86
|
-
encoding
|
|
87
84
|
|
|
88
85
|
Examples:
|
|
89
86
|
# Normalize and save to new file (positional arguments)
|
|
@@ -92,13 +89,9 @@ def normalize(
|
|
|
92
89
|
# Normalize with format conversion
|
|
93
90
|
lai caption normalize input.vtt output.srt
|
|
94
91
|
|
|
95
|
-
# Normalize with custom caption config
|
|
96
|
-
lai caption normalize input.srt output.srt \\
|
|
97
|
-
caption.encoding=utf-8
|
|
98
|
-
|
|
99
92
|
# Using keyword arguments (traditional syntax)
|
|
100
|
-
lai caption normalize
|
|
101
|
-
input_path=input.srt
|
|
93
|
+
lai caption normalize \
|
|
94
|
+
input_path=input.srt \
|
|
102
95
|
output_path=output.srt
|
|
103
96
|
"""
|
|
104
97
|
from pathlib import Path
|
|
@@ -112,9 +105,9 @@ def normalize(
|
|
|
112
105
|
caption_obj.write(output_path, include_speaker_in_text=True)
|
|
113
106
|
|
|
114
107
|
if output_path == input_path:
|
|
115
|
-
|
|
108
|
+
safe_print(f"✅ Normalized {input_path} (in-place)")
|
|
116
109
|
else:
|
|
117
|
-
|
|
110
|
+
safe_print(f"✅ Normalized {input_path} -> {output_path}")
|
|
118
111
|
|
|
119
112
|
return output_path
|
|
120
113
|
|
|
@@ -124,7 +117,6 @@ def shift(
|
|
|
124
117
|
input_path: Pathlike,
|
|
125
118
|
output_path: Pathlike,
|
|
126
119
|
seconds: float,
|
|
127
|
-
caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
|
|
128
120
|
):
|
|
129
121
|
"""
|
|
130
122
|
Shift caption timestamps by a specified number of seconds.
|
|
@@ -140,8 +132,6 @@ def shift(
|
|
|
140
132
|
output_path: Path to output caption file (can be same as input for in-place modification)
|
|
141
133
|
seconds: Number of seconds to shift timestamps. Positive values delay captions,
|
|
142
134
|
negative values advance them earlier.
|
|
143
|
-
caption: Caption configuration for reading/writing.
|
|
144
|
-
Fields: input_format, output_format, encoding
|
|
145
135
|
|
|
146
136
|
Examples:
|
|
147
137
|
# Delay captions by 2 seconds (positional arguments)
|
|
@@ -181,9 +171,9 @@ def shift(
|
|
|
181
171
|
direction = f"advanced by {abs(seconds)}s"
|
|
182
172
|
|
|
183
173
|
if output_path == input_path:
|
|
184
|
-
|
|
174
|
+
safe_print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
|
|
185
175
|
else:
|
|
186
|
-
|
|
176
|
+
safe_print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
|
|
187
177
|
|
|
188
178
|
return output_path
|
|
189
179
|
|
lattifai/cli/server.py
CHANGED
|
@@ -4,6 +4,8 @@ import os
|
|
|
4
4
|
import colorful
|
|
5
5
|
import uvicorn
|
|
6
6
|
|
|
7
|
+
from lattifai.utils import safe_print
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
def main():
|
|
9
11
|
"""Launch the LattifAI Web Interface."""
|
|
@@ -29,7 +31,7 @@ def main():
|
|
|
29
31
|
|
|
30
32
|
args = parser.parse_args()
|
|
31
33
|
|
|
32
|
-
|
|
34
|
+
safe_print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
|
|
33
35
|
print(colorful.cyan(f"Server running at http://localhost:{args.port}"))
|
|
34
36
|
print(colorful.yellow(f"Host: {args.host}"))
|
|
35
37
|
print(colorful.yellow(f"Auto-reload: {'disabled' if args.no_reload else 'enabled'}"))
|
lattifai/cli/transcribe.py
CHANGED
|
@@ -3,10 +3,8 @@
|
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
5
|
import nemo_run as run
|
|
6
|
-
from lhotse.utils import Pathlike
|
|
7
6
|
from typing_extensions import Annotated
|
|
8
7
|
|
|
9
|
-
from lattifai.audio2 import AudioLoader, ChannelSelectorType
|
|
10
8
|
from lattifai.cli.alignment import align as alignment_align
|
|
11
9
|
from lattifai.config import (
|
|
12
10
|
AlignmentConfig,
|
|
@@ -23,9 +21,8 @@ from lattifai.utils import _resolve_model_path
|
|
|
23
21
|
def transcribe(
|
|
24
22
|
input: Optional[str] = None,
|
|
25
23
|
output_caption: Optional[str] = None,
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
channel_selector: Optional[ChannelSelectorType] = "average",
|
|
24
|
+
media: Annotated[Optional[MediaConfig], run.Config[MediaConfig]] = None,
|
|
25
|
+
client: Annotated[Optional[ClientConfig], run.Config[ClientConfig]] = None,
|
|
29
26
|
transcription: Annotated[Optional[TranscriptionConfig], run.Config[TranscriptionConfig]] = None,
|
|
30
27
|
):
|
|
31
28
|
"""
|
|
@@ -39,11 +36,8 @@ def transcribe(
|
|
|
39
36
|
Args:
|
|
40
37
|
input: Path to input audio/video file or YouTube URL (can be provided as positional argument)
|
|
41
38
|
output_caption: Path for output caption file (can be provided as positional argument)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
channel_selector: Audio channel selection strategy (default: average)
|
|
45
|
-
Options: average, left, right, or an integer channel index.
|
|
46
|
-
Note: Ignored when input is a URL and Gemini transcriber is used.
|
|
39
|
+
media: Media configuration for input/output handling.
|
|
40
|
+
Fields: input_path, output_dir, media_format, channel_selector, streaming_chunk_secs
|
|
47
41
|
transcription: Transcription service configuration.
|
|
48
42
|
Fields: model_name, device, language, gemini_api_key
|
|
49
43
|
|
|
@@ -67,6 +61,11 @@ def transcribe(
|
|
|
67
61
|
lai transcribe run audio.wav output.srt \\
|
|
68
62
|
transcription.language=zh
|
|
69
63
|
|
|
64
|
+
# With MediaConfig settings
|
|
65
|
+
lai transcribe run audio.wav output.srt \\
|
|
66
|
+
media.channel_selector=left \\
|
|
67
|
+
media.streaming_chunk_secs=30.0
|
|
68
|
+
|
|
70
69
|
# Full configuration with keyword arguments
|
|
71
70
|
lai transcribe run \\
|
|
72
71
|
input=audio.wav \\
|
|
@@ -78,68 +77,84 @@ def transcribe(
|
|
|
78
77
|
from pathlib import Path
|
|
79
78
|
|
|
80
79
|
import colorful
|
|
80
|
+
from lattifai_core.client import SyncAPIClient
|
|
81
81
|
|
|
82
|
+
from lattifai.audio2 import AudioLoader
|
|
82
83
|
from lattifai.transcription import create_transcriber
|
|
84
|
+
from lattifai.utils import safe_print
|
|
83
85
|
|
|
84
|
-
# Initialize
|
|
86
|
+
# Initialize configs with defaults
|
|
87
|
+
client_config = client or ClientConfig()
|
|
85
88
|
transcription_config = transcription or TranscriptionConfig()
|
|
89
|
+
media_config = media or MediaConfig()
|
|
90
|
+
|
|
91
|
+
# Initialize client wrapper to properly set client_wrapper
|
|
92
|
+
client_wrapper = SyncAPIClient(config=client_config)
|
|
93
|
+
transcription_config.client_wrapper = client_wrapper
|
|
94
|
+
|
|
95
|
+
# Initialize client wrapper to properly set client_wrapper
|
|
96
|
+
client_wrapper = SyncAPIClient(config=client_config)
|
|
97
|
+
transcription_config.client_wrapper = client_wrapper
|
|
86
98
|
|
|
87
99
|
# Validate input is required
|
|
88
|
-
if not input:
|
|
89
|
-
raise ValueError("Input is required. Provide input as positional argument
|
|
100
|
+
if not input and not media_config.input_path:
|
|
101
|
+
raise ValueError("Input is required. Provide input as positional argument or media.input_path.")
|
|
102
|
+
|
|
103
|
+
# Assign input to media_config if provided
|
|
104
|
+
if input:
|
|
105
|
+
media_config.set_input_path(input)
|
|
90
106
|
|
|
91
107
|
# Detect if input is a URL
|
|
92
|
-
is_url =
|
|
108
|
+
is_url = media_config.is_input_remote()
|
|
93
109
|
|
|
94
110
|
# Prepare output paths
|
|
95
111
|
if is_url:
|
|
96
|
-
# For URLs, use output_dir
|
|
97
|
-
|
|
98
|
-
output_path = Path(str(output_dir)).expanduser()
|
|
99
|
-
output_path.mkdir(parents=True, exist_ok=True)
|
|
100
|
-
else:
|
|
101
|
-
output_path = Path.cwd()
|
|
112
|
+
# For URLs, use output_dir from media_config or current directory
|
|
113
|
+
output_path = media_config.output_dir
|
|
102
114
|
else:
|
|
103
115
|
# For files, use input path directory
|
|
104
|
-
|
|
105
|
-
output_path = input_path.parent
|
|
116
|
+
output_path = Path(media_config.input_path).parent
|
|
106
117
|
|
|
107
118
|
# Create transcriber
|
|
108
119
|
if not transcription_config.lattice_model_path:
|
|
109
|
-
transcription_config.lattice_model_path = _resolve_model_path("
|
|
120
|
+
transcription_config.lattice_model_path = _resolve_model_path("LattifAI/Lattice-1")
|
|
110
121
|
transcriber = create_transcriber(transcription_config=transcription_config)
|
|
111
122
|
|
|
112
|
-
|
|
113
|
-
|
|
123
|
+
safe_print(colorful.cyan(f"🎤 Starting transcription with {transcriber.name}..."))
|
|
124
|
+
safe_print(colorful.cyan(f" Input: {media_config.input_path}"))
|
|
114
125
|
|
|
115
126
|
# Perform transcription
|
|
116
127
|
if is_url and transcriber.supports_url:
|
|
117
128
|
# Check if transcriber supports URL directly
|
|
118
|
-
|
|
119
|
-
transcript = asyncio.run(transcriber.transcribe(
|
|
129
|
+
safe_print(colorful.cyan(" Transcribing from URL directly..."))
|
|
130
|
+
transcript = asyncio.run(transcriber.transcribe(media_config.input_path))
|
|
120
131
|
else:
|
|
121
132
|
if is_url:
|
|
122
133
|
# Download media first, then transcribe
|
|
123
|
-
|
|
134
|
+
safe_print(colorful.cyan(" Downloading media from URL..."))
|
|
124
135
|
from lattifai.workflow.youtube import YouTubeDownloader
|
|
125
136
|
|
|
126
137
|
downloader = YouTubeDownloader()
|
|
127
138
|
input_path = asyncio.run(
|
|
128
139
|
downloader.download_media(
|
|
129
|
-
url=
|
|
140
|
+
url=media_config.input_path,
|
|
130
141
|
output_dir=str(output_path),
|
|
131
|
-
media_format=
|
|
132
|
-
force_overwrite=
|
|
142
|
+
media_format=media_config.normalize_format(),
|
|
143
|
+
force_overwrite=media_config.force_overwrite,
|
|
133
144
|
)
|
|
134
145
|
)
|
|
135
|
-
|
|
146
|
+
safe_print(colorful.cyan(f" Media downloaded to: {input_path}"))
|
|
136
147
|
else:
|
|
137
|
-
input_path = Path(
|
|
148
|
+
input_path = Path(media_config.input_path)
|
|
138
149
|
|
|
139
|
-
|
|
150
|
+
safe_print(colorful.cyan(" Loading audio..."))
|
|
140
151
|
# For files, load audio first
|
|
141
152
|
audio_loader = AudioLoader(device=transcription_config.device)
|
|
142
|
-
media_audio = audio_loader(
|
|
153
|
+
media_audio = audio_loader(
|
|
154
|
+
input_path,
|
|
155
|
+
channel_selector=media_config.channel_selector,
|
|
156
|
+
streaming_chunk_secs=media_config.streaming_chunk_secs,
|
|
157
|
+
)
|
|
143
158
|
transcript = asyncio.run(transcriber.transcribe(media_audio))
|
|
144
159
|
|
|
145
160
|
# Determine output caption path
|
|
@@ -153,14 +168,14 @@ def transcribe(
|
|
|
153
168
|
final_output = output_path / f"youtube_LattifAI_{transcriber.name}.{output_format}"
|
|
154
169
|
else:
|
|
155
170
|
# For files, use input filename with suffix
|
|
156
|
-
final_output = Path(
|
|
171
|
+
final_output = Path(media_config.input_path).with_suffix(".LattifAI.srt")
|
|
157
172
|
|
|
158
|
-
|
|
173
|
+
safe_print(colorful.cyan(f" Output: {final_output}"))
|
|
159
174
|
|
|
160
175
|
# Write output
|
|
161
176
|
transcriber.write(transcript, final_output, encoding="utf-8", cache_audio_events=False)
|
|
162
177
|
|
|
163
|
-
|
|
178
|
+
safe_print(colorful.green(f"🎉 Transcription completed: {final_output}"))
|
|
164
179
|
|
|
165
180
|
return transcript
|
|
166
181
|
|
lattifai/cli/youtube.py
CHANGED
lattifai/client.py
CHANGED
|
@@ -18,6 +18,7 @@ from lattifai.errors import (
|
|
|
18
18
|
LatticeEncodingError,
|
|
19
19
|
)
|
|
20
20
|
from lattifai.mixin import LattifAIClientMixin
|
|
21
|
+
from lattifai.utils import safe_print
|
|
21
22
|
|
|
22
23
|
if TYPE_CHECKING:
|
|
23
24
|
from lattifai.diarization import LattifAIDiarizer # noqa: F401
|
|
@@ -91,6 +92,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
91
92
|
input_caption_format: Optional[InputCaptionFormat] = None,
|
|
92
93
|
split_sentence: Optional[bool] = None,
|
|
93
94
|
channel_selector: Optional[str | int] = "average",
|
|
95
|
+
streaming_chunk_secs: Optional[float] = None,
|
|
94
96
|
) -> Caption:
|
|
95
97
|
try:
|
|
96
98
|
# Step 1: Get caption
|
|
@@ -100,6 +102,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
100
102
|
media_audio = self.audio_loader(
|
|
101
103
|
input_media,
|
|
102
104
|
channel_selector=channel_selector,
|
|
105
|
+
streaming_chunk_secs=streaming_chunk_secs,
|
|
103
106
|
)
|
|
104
107
|
|
|
105
108
|
if not input_caption:
|
|
@@ -113,7 +116,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
113
116
|
alignment_strategy = self.aligner.config.strategy
|
|
114
117
|
|
|
115
118
|
if alignment_strategy != "entire" or caption.transcription:
|
|
116
|
-
|
|
119
|
+
safe_print(colorful.cyan(f"🔄 Using segmented alignment strategy: {alignment_strategy}"))
|
|
117
120
|
|
|
118
121
|
if caption.supervisions and alignment_strategy == "transcription":
|
|
119
122
|
# raise NotImplementedError("Transcription-based alignment is not yet implemented.")
|
|
@@ -126,7 +129,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
126
129
|
if not caption.transcription:
|
|
127
130
|
import asyncio
|
|
128
131
|
|
|
129
|
-
|
|
132
|
+
safe_print(colorful.cyan("📝 Transcribing media for alignment..."))
|
|
130
133
|
if output_caption_path:
|
|
131
134
|
transcript_file = (
|
|
132
135
|
Path(str(output_caption_path)).parent
|
|
@@ -223,11 +226,11 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
223
226
|
continue
|
|
224
227
|
|
|
225
228
|
offset = round(start, 4)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
)
|
|
229
|
+
# Extract audio slice
|
|
230
|
+
audio_slice_ndarray = media_audio.ndarray[
|
|
231
|
+
:, int(start * media_audio.sampling_rate) : int(end * media_audio.sampling_rate)
|
|
232
|
+
]
|
|
233
|
+
emission = self.aligner.emission(audio_slice_ndarray)
|
|
231
234
|
|
|
232
235
|
# Align segment
|
|
233
236
|
_supervisions, _alignments = self.aligner.alignment(
|
|
@@ -259,7 +262,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
259
262
|
|
|
260
263
|
# Step 5: Speaker diarization
|
|
261
264
|
if self.diarization_config.enabled and self.diarizer:
|
|
262
|
-
|
|
265
|
+
safe_print(colorful.cyan("🗣️ Performing speaker diarization..."))
|
|
263
266
|
caption = self.speaker_diarization(
|
|
264
267
|
input_media=media_audio,
|
|
265
268
|
caption=caption,
|
|
@@ -308,7 +311,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
308
311
|
if output_caption_path:
|
|
309
312
|
diarization_file = Path(str(output_caption_path)).with_suffix(".SpkDiar")
|
|
310
313
|
if diarization_file.exists():
|
|
311
|
-
|
|
314
|
+
safe_print(colorful.cyan(f"Reading existing speaker diarization from {diarization_file}"))
|
|
312
315
|
caption.read_speaker_diarization(diarization_file)
|
|
313
316
|
|
|
314
317
|
diarization, alignments = self.diarizer.diarize_with_alignments(
|
|
@@ -433,12 +436,13 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
433
436
|
split_sentence: Optional[bool] = None,
|
|
434
437
|
use_transcription: bool = False,
|
|
435
438
|
channel_selector: Optional[str | int] = "average",
|
|
439
|
+
streaming_chunk_secs: Optional[float] = None,
|
|
436
440
|
) -> Caption:
|
|
437
441
|
# Prepare output directory and media format
|
|
438
442
|
output_dir = self._prepare_youtube_output_dir(output_dir)
|
|
439
443
|
media_format = self._determine_media_format(media_format)
|
|
440
444
|
|
|
441
|
-
|
|
445
|
+
safe_print(colorful.cyan(f"🎬 Starting YouTube workflow for: {url}"))
|
|
442
446
|
|
|
443
447
|
# Step 1: Download media
|
|
444
448
|
media_file = self._download_media_sync(url, output_dir, media_format, force_overwrite)
|
|
@@ -460,7 +464,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
460
464
|
output_caption_path = self._generate_output_caption_path(output_caption_path, media_file, output_dir)
|
|
461
465
|
|
|
462
466
|
# Step 4: Perform alignment
|
|
463
|
-
|
|
467
|
+
safe_print(colorful.cyan("🔗 Performing forced alignment..."))
|
|
464
468
|
|
|
465
469
|
caption: Caption = self.alignment(
|
|
466
470
|
input_media=media_audio,
|
|
@@ -468,6 +472,7 @@ class LattifAI(LattifAIClientMixin, SyncAPIClient):
|
|
|
468
472
|
output_caption_path=output_caption_path,
|
|
469
473
|
split_sentence=split_sentence,
|
|
470
474
|
channel_selector=channel_selector,
|
|
475
|
+
streaming_chunk_secs=streaming_chunk_secs,
|
|
471
476
|
)
|
|
472
477
|
|
|
473
478
|
return caption
|
lattifai/config/alignment.py
CHANGED
|
@@ -18,8 +18,8 @@ class AlignmentConfig:
|
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
20
|
# Alignment configuration
|
|
21
|
-
model_name: str = "
|
|
22
|
-
"""Model identifier or path to local model directory (e.g., '
|
|
21
|
+
model_name: str = "LattifAI/Lattice-1"
|
|
22
|
+
"""Model identifier or path to local model directory (e.g., 'LattifAI/Lattice-1')."""
|
|
23
23
|
|
|
24
24
|
device: Literal["cpu", "cuda", "mps", "auto"] = "auto"
|
|
25
25
|
"""Computation device: 'cpu' for CPU, 'cuda' for NVIDIA GPU, 'mps' for Apple Silicon."""
|
|
@@ -58,6 +58,27 @@ class AlignmentConfig:
|
|
|
58
58
|
Default: 4.0 seconds. Useful for detecting scene changes or natural breaks in content.
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
|
+
# Beam search parameters for forced alignment
|
|
62
|
+
search_beam: int = 200
|
|
63
|
+
"""Search beam size for beam search decoding. Larger values explore more hypotheses but are slower.
|
|
64
|
+
Default: 200. Typical range: 20-500.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
output_beam: int = 80
|
|
68
|
+
"""Output beam size for keeping top hypotheses. Should be smaller than search_beam.
|
|
69
|
+
Default: 80. Typical range: 10-200.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
min_active_states: int = 400
|
|
73
|
+
"""Minimum number of active states during decoding. Controls memory and search space.
|
|
74
|
+
Default: 400. Typical range: 30-1000.
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
max_active_states: int = 10000
|
|
78
|
+
"""Maximum number of active states during decoding. Prevents excessive memory usage.
|
|
79
|
+
Default: 10000. Typical range: 1000-20000.
|
|
80
|
+
"""
|
|
81
|
+
|
|
61
82
|
client_wrapper: Optional["SyncAPIClient"] = field(default=None, repr=False)
|
|
62
83
|
"""Reference to the SyncAPIClient instance. Auto-set during client initialization."""
|
|
63
84
|
|
lattifai/config/caption.py
CHANGED
|
@@ -48,7 +48,7 @@ class CaptionConfig:
|
|
|
48
48
|
include_speaker_in_text: bool = True
|
|
49
49
|
"""Preserve speaker labels in caption text content."""
|
|
50
50
|
|
|
51
|
-
normalize_text: bool =
|
|
51
|
+
normalize_text: bool = True
|
|
52
52
|
"""Clean HTML entities and normalize whitespace in caption text."""
|
|
53
53
|
|
|
54
54
|
split_sentence: bool = False
|
lattifai/config/media.py
CHANGED
|
@@ -52,12 +52,23 @@ class MediaConfig:
|
|
|
52
52
|
sample_rate: Optional[int] = None
|
|
53
53
|
"""Audio sample rate in Hz (e.g., 16000, 44100)."""
|
|
54
54
|
|
|
55
|
-
channels: Optional[int] = None
|
|
56
|
-
"""Number of audio channels (1=mono, 2=stereo)."""
|
|
57
|
-
|
|
58
55
|
channel_selector: Optional[str | int] = "average"
|
|
59
56
|
"""Audio channel selection strategy: 'average', 'left', 'right', or channel index."""
|
|
60
57
|
|
|
58
|
+
# Audio Streaming Configuration
|
|
59
|
+
streaming_chunk_secs: Optional[float] = 600.0
|
|
60
|
+
"""Duration in seconds of each audio chunk for streaming mode.
|
|
61
|
+
When set to a value (e.g., 600.0), enables streaming mode for processing very long audio files (>1 hour).
|
|
62
|
+
Audio is processed in chunks to keep memory usage low (<4GB peak), suitable for 20+ hour files.
|
|
63
|
+
When None, disables streaming and loads entire audio into memory.
|
|
64
|
+
Valid range: 1-1800 seconds (minimum 1 second, maximum 30 minutes).
|
|
65
|
+
Default: 600 seconds (10 minutes).
|
|
66
|
+
Recommended: Use 60 seconds or larger for optimal performance.
|
|
67
|
+
- Smaller chunks: Lower memory usage, more frequent I/O
|
|
68
|
+
- Larger chunks: Better alignment context, higher memory usage
|
|
69
|
+
Note: Streaming may add slight processing overhead but enables handling arbitrarily long files.
|
|
70
|
+
"""
|
|
71
|
+
|
|
61
72
|
# Output / download configuration
|
|
62
73
|
output_dir: Path = field(default_factory=lambda: Path.cwd())
|
|
63
74
|
"""Directory for output files (default: current working directory)."""
|
|
@@ -87,12 +98,21 @@ class MediaConfig:
|
|
|
87
98
|
self._normalize_media_format()
|
|
88
99
|
self._process_input_path()
|
|
89
100
|
self._process_output_path()
|
|
101
|
+
self._validate_streaming_config()
|
|
90
102
|
|
|
91
103
|
def _setup_output_directory(self) -> None:
|
|
92
104
|
"""Ensure output directory exists and is valid."""
|
|
93
105
|
resolved_output_dir = self._ensure_dir(self.output_dir)
|
|
94
106
|
self.output_dir = resolved_output_dir
|
|
95
107
|
|
|
108
|
+
def _validate_streaming_config(self) -> None:
|
|
109
|
+
"""Validate streaming configuration parameters."""
|
|
110
|
+
if self.streaming_chunk_secs is not None:
|
|
111
|
+
if not 1.0 <= self.streaming_chunk_secs <= 1800.0:
|
|
112
|
+
raise ValueError(
|
|
113
|
+
f"streaming_chunk_secs must be between 1 and 1800 seconds (1 second to 30 minutes), got {self.streaming_chunk_secs}. Recommended: 60 seconds or larger."
|
|
114
|
+
)
|
|
115
|
+
|
|
96
116
|
def _validate_default_formats(self) -> None:
|
|
97
117
|
"""Validate default audio and video formats."""
|
|
98
118
|
self.default_audio_format = self._normalize_format(self.default_audio_format)
|