lattifai 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/_init.py +20 -0
- lattifai/alignment/__init__.py +9 -1
- lattifai/alignment/lattice1_aligner.py +175 -54
- lattifai/alignment/lattice1_worker.py +47 -4
- lattifai/alignment/punctuation.py +38 -0
- lattifai/alignment/segmenter.py +3 -2
- lattifai/alignment/text_align.py +441 -0
- lattifai/alignment/tokenizer.py +134 -65
- lattifai/audio2.py +162 -183
- lattifai/cli/__init__.py +2 -1
- lattifai/cli/alignment.py +5 -0
- lattifai/cli/caption.py +111 -4
- lattifai/cli/transcribe.py +2 -6
- lattifai/cli/youtube.py +7 -1
- lattifai/client.py +72 -123
- lattifai/config/__init__.py +28 -0
- lattifai/config/alignment.py +14 -0
- lattifai/config/caption.py +45 -31
- lattifai/config/client.py +16 -0
- lattifai/config/event.py +102 -0
- lattifai/config/media.py +20 -0
- lattifai/config/transcription.py +25 -1
- lattifai/data/__init__.py +8 -0
- lattifai/data/caption.py +228 -0
- lattifai/diarization/__init__.py +41 -1
- lattifai/errors.py +78 -53
- lattifai/event/__init__.py +65 -0
- lattifai/event/lattifai.py +166 -0
- lattifai/mixin.py +49 -32
- lattifai/transcription/base.py +8 -2
- lattifai/transcription/gemini.py +147 -16
- lattifai/transcription/lattifai.py +25 -63
- lattifai/types.py +1 -1
- lattifai/utils.py +7 -13
- lattifai/workflow/__init__.py +28 -4
- lattifai/workflow/file_manager.py +2 -5
- lattifai/youtube/__init__.py +43 -0
- lattifai/youtube/client.py +1265 -0
- lattifai/youtube/types.py +23 -0
- lattifai-1.3.0.dist-info/METADATA +678 -0
- lattifai-1.3.0.dist-info/RECORD +57 -0
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +1 -2
- lattifai/__init__.py +0 -88
- lattifai/alignment/sentence_splitter.py +0 -219
- lattifai/caption/__init__.py +0 -20
- lattifai/caption/caption.py +0 -1467
- lattifai/caption/gemini_reader.py +0 -462
- lattifai/caption/gemini_writer.py +0 -173
- lattifai/caption/supervision.py +0 -34
- lattifai/caption/text_parser.py +0 -145
- lattifai/cli/app_installer.py +0 -142
- lattifai/cli/server.py +0 -44
- lattifai/server/app.py +0 -427
- lattifai/workflow/youtube.py +0 -577
- lattifai-1.2.1.dist-info/METADATA +0 -1134
- lattifai-1.2.1.dist-info/RECORD +0 -58
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
lattifai/caption/text_parser.py
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import re
|
|
3
|
-
from typing import Optional, Tuple
|
|
4
|
-
|
|
5
|
-
# Timestamp pattern: [start-end] text
# Example: [1.23-4.56] Hello world
TIMESTAMP_PATTERN = re.compile(r"^\[([\d.]+)-([\d.]+)\]\s*(.*)$")

# Speaker markers commonly found in captions: a ">>" or ">" prefix, either raw
# or HTML-escaped as "&gt;", followed by the speaker name and a colon.
# NOTE: "\uff1a" is the full-width colon. It is spelled as an escape on purpose
# so that text normalisation cannot silently collapse it into an ASCII ":".
SPEAKER_PATTERN = re.compile(r"((?:>>|&gt;&gt;|>|&gt;).*?[:\uff1a])\s*(.*)")

# Transcriber output example:
# 26:19.919 --> 26:34.921
# [SPEAKER_01]: More and more tech giants are entering...
SPEAKER_LATTIFAI = re.compile(r"(^\[SPEAKER_.*?\][:\uff1a])\s*(.*)")

# Upper-case speaker names made of one or two words:
# NISHTHA BHATIA: Hey, everyone.
# DIETER: Oh, hey, Nishtha.
# GEMINI: That might
SPEAKER_PATTERN2 = re.compile(r"^([A-Z]{1,15}(?:\s+[A-Z]{1,15})?[:\uff1a])\s*(.*)$")
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# (needle, replacement) pairs applied in order by normalize_text().
# "&amp;" is deliberately last: decoding it first would turn already-escaped
# text such as "&amp;lt;" into "&lt;" and then into "<" (a double decode).
_TEXT_REPLACEMENTS = (
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&quot;", '"'),
    ("&#39;", "'"),
    ("&apos;", "'"),
    ("&nbsp;", " "),
    ("\\N", " "),  # literal backslash-N; presumably the ASS/SSA line-break marker
    ("\u2026", " "),  # replace ellipsis with space to avoid merging words
    ("&amp;", "&"),
)

# Curly apostrophe (U+2019) inside common English contractions, e.g. don’t, we’ll.
_CURLY_CONTRACTION = re.compile(r"([a-zA-Z])\u2019([tsdm]|ll|re|ve)\b", flags=re.IGNORECASE)
# Curly apostrophe after a digit, e.g. 90’s.
_CURLY_NUMERIC_S = re.compile(r"([0-9])\u2019([s])\b", flags=re.IGNORECASE)
_WHITESPACE_RUN = re.compile(r"\s+")


def normalize_text(text: str) -> str:
    """Normalize caption text.

    The following steps are applied in order:

    - Decode a fixed set of common HTML entities, and replace the literal
      backslash-N marker and the ellipsis character with a space.
    - Convert curly apostrophes to straight ones in common English
      contractions and after digits (``90’s`` becomes ``90's``).
    - Collapse every run of whitespace into a single space and strip the ends.

    HTML tags are left untouched; this function does not remove them.

    Args:
        text: Raw caption text. Falsy values (``None``, ``""``) are accepted.

    Returns:
        The normalized text, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    for needle, replacement in _TEXT_REPLACEMENTS:
        text = text.replace(needle, replacement)

    text = _CURLY_CONTRACTION.sub(r"\1'\2", text)
    text = _CURLY_NUMERIC_S.sub(r"\1'\2", text)

    # Collapse whitespace last so spaces introduced by the replacements merge too.
    return _WHITESPACE_RUN.sub(" ", text).strip()
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def parse_speaker_text(line: str) -> Tuple[Optional[str], str]:
    """Split a caption line into its speaker label and the spoken content.

    The line is tried against ``SPEAKER_PATTERN``, ``SPEAKER_LATTIFAI`` and
    ``SPEAKER_PATTERN2`` in that order; the first pattern that matches wins.

    Args:
        line: A single caption line.

    Returns:
        ``(speaker, text)`` with both parts stripped when a pattern matches.
        The speaker keeps its marker and trailing colon exactly as captured by
        the pattern. ``(None, line)`` with the line unchanged otherwise.
    """
    # Every speaker pattern requires a colon, ASCII or full-width ("\uff1a"),
    # so lines without one can skip the regex work entirely.
    if ":" not in line and "\uff1a" not in line:
        return None, line

    for pattern in (SPEAKER_PATTERN, SPEAKER_LATTIFAI, SPEAKER_PATTERN2):
        match = pattern.match(line)
        if match:
            speaker, text = match.groups()
            return speaker.strip(), text.strip()

    return None, line
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def parse_timestamp_text(line: str) -> Tuple[Optional[float], Optional[float], str]:
    """Extract a leading ``[start-end]`` timestamp from a line.

    Format: [start-end] text
    Example: [1.23-4.56] Hello world

    Args:
        line: Input line to parse.

    Returns:
        ``(start_time, end_time, text)`` where the times are floats taken from
        the bracketed prefix and ``text`` is the stripped remainder. When the
        line does not match ``TIMESTAMP_PATTERN``, or a captured number cannot
        be converted to ``float``, returns ``(None, None, line)`` with the
        original line untouched.
    """
    unparsed = (None, None, line)

    found = TIMESTAMP_PATTERN.match(line)
    if not found:
        return unparsed

    raw_start, raw_end, remainder = found.group(1), found.group(2), found.group(3)
    try:
        return float(raw_start), float(raw_end), remainder.strip()
    except ValueError:
        # The pattern accepts any mix of digits and dots (e.g. "1.2.3"), which
        # float() may reject; such lines are treated as plain text.
        return unparsed
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
if __name__ == "__main__":
    # Ad-hoc smoke test for the speaker patterns; only runs when the module is
    # executed directly.
    # "\uff1a" is the full-width colon, escaped so it cannot be normalised away.
    pattern = re.compile(r"(>>.*?[:\uff1a])\s*(.*)")

    test_strings = [
        ">>Key: Value",
        ">> Key with space : Value with space ",
        ">> 全角键 \uff1a 全角值",
        ">>Key:Value xxx. >>Key:Value",
    ]

    for text in test_strings:
        match = pattern.match(text)
        if match:
            print(f"Input: '{text}'")
            print(f"Speaker: '{match.group(1)}'")
            print(f"Content: '{match.group(2)}'")
            print("-------------")

    # pattern2: every sample below is expected to match, so a miss is an error.
    test_strings2 = ["NISHTHA BHATIA: Hey, everyone.", "DIETER: Oh, hey, Nishtha.", "GEMINI: That might"]
    for text in test_strings2:
        match = SPEAKER_PATTERN2.match(text)
        if match:
            print(f" Input: '{text}'")
            print(f"Speaker: '{match.group(1)}'")
            print(f"Content: '{match.group(2)}'")
            print("-------------")
        else:
            raise ValueError(f"No match for: '{text}'")
|
lattifai/cli/app_installer.py
DELETED
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
"""CLI tool to install lai-app (frontend web application)."""
|
|
2
|
-
|
|
3
|
-
import platform
|
|
4
|
-
import subprocess
|
|
5
|
-
import sys
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
from lattifai.utils import safe_print
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def check_command_exists(cmd: str) -> bool:
    """Check whether a command can be run by invoking ``<cmd> --version``.

    Args:
        cmd: Executable name (looked up on PATH) or path to an executable.

    Returns:
        ``True`` if the command started and exited with status 0, ``False`` if
        it could not be started or exited with a non-zero status.
    """
    try:
        # The output is never inspected, so discard it as raw bytes instead of
        # capturing and decoding it (decoding could fail on unusual output).
        subprocess.run(
            [cmd, "--version"],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError (not on PATH) as well as
        # PermissionError and similar failures to launch the program.
        return False
    return True
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
# Linux package managers, probed in this order, mapped to the commands that
# install Node.js and npm with them.
_LINUX_NODE_INSTALLERS = (
    ("apt", (["sudo", "apt", "update"], ["sudo", "apt", "install", "-y", "nodejs", "npm"])),
    ("yum", (["sudo", "yum", "install", "-y", "nodejs", "npm"],)),
    ("dnf", (["sudo", "dnf", "install", "-y", "nodejs", "npm"],)),
    ("pacman", (["sudo", "pacman", "-S", "--noconfirm", "nodejs", "npm"],)),
)

_NODEJS_MANUAL_HINT = " Please install Node.js manually from: https://nodejs.org/"


def _install_nodejs_macos() -> None:
    """Install Node.js with Homebrew, or exit with instructions if brew is missing."""
    if not check_command_exists("brew"):
        safe_print("❌ Homebrew not found.")
        print(" Please install Homebrew first:")
        print(
            ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
        )
        print("\n Or install Node.js manually from: https://nodejs.org/")
        sys.exit(1)

    safe_print("🍺 Using Homebrew to install Node.js...")
    subprocess.run(["brew", "install", "node"], check=True)
    safe_print("✓ Node.js installed via Homebrew\n")


def _install_nodejs_linux() -> None:
    """Install Node.js with the first available package manager, or exit if none is found."""
    for manager, commands in _LINUX_NODE_INSTALLERS:
        if not check_command_exists(manager):
            continue
        safe_print(f"🐧 Using {manager} to install Node.js...")
        for command in commands:
            subprocess.run(command, check=True)
        safe_print(f"✓ Node.js installed via {manager}\n")
        return

    safe_print("❌ No supported package manager found (apt/yum/dnf/pacman).")
    print(_NODEJS_MANUAL_HINT)
    sys.exit(1)


def install_nodejs():
    """Install Node.js based on the operating system.

    macOS uses Homebrew; Linux uses the first of apt, yum, dnf or pacman that
    is available (via ``sudo``). Windows and any other system are not
    automated. After installing, ``npm`` must be runnable.

    Raises:
        SystemExit: With status 1 when the platform is unsupported, no
            installer is available, an install command fails, or ``npm`` is
            still not runnable afterwards.
    """
    system = platform.system().lower()

    safe_print("📦 Node.js not found. Installing Node.js...\n")

    try:
        if system == "darwin":  # macOS
            _install_nodejs_macos()
        elif system == "linux":
            _install_nodejs_linux()
        elif system == "windows":
            safe_print("❌ Automatic installation on Windows is not supported.")
            print(" Please download and install Node.js from: https://nodejs.org/")
            print(" Then run this command again.")
            sys.exit(1)
        else:
            safe_print(f"❌ Unsupported operating system: {system}")
            print(_NODEJS_MANUAL_HINT)
            sys.exit(1)

        # Verify installation
        if not check_command_exists("npm"):
            safe_print("❌ Node.js installation verification failed.")
            print(" Please restart your terminal and try again.")
            sys.exit(1)

    except subprocess.CalledProcessError as e:
        safe_print(f"\n❌ Error during Node.js installation: {e}")
        print(_NODEJS_MANUAL_HINT)
        sys.exit(1)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def main():
    """Install the lai-app Node.js application.

    Locates the ``app`` directory four levels above this module, makes sure
    ``npm`` is available (installing Node.js if it is not), then runs
    ``npm install``, ``npm run build`` and ``npm link`` inside that directory.
    Exits with status 1 if the directory is missing or any of those npm
    commands fails.
    """
    # Get the app directory relative to this package
    package_parent = Path(__file__).parent.parent.parent.parent
    app_dir = package_parent / "app"

    if not app_dir.exists():
        safe_print(f"❌ Error: app directory not found at {app_dir}")
        print(" Make sure you're in the lattifai-python repository.")
        sys.exit(1)

    safe_print("🚀 Installing lai-app (LattifAI Web Application)...\n")

    # Report the npm version when it is present; otherwise install Node.js first.
    if check_command_exists("npm"):
        version_query = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=True)
        npm_version = version_query.stdout.strip()
        safe_print(f"✓ npm is already installed (v{npm_version})\n")
    else:
        install_nodejs()

    # (announcement, command, confirmation) for each stage, run in this order.
    stages = (
        ("📦 Installing dependencies...", ["npm", "install"], "✓ Dependencies installed\n"),
        ("🔨 Building application...", ["npm", "run", "build"], "✓ Application built\n"),
        ("🔗 Linking lai-app command globally...", ["npm", "link"], "✓ lai-app command linked globally\n"),
    )

    try:
        safe_print(f"📁 Working directory: {app_dir}\n")

        for announcement, command, confirmation in stages:
            safe_print(announcement)
            subprocess.run(command, cwd=app_dir, check=True)
            safe_print(confirmation)

        rule = "=" * 60
        safe_print(rule)
        safe_print("✅ lai-app installed successfully!")
        safe_print(rule)
        safe_print("\n🎉 You can now run:")
        print(" lai-app # Start the web application")
        print(" lai-app --help # Show help")
        print(" lai-app --port 8080 # Use custom port")
        safe_print("\n📖 For more information, see app/CLI_USAGE.md\n")

    except subprocess.CalledProcessError as e:
        safe_print(f"\n❌ Error during installation: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
lattifai/cli/server.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import argparse
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
import colorful
|
|
5
|
-
import uvicorn
|
|
6
|
-
|
|
7
|
-
from lattifai.utils import safe_print
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def main():
    """Launch the LattifAI Web Interface.

    Parses ``--port``/``-p`` (default 8001), ``--host`` (default ``0.0.0.0``)
    and ``--no-reload`` from the command line, prints a short startup summary,
    and hands control to ``uvicorn.run`` for ``lattifai.server.app:app``.
    """
    cli = argparse.ArgumentParser(description="LattifAI Backend Server")
    cli.add_argument("-p", "--port", type=int, default=8001, help="Port to run the server on (default: 8001)")
    cli.add_argument("--host", type=str, default="0.0.0.0", help="Host to bind the server to (default: 0.0.0.0)")
    cli.add_argument("--no-reload", action="store_true", help="Disable auto-reload on code changes")
    options = cli.parse_args()

    # Auto-reload is on unless the user opts out with --no-reload.
    auto_reload = not options.no_reload
    reload_state = "enabled" if auto_reload else "disabled"

    safe_print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
    print(colorful.cyan(f"Server running at http://localhost:{options.port}"))
    print(colorful.yellow(f"Host: {options.host}"))
    print(colorful.yellow(f"Auto-reload: {reload_state}"))
    print()

    uvicorn.run(
        "lattifai.server.app:app",
        host=options.host,
        port=options.port,
        reload=auto_reload,
        log_level="info",
    )


if __name__ == "__main__":
    main()
|