lattifai 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. lattifai/_init.py +20 -0
  2. lattifai/alignment/__init__.py +9 -1
  3. lattifai/alignment/lattice1_aligner.py +175 -54
  4. lattifai/alignment/lattice1_worker.py +47 -4
  5. lattifai/alignment/punctuation.py +38 -0
  6. lattifai/alignment/segmenter.py +3 -2
  7. lattifai/alignment/text_align.py +441 -0
  8. lattifai/alignment/tokenizer.py +134 -65
  9. lattifai/audio2.py +162 -183
  10. lattifai/cli/__init__.py +2 -1
  11. lattifai/cli/alignment.py +5 -0
  12. lattifai/cli/caption.py +111 -4
  13. lattifai/cli/transcribe.py +2 -6
  14. lattifai/cli/youtube.py +7 -1
  15. lattifai/client.py +72 -123
  16. lattifai/config/__init__.py +28 -0
  17. lattifai/config/alignment.py +14 -0
  18. lattifai/config/caption.py +45 -31
  19. lattifai/config/client.py +16 -0
  20. lattifai/config/event.py +102 -0
  21. lattifai/config/media.py +20 -0
  22. lattifai/config/transcription.py +25 -1
  23. lattifai/data/__init__.py +8 -0
  24. lattifai/data/caption.py +228 -0
  25. lattifai/diarization/__init__.py +41 -1
  26. lattifai/errors.py +78 -53
  27. lattifai/event/__init__.py +65 -0
  28. lattifai/event/lattifai.py +166 -0
  29. lattifai/mixin.py +49 -32
  30. lattifai/transcription/base.py +8 -2
  31. lattifai/transcription/gemini.py +147 -16
  32. lattifai/transcription/lattifai.py +25 -63
  33. lattifai/types.py +1 -1
  34. lattifai/utils.py +7 -13
  35. lattifai/workflow/__init__.py +28 -4
  36. lattifai/workflow/file_manager.py +2 -5
  37. lattifai/youtube/__init__.py +43 -0
  38. lattifai/youtube/client.py +1265 -0
  39. lattifai/youtube/types.py +23 -0
  40. lattifai-1.3.0.dist-info/METADATA +678 -0
  41. lattifai-1.3.0.dist-info/RECORD +57 -0
  42. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/entry_points.txt +1 -2
  43. lattifai/__init__.py +0 -88
  44. lattifai/alignment/sentence_splitter.py +0 -219
  45. lattifai/caption/__init__.py +0 -20
  46. lattifai/caption/caption.py +0 -1467
  47. lattifai/caption/gemini_reader.py +0 -462
  48. lattifai/caption/gemini_writer.py +0 -173
  49. lattifai/caption/supervision.py +0 -34
  50. lattifai/caption/text_parser.py +0 -145
  51. lattifai/cli/app_installer.py +0 -142
  52. lattifai/cli/server.py +0 -44
  53. lattifai/server/app.py +0 -427
  54. lattifai/workflow/youtube.py +0 -577
  55. lattifai-1.2.1.dist-info/METADATA +0 -1134
  56. lattifai-1.2.1.dist-info/RECORD +0 -58
  57. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/WHEEL +0 -0
  58. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/licenses/LICENSE +0 -0
  59. {lattifai-1.2.1.dist-info → lattifai-1.3.0.dist-info}/top_level.txt +0 -0
@@ -1,145 +0,0 @@
1
- import logging
2
- import re
3
- from typing import Optional, Tuple
4
-
5
- # Timestamp pattern: "[start-end] text", where start/end are runs of digits and dots
6
- # Example: [1.23-4.56] Hello world
7
- TIMESTAMP_PATTERN = re.compile(r"^\[([\d.]+)-([\d.]+)\]\s*(.*)$")
8
-
9
- # Speaker-label format commonly seen in captions: a ">>" or ">" marker, a name, then a colon. NOTE(review): the repeated alternatives and the doubled ":" look like extraction damage (presumably escaped or full-width variants) - confirm against upstream.
10
- SPEAKER_PATTERN = re.compile(r"((?:>>|>>|>|>).*?[::])\s*(.*)")
11
-
12
- # Transcriber Output Example:
13
- # 26:19.919 --> 26:34.921
14
- # [SPEAKER_01]: 越来越多的科技巨头入... (Chinese sample text: "More and more tech giants are entering...")
15
- SPEAKER_LATTIFAI = re.compile(r"(^\[SPEAKER_.*?\][::])\s*(.*)")
16
-
17
- # Upper-case name prefix (one or two words, 1-15 capital letters each), e.g. "NISHTHA BHATIA: Hey, everyone."
18
- # "DIETER: Oh, hey, Nishtha."
19
- # "GEMINI: That might"
20
- SPEAKER_PATTERN2 = re.compile(r"^([A-Z]{1,15}(?:\s+[A-Z]{1,15})?[::])\s*(.*)$")
21
-
22
-
23
# Literal substrings that normalize_text rewrites, mapped to their replacements.
_TEXT_REPLACEMENTS = {
    "&amp;": "&",
    "&lt;": "<",
    "&gt;": ">",
    "&quot;": '"',
    "&#39;": "'",
    "&nbsp;": " ",
    "\\N": " ",  # literal backslash + "N"; presumably a subtitle line-break marker - confirm
    "…": " ",  # replace ellipsis with space to avoid merging words
}
# One alternation over all keys, so every replacement happens in a single pass.
_TEXT_REPLACEMENTS_RE = re.compile("|".join(map(re.escape, _TEXT_REPLACEMENTS)))


def normalize_text(text: str) -> str:
    """Normalize caption text.

    Steps, in order:
      - Decode a fixed set of HTML entities and replace ``\\N`` and the
        ellipsis character with a space.
      - Convert curly apostrophes to straight ones in common English
        contractions (``it’s`` -> ``it's``) and after a digit before ``s``
        (``1990’s`` -> ``1990's``).
      - Collapse every whitespace run into a single space and strip the ends.

    HTML tags are NOT removed; they pass through unchanged.

    Args:
        text: Raw caption text. Any falsy value (``""`` or ``None``) is accepted.

    Returns:
        The normalized text, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    # A single pass decodes each entity exactly once. Chained ``str.replace``
    # calls would decode "&amp;lt;" to "&lt;" and then again to "<".
    text = _TEXT_REPLACEMENTS_RE.sub(lambda found: _TEXT_REPLACEMENTS[found.group(0)], text)

    # Convert curly apostrophes to straight apostrophes for common English contractions
    text = re.sub(r"([a-zA-Z])’([tsdm]|ll|re|ve)\b", r"\1'\2", text, flags=re.IGNORECASE)
    text = re.sub(r"([0-9])’([s])\b", r"\1'\2", text, flags=re.IGNORECASE)

    # Collapse whitespace last, so spaces introduced by the replacements are merged too.
    text = re.sub(r"\s+", " ", text)

    return text.strip()
57
-
58
-
59
def parse_speaker_text(line: str) -> Tuple[Optional[str], str]:
    """Split a caption line into ``(speaker, content)``.

    The line is tried against ``SPEAKER_PATTERN``, ``SPEAKER_LATTIFAI`` and
    ``SPEAKER_PATTERN2``, in that order; the first match wins.

    Args:
        line: One line of caption text.

    Returns:
        ``(speaker, content)``, both stripped, when a pattern matches. The
        speaker label keeps its trailing colon because the colon sits inside
        the first capture group of every pattern. ``(None, line)`` with the
        line untouched when nothing matches.
    """
    # Every speaker pattern requires a colon, so skip the regex work without one.
    # NOTE(review): the original guard tested the same ":" literal twice; the
    # second operand was presumably a full-width colon lost in extraction -
    # confirm against upstream before relying on full-width support.
    if ":" not in line:
        return None, line

    # Both groups always exist on a match and group 1 can never be empty, so
    # no per-pattern assertions or error branches are needed.
    for pattern in (SPEAKER_PATTERN, SPEAKER_LATTIFAI, SPEAKER_PATTERN2):
        match = pattern.match(line)
        if match:
            return match.group(1).strip(), match.group(2).strip()

    return None, line
84
-
85
-
86
def parse_timestamp_text(line: str) -> Tuple[Optional[float], Optional[float], str]:
    """Extract a leading ``[start-end]`` timestamp from a line.

    Format: [start-end] text
    Example: [1.23-4.56] Hello world

    Args:
        line: Input line to parse.

    Returns:
        ``(start_time, end_time, text)``. On success the times are floats
        and ``text`` is the stripped remainder after the bracket. If the line
        does not match ``TIMESTAMP_PATTERN``, or a captured number is not a
        valid float (for example ``1.2.3``), the result is
        ``(None, None, line)`` with the line returned unchanged.
    """
    found = TIMESTAMP_PATTERN.match(line)
    if not found:
        return None, None, line

    raw_start, raw_end, remainder = found.group(1), found.group(2), found.group(3)
    try:
        start = float(raw_start)
        end = float(raw_end)
    except ValueError:
        # The pattern accepts any run of digits and dots; treat unparsable ones as plain text.
        return None, None, line

    return start, end, remainder.strip()
114
-
115
-
116
if __name__ == "__main__":
    # Manual smoke test for the speaker patterns; it only runs when the module
    # is executed as a script.
    # An earlier, immediately overwritten ``pattern`` assignment was removed as a dead store.
    pattern = re.compile(r"(>>.*?[::])\s*(.*)")

    test_strings = [
        ">>Key: Value",
        ">> Key with space : Value with space ",
        ">> 全角键 : 全角值",
        ">>Key:Value xxx. >>Key:Value",
    ]

    # Non-matching inputs are skipped silently in this first loop.
    for text in test_strings:
        match = pattern.match(text)
        if match:
            print(f"Input: '{text}'")
            print(f"Speaker: '{match.group(1)}'")
            print(f"Content: '{match.group(2)}'")
            print("-------------")

    # pattern2: every sample must match, otherwise the script fails loudly.
    test_strings2 = ["NISHTHA BHATIA: Hey, everyone.", "DIETER: Oh, hey, Nishtha.", "GEMINI: That might"]
    for text in test_strings2:
        match = SPEAKER_PATTERN2.match(text)
        if match:
            print(f" Input: '{text}'")
            print(f"Speaker: '{match.group(1)}'")
            print(f"Content: '{match.group(2)}'")
            print("-------------")
        else:
            raise ValueError(f"No match for: '{text}'")
@@ -1,142 +0,0 @@
1
- """CLI tool to install lai-app (frontend web application)."""
2
-
3
- import platform
4
- import subprocess
5
- import sys
6
- from pathlib import Path
7
-
8
- from lattifai.utils import safe_print
9
-
10
-
11
def check_command_exists(cmd: str) -> bool:
    """Report whether ``cmd`` can be launched and exits successfully for ``--version``.

    Args:
        cmd: Command name (looked up by the OS) or path to an executable.

    Returns:
        True when ``cmd --version`` runs and exits with status 0. False when
        the command exits non-zero or cannot be launched at all (missing,
        not executable, or any other OS-level launch failure).
    """
    try:
        subprocess.run([cmd, "--version"], check=True, capture_output=True, text=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as PermissionError and other
        # launch failures, so an unusable command is never reported as present
        # and the probe never crashes the caller.
        return False
    return True
18
-
19
-
20
def install_nodejs():
    """Install Node.js with the platform's package manager, or exit with guidance.

    macOS uses Homebrew. Linux uses the first of apt, yum, dnf or pacman that
    ``check_command_exists`` reports. Windows and any other system are not
    automated. After an install, ``npm`` must be detectable.

    Raises:
        SystemExit: With status 1 when no supported installer is available,
            an install command fails, or ``npm`` is still missing afterwards.
    """
    manual_install_hint = " Please install Node.js manually from: https://nodejs.org/"
    # Probed in this order; each entry lists the commands to run for that manager.
    linux_managers = (
        ("apt", (["sudo", "apt", "update"], ["sudo", "apt", "install", "-y", "nodejs", "npm"])),
        ("yum", (["sudo", "yum", "install", "-y", "nodejs", "npm"],)),
        ("dnf", (["sudo", "dnf", "install", "-y", "nodejs", "npm"],)),
        ("pacman", (["sudo", "pacman", "-S", "--noconfirm", "nodejs", "npm"],)),
    )
    os_name = platform.system().lower()

    safe_print("📦 Node.js not found. Installing Node.js...\n")

    try:
        if os_name == "darwin":  # macOS
            if not check_command_exists("brew"):
                safe_print("❌ Homebrew not found.")
                print(" Please install Homebrew first:")
                print(
                    ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
                )
                print("\n Or install Node.js manually from: https://nodejs.org/")
                sys.exit(1)

            safe_print("🍺 Using Homebrew to install Node.js...")
            subprocess.run(["brew", "install", "node"], check=True)
            safe_print("✓ Node.js installed via Homebrew\n")

        elif os_name == "linux":
            for manager, commands in linux_managers:
                if check_command_exists(manager):
                    break
            else:
                # No probe succeeded.
                safe_print("❌ No supported package manager found (apt/yum/dnf/pacman).")
                print(manual_install_hint)
                sys.exit(1)

            safe_print(f"🐧 Using {manager} to install Node.js...")
            for command in commands:
                subprocess.run(command, check=True)
            safe_print(f"✓ Node.js installed via {manager}\n")

        elif os_name == "windows":
            safe_print("❌ Automatic installation on Windows is not supported.")
            print(" Please download and install Node.js from: https://nodejs.org/")
            print(" Then run this command again.")
            sys.exit(1)

        else:
            safe_print(f"❌ Unsupported operating system: {os_name}")
            print(manual_install_hint)
            sys.exit(1)

        # Verify installation
        if not check_command_exists("npm"):
            safe_print("❌ Node.js installation verification failed.")
            print(" Please restart your terminal and try again.")
            sys.exit(1)

    except subprocess.CalledProcessError as install_error:
        safe_print(f"\n❌ Error during Node.js installation: {install_error}")
        print(manual_install_hint)
        sys.exit(1)
87
-
88
-
89
def main():
    """Install, build and globally link the lai-app Node.js application.

    The app is expected in an ``app`` directory four levels above this file.
    If ``npm`` is not detected, ``install_nodejs`` is called first. Then
    ``npm install``, ``npm run build`` and ``npm link`` run inside that directory.

    Raises:
        SystemExit: With status 1 when the app directory is missing or any
            of the three npm steps fails.
    """
    # Get the app directory relative to this package
    package_file = Path(__file__)
    app_dir = package_file.parent.parent.parent.parent / "app"

    if not app_dir.exists():
        safe_print(f"❌ Error: app directory not found at {app_dir}")
        print(" Make sure you're in the lattifai-python repository.")
        sys.exit(1)

    safe_print("🚀 Installing lai-app (LattifAI Web Application)...\n")

    # Report the npm version when present, otherwise install Node.js first.
    if check_command_exists("npm"):
        version_probe = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=True)
        npm_version = version_probe.stdout.strip()
        safe_print(f"✓ npm is already installed (v{npm_version})\n")
    else:
        install_nodejs()

    # (announcement, npm command, confirmation) for each step, run in order.
    npm_steps = (
        ("📦 Installing dependencies...", ["npm", "install"], "✓ Dependencies installed\n"),
        ("🔨 Building application...", ["npm", "run", "build"], "✓ Application built\n"),
        ("🔗 Linking lai-app command globally...", ["npm", "link"], "✓ lai-app command linked globally\n"),
    )
    banner_rule = "=" * 60

    try:
        safe_print(f"📁 Working directory: {app_dir}\n")

        for announcement, command, confirmation in npm_steps:
            safe_print(announcement)
            subprocess.run(command, cwd=app_dir, check=True)
            safe_print(confirmation)

        safe_print(banner_rule)
        safe_print("✅ lai-app installed successfully!")
        safe_print(banner_rule)
        safe_print("\n🎉 You can now run:")
        print(" lai-app # Start the web application")
        print(" lai-app --help # Show help")
        print(" lai-app --port 8080 # Use custom port")
        safe_print("\n📖 For more information, see app/CLI_USAGE.md\n")

    except subprocess.CalledProcessError as step_error:
        safe_print(f"\n❌ Error during installation: {step_error}")
        sys.exit(1)


if __name__ == "__main__":
    main()
lattifai/cli/server.py DELETED
@@ -1,44 +0,0 @@
1
- import argparse
2
- import os
3
-
4
- import colorful
5
- import uvicorn
6
-
7
- from lattifai.utils import safe_print
8
-
9
-
10
def main():
    """Parse command-line options and start the LattifAI backend server with uvicorn.

    Options:
        -p/--port: Port to listen on (default 8001).
        --host: Address to bind to (default 0.0.0.0).
        --no-reload: Turn off uvicorn's auto-reload, which is otherwise on.
    """
    # (flags, keyword arguments) for each option, registered in this order.
    option_specs = (
        (("-p", "--port"), {"type": int, "default": 8001, "help": "Port to run the server on (default: 8001)"}),
        (("--host",), {"type": str, "default": "0.0.0.0", "help": "Host to bind the server to (default: 0.0.0.0)"}),
        (("--no-reload",), {"action": "store_true", "help": "Disable auto-reload on code changes"}),
    )

    parser = argparse.ArgumentParser(description="LattifAI Backend Server")
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)
    args = parser.parse_args()

    reload_enabled = not args.no_reload
    reload_state = "enabled" if reload_enabled else "disabled"

    safe_print(colorful.bold_green("🚀 Launching LattifAI Backend Server..."))
    # NOTE: the banner always shows localhost, whatever --host is set to.
    print(colorful.cyan(f"Server running at http://localhost:{args.port}"))
    print(colorful.yellow(f"Host: {args.host}"))
    print(colorful.yellow(f"Auto-reload: {reload_state}"))
    print()

    uvicorn.run(
        "lattifai.server.app:app",
        host=args.host,
        port=args.port,
        reload=reload_enabled,
        log_level="info",
    )


if __name__ == "__main__":
    main()