lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattifai/__init__.py +42 -27
- lattifai/alignment/__init__.py +6 -0
- lattifai/alignment/lattice1_aligner.py +119 -0
- lattifai/{workers/lattice1_alpha.py → alignment/lattice1_worker.py} +33 -132
- lattifai/{tokenizer → alignment}/phonemizer.py +1 -1
- lattifai/alignment/segmenter.py +166 -0
- lattifai/{tokenizer → alignment}/tokenizer.py +186 -112
- lattifai/audio2.py +211 -0
- lattifai/caption/__init__.py +20 -0
- lattifai/caption/caption.py +1275 -0
- lattifai/{io → caption}/supervision.py +1 -0
- lattifai/{io → caption}/text_parser.py +53 -10
- lattifai/cli/__init__.py +17 -0
- lattifai/cli/alignment.py +153 -0
- lattifai/cli/caption.py +204 -0
- lattifai/cli/server.py +19 -0
- lattifai/cli/transcribe.py +197 -0
- lattifai/cli/youtube.py +128 -0
- lattifai/client.py +455 -246
- lattifai/config/__init__.py +20 -0
- lattifai/config/alignment.py +73 -0
- lattifai/config/caption.py +178 -0
- lattifai/config/client.py +46 -0
- lattifai/config/diarization.py +67 -0
- lattifai/config/media.py +335 -0
- lattifai/config/transcription.py +84 -0
- lattifai/diarization/__init__.py +5 -0
- lattifai/diarization/lattifai.py +89 -0
- lattifai/errors.py +41 -34
- lattifai/logging.py +116 -0
- lattifai/mixin.py +552 -0
- lattifai/server/app.py +420 -0
- lattifai/transcription/__init__.py +76 -0
- lattifai/transcription/base.py +108 -0
- lattifai/transcription/gemini.py +219 -0
- lattifai/transcription/lattifai.py +103 -0
- lattifai/types.py +30 -0
- lattifai/utils.py +3 -31
- lattifai/workflow/__init__.py +22 -0
- lattifai/workflow/agents.py +6 -0
- lattifai/{workflows → workflow}/file_manager.py +81 -57
- lattifai/workflow/youtube.py +564 -0
- lattifai-1.0.0.dist-info/METADATA +736 -0
- lattifai-1.0.0.dist-info/RECORD +52 -0
- {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
- lattifai-1.0.0.dist-info/entry_points.txt +13 -0
- lattifai/base_client.py +0 -126
- lattifai/bin/__init__.py +0 -3
- lattifai/bin/agent.py +0 -324
- lattifai/bin/align.py +0 -295
- lattifai/bin/cli_base.py +0 -25
- lattifai/bin/subtitle.py +0 -210
- lattifai/io/__init__.py +0 -43
- lattifai/io/reader.py +0 -86
- lattifai/io/utils.py +0 -15
- lattifai/io/writer.py +0 -102
- lattifai/tokenizer/__init__.py +0 -3
- lattifai/workers/__init__.py +0 -3
- lattifai/workflows/__init__.py +0 -34
- lattifai/workflows/agents.py +0 -12
- lattifai/workflows/gemini.py +0 -167
- lattifai/workflows/prompts/README.md +0 -22
- lattifai/workflows/prompts/gemini/README.md +0 -24
- lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
- lattifai/workflows/youtube.py +0 -931
- lattifai-0.4.6.dist-info/METADATA +0 -806
- lattifai-0.4.6.dist-info/RECORD +0 -39
- lattifai-0.4.6.dist-info/entry_points.txt +0 -3
- /lattifai/{io → caption}/gemini_reader.py +0 -0
- /lattifai/{io → caption}/gemini_writer.py +0 -0
- /lattifai/{workflows → transcription}/prompts/__init__.py +0 -0
- /lattifai/{workflows → workflow}/base.py +0 -0
- {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -17,6 +17,9 @@ except ImportError: # pragma: no cover - optional dependency
|
|
|
17
17
|
questionary = None
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
TRANSCRIBE_CHOICE = "transcribe"
|
|
21
|
+
|
|
22
|
+
|
|
20
23
|
class FileExistenceManager:
|
|
21
24
|
"""Utility class for handling file existence checks and user confirmations"""
|
|
22
25
|
|
|
@@ -24,37 +27,37 @@ class FileExistenceManager:
|
|
|
24
27
|
"media": ("🎬", "Media"),
|
|
25
28
|
# 'audio': ('📱', 'Audio'),
|
|
26
29
|
# 'video': ('🎬', 'Video'),
|
|
27
|
-
"
|
|
30
|
+
"caption": ("📝", "Caption"),
|
|
28
31
|
}
|
|
29
32
|
|
|
30
33
|
@staticmethod
|
|
31
34
|
def check_existing_files(
|
|
32
35
|
video_id: str,
|
|
33
|
-
|
|
36
|
+
output_path: str,
|
|
34
37
|
media_formats: List[str] = None,
|
|
35
|
-
|
|
38
|
+
caption_formats: List[str] = None,
|
|
36
39
|
) -> Dict[str, List[str]]:
|
|
37
40
|
"""
|
|
38
41
|
Enhanced version to check for existing media files with customizable formats
|
|
39
42
|
|
|
40
43
|
Args:
|
|
41
44
|
video_id: Video ID from any platform
|
|
42
|
-
|
|
45
|
+
output_path: Output directory to check
|
|
43
46
|
media_formats: List of media formats to check (audio and video combined)
|
|
44
|
-
|
|
47
|
+
caption_formats: List of caption formats to check
|
|
45
48
|
|
|
46
49
|
Returns:
|
|
47
|
-
Dictionary with 'media', '
|
|
50
|
+
Dictionary with 'media', 'caption' keys containing lists of existing files
|
|
48
51
|
"""
|
|
49
|
-
output_path = Path(
|
|
50
|
-
existing_files = {"media": [], "
|
|
52
|
+
output_path = Path(output_path).expanduser()
|
|
53
|
+
existing_files = {"media": [], "caption": []}
|
|
51
54
|
|
|
52
55
|
if not output_path.exists():
|
|
53
56
|
return existing_files
|
|
54
57
|
|
|
55
58
|
# Default formats - combine audio and video formats
|
|
56
59
|
media_formats = media_formats or ["mp3", "wav", "m4a", "aac", "opus", "mp4", "webm", "mkv", "avi"]
|
|
57
|
-
|
|
60
|
+
caption_formats = caption_formats or ["md", "srt", "vtt", "ass", "ssa", "sub", "sbv", "txt"]
|
|
58
61
|
|
|
59
62
|
# Check for media files (audio and video)
|
|
60
63
|
for ext in set(media_formats): # Remove duplicates
|
|
@@ -69,44 +72,41 @@ class FileExistenceManager:
|
|
|
69
72
|
if file_path not in existing_files["media"]:
|
|
70
73
|
existing_files["media"].append(file_path)
|
|
71
74
|
|
|
72
|
-
# Check for
|
|
73
|
-
for ext in set(
|
|
74
|
-
# Check multiple naming patterns for
|
|
75
|
+
# Check for caption files
|
|
76
|
+
for ext in set(caption_formats): # Remove duplicates
|
|
77
|
+
# Check multiple naming patterns for caption files
|
|
75
78
|
# Pattern 1: Simple pattern like {video_id}.vtt
|
|
76
|
-
|
|
77
|
-
if
|
|
78
|
-
existing_files["
|
|
79
|
+
caption_file = output_path / f"{video_id}.{ext}"
|
|
80
|
+
if caption_file.exists():
|
|
81
|
+
existing_files["caption"].append(str(caption_file))
|
|
79
82
|
|
|
80
83
|
# Pattern 2: With language/track suffix like {video_id}.en-trackid.vtt
|
|
81
|
-
for sub_file in output_path.glob(f"{video_id}
|
|
84
|
+
for sub_file in output_path.glob(f"{video_id}*.{ext}"):
|
|
82
85
|
file_path = str(sub_file)
|
|
83
|
-
if file_path not in existing_files["
|
|
84
|
-
existing_files["
|
|
85
|
-
|
|
86
|
-
if "md" in subtitle_formats:
|
|
87
|
-
# Gemini-specific pattern: {video_id}_Gemini.md
|
|
88
|
-
gemini_subtitle_file = output_path / f"{video_id}_Gemini.md"
|
|
89
|
-
if gemini_subtitle_file.exists():
|
|
90
|
-
existing_files["subtitle"].append(str(gemini_subtitle_file))
|
|
86
|
+
if file_path not in existing_files["caption"]:
|
|
87
|
+
existing_files["caption"].append(file_path)
|
|
91
88
|
|
|
92
89
|
return existing_files
|
|
93
90
|
|
|
94
91
|
@staticmethod
|
|
95
|
-
def prompt_user_confirmation(
|
|
92
|
+
def prompt_user_confirmation(
|
|
93
|
+
existing_files: Dict[str, List[str]], operation: str = "download", transcriber_name: str = None
|
|
94
|
+
) -> str:
|
|
96
95
|
"""
|
|
97
96
|
Prompt user for confirmation when files already exist (legacy, confirms all files together)
|
|
98
97
|
|
|
99
98
|
Args:
|
|
100
99
|
existing_files: Dictionary of existing files
|
|
101
100
|
operation: Type of operation (e.g., "download", "generate")
|
|
101
|
+
transcriber_name: Name of the transcriber to display (e.g., "Gemini_2.5_Pro")
|
|
102
102
|
|
|
103
103
|
Returns:
|
|
104
104
|
User choice: 'use' (use existing), 'overwrite' (regenerate), or 'cancel'
|
|
105
105
|
"""
|
|
106
106
|
has_media = bool(existing_files.get("media", []))
|
|
107
|
-
|
|
107
|
+
has_caption = bool(existing_files.get("caption", []))
|
|
108
108
|
|
|
109
|
-
if not has_media and not
|
|
109
|
+
if not has_media and not has_caption:
|
|
110
110
|
return "proceed" # No existing files, proceed normally
|
|
111
111
|
|
|
112
112
|
# Header with warning color
|
|
@@ -116,24 +116,39 @@ class FileExistenceManager:
|
|
|
116
116
|
file_paths = []
|
|
117
117
|
if has_media:
|
|
118
118
|
file_paths.extend(existing_files["media"])
|
|
119
|
-
if
|
|
120
|
-
file_paths.extend(existing_files["
|
|
119
|
+
if has_caption:
|
|
120
|
+
file_paths.extend(existing_files["caption"])
|
|
121
121
|
|
|
122
122
|
# Create display options with emojis
|
|
123
|
-
options = []
|
|
124
|
-
for file_path in file_paths:
|
|
123
|
+
options, shift_length = [], 0
|
|
124
|
+
for file_path in sorted(file_paths):
|
|
125
125
|
# Determine emoji based on file type
|
|
126
126
|
if has_media and file_path in existing_files["media"]:
|
|
127
127
|
display_text = f'{colorful.green("•")} 🎬 Media file: {file_path}'
|
|
128
|
+
shift_length = len("Media file:")
|
|
128
129
|
else:
|
|
129
|
-
display_text = f'{colorful.green("•")} 📝
|
|
130
|
+
display_text = f'{colorful.green("•")} 📝 Caption file: {file_path}'
|
|
131
|
+
shift_length = len("Caption file:")
|
|
130
132
|
options.append((display_text, file_path))
|
|
131
133
|
|
|
132
|
-
# Add overwrite and cancel options
|
|
134
|
+
# Add overwrite and cancel options with aligned spacing
|
|
135
|
+
overwrite_text, overwrite_op = "Overwrite existing files (re-generate or download)", "overwrite"
|
|
136
|
+
if transcriber_name:
|
|
137
|
+
options.append(
|
|
138
|
+
(
|
|
139
|
+
f'{colorful.green("•")} 🔄 {" " * shift_length} {overwrite_text}',
|
|
140
|
+
overwrite_op,
|
|
141
|
+
)
|
|
142
|
+
)
|
|
143
|
+
overwrite_text, overwrite_op = f"Transcribe with {transcriber_name}", TRANSCRIBE_CHOICE
|
|
144
|
+
|
|
133
145
|
options.extend(
|
|
134
146
|
[
|
|
135
|
-
(
|
|
136
|
-
|
|
147
|
+
(
|
|
148
|
+
f'{colorful.green("•")} 🔄 {" " * shift_length} {overwrite_text}',
|
|
149
|
+
overwrite_op,
|
|
150
|
+
),
|
|
151
|
+
(f'{colorful.green("•")} ❌ {" " * shift_length} Cancel operation', "cancel"),
|
|
137
152
|
]
|
|
138
153
|
)
|
|
139
154
|
|
|
@@ -143,6 +158,8 @@ class FileExistenceManager:
|
|
|
143
158
|
|
|
144
159
|
if choice == "overwrite":
|
|
145
160
|
print(f'{colorful.yellow("🔄 Overwriting existing files")}')
|
|
161
|
+
elif choice == TRANSCRIBE_CHOICE:
|
|
162
|
+
print(f'{colorful.magenta(f"✨ Will transcribe with {transcriber_name}")}')
|
|
146
163
|
elif choice == "cancel":
|
|
147
164
|
print(f'{colorful.red("❌ Operation cancelled")}')
|
|
148
165
|
elif choice in file_paths:
|
|
@@ -158,7 +175,7 @@ class FileExistenceManager:
|
|
|
158
175
|
Prompt user for confirmation for a specific file type
|
|
159
176
|
|
|
160
177
|
Args:
|
|
161
|
-
file_type: Type of file ('audio', 'video', '
|
|
178
|
+
file_type: Type of file ('audio', 'video', 'caption', 'gemini')
|
|
162
179
|
files: List of existing files of this type
|
|
163
180
|
operation: Type of operation (e.g., "download", "generate")
|
|
164
181
|
|
|
@@ -174,21 +191,21 @@ class FileExistenceManager:
|
|
|
174
191
|
# Header with warning color
|
|
175
192
|
print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
|
|
176
193
|
|
|
177
|
-
for file_path in files:
|
|
194
|
+
for file_path in sorted(files):
|
|
178
195
|
print(f' {colorful.green("•")} {file_path}')
|
|
179
196
|
|
|
180
|
-
prompt_message = f"What would you like to do with {label
|
|
197
|
+
prompt_message = f"What would you like to do with {label} files?"
|
|
181
198
|
options = [
|
|
182
|
-
(f"Use existing {label
|
|
183
|
-
(f"Overwrite {label
|
|
199
|
+
(f"Use existing {label} files (skip {operation})", "use"),
|
|
200
|
+
(f"Overwrite {label} files (re-{operation})", "overwrite"),
|
|
184
201
|
("Cancel operation", "cancel"),
|
|
185
202
|
]
|
|
186
203
|
choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default="use")
|
|
187
204
|
|
|
188
205
|
if choice == "use":
|
|
189
|
-
print(f'{colorful.green(f"✅ Using existing {label
|
|
206
|
+
print(f'{colorful.green(f"✅ Using existing {label} files")}')
|
|
190
207
|
elif choice == "overwrite":
|
|
191
|
-
print(f'{colorful.yellow(f"🔄 Overwriting {label
|
|
208
|
+
print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
|
|
192
209
|
elif choice == "cancel":
|
|
193
210
|
print(f'{colorful.red("❌ Operation cancelled")}')
|
|
194
211
|
|
|
@@ -196,19 +213,23 @@ class FileExistenceManager:
|
|
|
196
213
|
|
|
197
214
|
@staticmethod
|
|
198
215
|
def prompt_file_selection(
|
|
199
|
-
file_type: str,
|
|
216
|
+
file_type: str,
|
|
217
|
+
files: List[str],
|
|
218
|
+
operation: str = "use",
|
|
219
|
+
transcriber_name: str = None,
|
|
200
220
|
) -> str:
|
|
201
221
|
"""
|
|
202
222
|
Prompt user to select a specific file from a list, or choose to overwrite/cancel
|
|
203
223
|
|
|
204
224
|
Args:
|
|
205
|
-
file_type: Type of file (e.g., 'gemini transcript', '
|
|
225
|
+
file_type: Type of file (e.g., 'gemini transcript', 'caption')
|
|
206
226
|
files: List of existing files to choose from
|
|
207
227
|
operation: Type of operation (e.g., "transcribe", "download")
|
|
208
|
-
|
|
228
|
+
transcriber_name: Name of the transcriber to display (e.g., "Gemini_2.5_Pro").
|
|
229
|
+
If provided, adds transcribe option for the transcriber.
|
|
209
230
|
|
|
210
231
|
Returns:
|
|
211
|
-
Selected file path, 'overwrite' to regenerate, 'gemini' to transcribe with
|
|
232
|
+
Selected file path, 'overwrite' to regenerate, 'gemini' to transcribe with transcriber, or 'cancel' to abort
|
|
212
233
|
"""
|
|
213
234
|
if not files:
|
|
214
235
|
return "proceed"
|
|
@@ -228,16 +249,17 @@ class FileExistenceManager:
|
|
|
228
249
|
|
|
229
250
|
# Create options with full file paths
|
|
230
251
|
options = []
|
|
231
|
-
for i, file_path in enumerate(files, 1):
|
|
252
|
+
for i, file_path in enumerate(sorted(files), 1):
|
|
232
253
|
# Display full path for clarity
|
|
233
254
|
options.append((f"{colorful.cyan(file_path)}", file_path))
|
|
234
255
|
|
|
235
|
-
# Add
|
|
236
|
-
if
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
256
|
+
# Add transcription or overwrite option
|
|
257
|
+
if transcriber_name:
|
|
258
|
+
transcribe_text = f"✨ Transcribe with {transcriber_name}"
|
|
259
|
+
options.append((colorful.magenta(transcribe_text), TRANSCRIBE_CHOICE))
|
|
260
|
+
else:
|
|
261
|
+
overwrite_text = f"Overwrite (re-{operation} or download)"
|
|
262
|
+
options.append((colorful.yellow(overwrite_text), "overwrite"))
|
|
241
263
|
options.append((colorful.red("Cancel operation"), "cancel"))
|
|
242
264
|
|
|
243
265
|
prompt_message = colorful.bold_black_on_cyan(f"Select which {file_type} to use:")
|
|
@@ -246,9 +268,11 @@ class FileExistenceManager:
|
|
|
246
268
|
if choice == "cancel":
|
|
247
269
|
print(f'{colorful.red("❌ Operation cancelled")}')
|
|
248
270
|
elif choice == "overwrite":
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
271
|
+
overwrite_msg = f"🔄 Overwriting all {file_type} files"
|
|
272
|
+
print(f"{colorful.yellow(overwrite_msg)}")
|
|
273
|
+
elif choice == TRANSCRIBE_CHOICE:
|
|
274
|
+
transcribe_msg = f"✨ Will transcribe with {transcriber_name}"
|
|
275
|
+
print(f"{colorful.magenta(transcribe_msg)}")
|
|
252
276
|
else:
|
|
253
277
|
print(f'{colorful.green(f"✅ Using: {choice}")}')
|
|
254
278
|
|
|
@@ -269,7 +293,7 @@ class FileExistenceManager:
|
|
|
269
293
|
Dictionary mapping file type to user choice ('use', 'overwrite', 'proceed', or 'cancel')
|
|
270
294
|
"""
|
|
271
295
|
ordered_types = []
|
|
272
|
-
for preferred in ["media", "audio", "video", "
|
|
296
|
+
for preferred in ["media", "audio", "video", "caption"]:
|
|
273
297
|
if preferred not in ordered_types:
|
|
274
298
|
ordered_types.append(preferred)
|
|
275
299
|
for file_type in existing_files.keys():
|