lattifai 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. lattifai/__init__.py +42 -27
  2. lattifai/alignment/__init__.py +6 -0
  3. lattifai/alignment/lattice1_aligner.py +119 -0
  4. lattifai/{workers/lattice1_alpha.py → alignment/lattice1_worker.py} +33 -132
  5. lattifai/{tokenizer → alignment}/phonemizer.py +1 -1
  6. lattifai/alignment/segmenter.py +166 -0
  7. lattifai/{tokenizer → alignment}/tokenizer.py +186 -112
  8. lattifai/audio2.py +211 -0
  9. lattifai/caption/__init__.py +20 -0
  10. lattifai/caption/caption.py +1275 -0
  11. lattifai/{io → caption}/supervision.py +1 -0
  12. lattifai/{io → caption}/text_parser.py +53 -10
  13. lattifai/cli/__init__.py +17 -0
  14. lattifai/cli/alignment.py +153 -0
  15. lattifai/cli/caption.py +204 -0
  16. lattifai/cli/server.py +19 -0
  17. lattifai/cli/transcribe.py +197 -0
  18. lattifai/cli/youtube.py +128 -0
  19. lattifai/client.py +455 -246
  20. lattifai/config/__init__.py +20 -0
  21. lattifai/config/alignment.py +73 -0
  22. lattifai/config/caption.py +178 -0
  23. lattifai/config/client.py +46 -0
  24. lattifai/config/diarization.py +67 -0
  25. lattifai/config/media.py +335 -0
  26. lattifai/config/transcription.py +84 -0
  27. lattifai/diarization/__init__.py +5 -0
  28. lattifai/diarization/lattifai.py +89 -0
  29. lattifai/errors.py +41 -34
  30. lattifai/logging.py +116 -0
  31. lattifai/mixin.py +552 -0
  32. lattifai/server/app.py +420 -0
  33. lattifai/transcription/__init__.py +76 -0
  34. lattifai/transcription/base.py +108 -0
  35. lattifai/transcription/gemini.py +219 -0
  36. lattifai/transcription/lattifai.py +103 -0
  37. lattifai/types.py +30 -0
  38. lattifai/utils.py +3 -31
  39. lattifai/workflow/__init__.py +22 -0
  40. lattifai/workflow/agents.py +6 -0
  41. lattifai/{workflows → workflow}/file_manager.py +81 -57
  42. lattifai/workflow/youtube.py +564 -0
  43. lattifai-1.0.0.dist-info/METADATA +736 -0
  44. lattifai-1.0.0.dist-info/RECORD +52 -0
  45. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/WHEEL +1 -1
  46. lattifai-1.0.0.dist-info/entry_points.txt +13 -0
  47. lattifai/base_client.py +0 -126
  48. lattifai/bin/__init__.py +0 -3
  49. lattifai/bin/agent.py +0 -324
  50. lattifai/bin/align.py +0 -295
  51. lattifai/bin/cli_base.py +0 -25
  52. lattifai/bin/subtitle.py +0 -210
  53. lattifai/io/__init__.py +0 -43
  54. lattifai/io/reader.py +0 -86
  55. lattifai/io/utils.py +0 -15
  56. lattifai/io/writer.py +0 -102
  57. lattifai/tokenizer/__init__.py +0 -3
  58. lattifai/workers/__init__.py +0 -3
  59. lattifai/workflows/__init__.py +0 -34
  60. lattifai/workflows/agents.py +0 -12
  61. lattifai/workflows/gemini.py +0 -167
  62. lattifai/workflows/prompts/README.md +0 -22
  63. lattifai/workflows/prompts/gemini/README.md +0 -24
  64. lattifai/workflows/prompts/gemini/transcription_gem.txt +0 -81
  65. lattifai/workflows/youtube.py +0 -931
  66. lattifai-0.4.6.dist-info/METADATA +0 -806
  67. lattifai-0.4.6.dist-info/RECORD +0 -39
  68. lattifai-0.4.6.dist-info/entry_points.txt +0 -3
  69. /lattifai/{io → caption}/gemini_reader.py +0 -0
  70. /lattifai/{io → caption}/gemini_writer.py +0 -0
  71. /lattifai/{workflows → transcription}/prompts/__init__.py +0 -0
  72. /lattifai/{workflows → workflow}/base.py +0 -0
  73. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/licenses/LICENSE +0 -0
  74. {lattifai-0.4.6.dist-info → lattifai-1.0.0.dist-info}/top_level.txt +0 -0
@@ -17,6 +17,9 @@ except ImportError: # pragma: no cover - optional dependency
17
17
  questionary = None
18
18
 
19
19
 
20
+ TRANSCRIBE_CHOICE = "transcribe"
21
+
22
+
20
23
  class FileExistenceManager:
21
24
  """Utility class for handling file existence checks and user confirmations"""
22
25
 
@@ -24,37 +27,37 @@ class FileExistenceManager:
24
27
  "media": ("🎬", "Media"),
25
28
  # 'audio': ('📱', 'Audio'),
26
29
  # 'video': ('🎬', 'Video'),
27
- "subtitle": ("📝", "Subtitle"),
30
+ "caption": ("📝", "Caption"),
28
31
  }
29
32
 
30
33
  @staticmethod
31
34
  def check_existing_files(
32
35
  video_id: str,
33
- output_dir: str,
36
+ output_path: str,
34
37
  media_formats: List[str] = None,
35
- subtitle_formats: List[str] = None,
38
+ caption_formats: List[str] = None,
36
39
  ) -> Dict[str, List[str]]:
37
40
  """
38
41
  Enhanced version to check for existing media files with customizable formats
39
42
 
40
43
  Args:
41
44
  video_id: Video ID from any platform
42
- output_dir: Output directory to check
45
+ output_path: Output directory to check
43
46
  media_formats: List of media formats to check (audio and video combined)
44
- subtitle_formats: List of subtitle formats to check
47
+ caption_formats: List of caption formats to check
45
48
 
46
49
  Returns:
47
- Dictionary with 'media', 'subtitle' keys containing lists of existing files
50
+ Dictionary with 'media', 'caption' keys containing lists of existing files
48
51
  """
49
- output_path = Path(output_dir).expanduser()
50
- existing_files = {"media": [], "subtitle": []}
52
+ output_path = Path(output_path).expanduser()
53
+ existing_files = {"media": [], "caption": []}
51
54
 
52
55
  if not output_path.exists():
53
56
  return existing_files
54
57
 
55
58
  # Default formats - combine audio and video formats
56
59
  media_formats = media_formats or ["mp3", "wav", "m4a", "aac", "opus", "mp4", "webm", "mkv", "avi"]
57
- subtitle_formats = subtitle_formats or ["md", "srt", "vtt", "ass", "ssa", "sub", "sbv", "txt"]
60
+ caption_formats = caption_formats or ["md", "srt", "vtt", "ass", "ssa", "sub", "sbv", "txt"]
58
61
 
59
62
  # Check for media files (audio and video)
60
63
  for ext in set(media_formats): # Remove duplicates
@@ -69,44 +72,41 @@ class FileExistenceManager:
69
72
  if file_path not in existing_files["media"]:
70
73
  existing_files["media"].append(file_path)
71
74
 
72
- # Check for subtitle files
73
- for ext in set(subtitle_formats): # Remove duplicates
74
- # Check multiple naming patterns for subtitle files
75
+ # Check for caption files
76
+ for ext in set(caption_formats): # Remove duplicates
77
+ # Check multiple naming patterns for caption files
75
78
  # Pattern 1: Simple pattern like {video_id}.vtt
76
- subtitle_file = output_path / f"{video_id}.{ext}"
77
- if subtitle_file.exists():
78
- existing_files["subtitle"].append(str(subtitle_file))
79
+ caption_file = output_path / f"{video_id}.{ext}"
80
+ if caption_file.exists():
81
+ existing_files["caption"].append(str(caption_file))
79
82
 
80
83
  # Pattern 2: With language/track suffix like {video_id}.en-trackid.vtt
81
- for sub_file in output_path.glob(f"{video_id}.*.{ext}"):
84
+ for sub_file in output_path.glob(f"{video_id}*.{ext}"):
82
85
  file_path = str(sub_file)
83
- if file_path not in existing_files["subtitle"]:
84
- existing_files["subtitle"].append(file_path)
85
-
86
- if "md" in subtitle_formats:
87
- # Gemini-specific pattern: {video_id}_Gemini.md
88
- gemini_subtitle_file = output_path / f"{video_id}_Gemini.md"
89
- if gemini_subtitle_file.exists():
90
- existing_files["subtitle"].append(str(gemini_subtitle_file))
86
+ if file_path not in existing_files["caption"]:
87
+ existing_files["caption"].append(file_path)
91
88
 
92
89
  return existing_files
93
90
 
94
91
  @staticmethod
95
- def prompt_user_confirmation(existing_files: Dict[str, List[str]], operation: str = "download") -> str:
92
+ def prompt_user_confirmation(
93
+ existing_files: Dict[str, List[str]], operation: str = "download", transcriber_name: str = None
94
+ ) -> str:
96
95
  """
97
96
  Prompt user for confirmation when files already exist (legacy, confirms all files together)
98
97
 
99
98
  Args:
100
99
  existing_files: Dictionary of existing files
101
100
  operation: Type of operation (e.g., "download", "generate")
101
+ transcriber_name: Name of the transcriber to display (e.g., "Gemini_2.5_Pro")
102
102
 
103
103
  Returns:
104
104
  User choice: 'use' (use existing), 'overwrite' (regenerate), or 'cancel'
105
105
  """
106
106
  has_media = bool(existing_files.get("media", []))
107
- has_subtitle = bool(existing_files.get("subtitle", []))
107
+ has_caption = bool(existing_files.get("caption", []))
108
108
 
109
- if not has_media and not has_subtitle:
109
+ if not has_media and not has_caption:
110
110
  return "proceed" # No existing files, proceed normally
111
111
 
112
112
  # Header with warning color
@@ -116,24 +116,39 @@ class FileExistenceManager:
116
116
  file_paths = []
117
117
  if has_media:
118
118
  file_paths.extend(existing_files["media"])
119
- if has_subtitle:
120
- file_paths.extend(existing_files["subtitle"])
119
+ if has_caption:
120
+ file_paths.extend(existing_files["caption"])
121
121
 
122
122
  # Create display options with emojis
123
- options = []
124
- for file_path in file_paths:
123
+ options, shift_length = [], 0
124
+ for file_path in sorted(file_paths):
125
125
  # Determine emoji based on file type
126
126
  if has_media and file_path in existing_files["media"]:
127
127
  display_text = f'{colorful.green("•")} 🎬 Media file: {file_path}'
128
+ shift_length = len("Media file:")
128
129
  else:
129
- display_text = f'{colorful.green("•")} 📝 Subtitle file: {file_path}'
130
+ display_text = f'{colorful.green("•")} 📝 Caption file: {file_path}'
131
+ shift_length = len("Caption file:")
130
132
  options.append((display_text, file_path))
131
133
 
132
- # Add overwrite and cancel options
134
+ # Add overwrite and cancel options with aligned spacing
135
+ overwrite_text, overwrite_op = "Overwrite existing files (re-generate or download)", "overwrite"
136
+ if transcriber_name:
137
+ options.append(
138
+ (
139
+ f'{colorful.green("•")} 🔄 {" " * shift_length} {overwrite_text}',
140
+ overwrite_op,
141
+ )
142
+ )
143
+ overwrite_text, overwrite_op = f"Transcribe with {transcriber_name}", TRANSCRIBE_CHOICE
144
+
133
145
  options.extend(
134
146
  [
135
- (" Overwrite existing files (re-generate or download)", "overwrite"),
136
- (" Cancel operation", "cancel"),
147
+ (
148
+ f'{colorful.green("•")} 🔄 {" " * shift_length} {overwrite_text}',
149
+ overwrite_op,
150
+ ),
151
+ (f'{colorful.green("•")} ❌ {" " * shift_length} Cancel operation', "cancel"),
137
152
  ]
138
153
  )
139
154
 
@@ -143,6 +158,8 @@ class FileExistenceManager:
143
158
 
144
159
  if choice == "overwrite":
145
160
  print(f'{colorful.yellow("🔄 Overwriting existing files")}')
161
+ elif choice == TRANSCRIBE_CHOICE:
162
+ print(f'{colorful.magenta(f"✨ Will transcribe with {transcriber_name}")}')
146
163
  elif choice == "cancel":
147
164
  print(f'{colorful.red("❌ Operation cancelled")}')
148
165
  elif choice in file_paths:
@@ -158,7 +175,7 @@ class FileExistenceManager:
158
175
  Prompt user for confirmation for a specific file type
159
176
 
160
177
  Args:
161
- file_type: Type of file ('audio', 'video', 'subtitle', 'gemini')
178
+ file_type: Type of file ('audio', 'video', 'caption', 'gemini')
162
179
  files: List of existing files of this type
163
180
  operation: Type of operation (e.g., "download", "generate")
164
181
 
@@ -174,21 +191,21 @@ class FileExistenceManager:
174
191
  # Header with warning color
175
192
  print(f'\n{colorful.bold_yellow(f"⚠️ Existing {label} files found:")}')
176
193
 
177
- for file_path in files:
194
+ for file_path in sorted(files):
178
195
  print(f' {colorful.green("•")} {file_path}')
179
196
 
180
- prompt_message = f"What would you like to do with {label.lower()} files?"
197
+ prompt_message = f"What would you like to do with {label} files?"
181
198
  options = [
182
- (f"Use existing {label.lower()} files (skip {operation})", "use"),
183
- (f"Overwrite {label.lower()} files (re-{operation})", "overwrite"),
199
+ (f"Use existing {label} files (skip {operation})", "use"),
200
+ (f"Overwrite {label} files (re-{operation})", "overwrite"),
184
201
  ("Cancel operation", "cancel"),
185
202
  ]
186
203
  choice = FileExistenceManager._prompt_user_choice(prompt_message, options, default="use")
187
204
 
188
205
  if choice == "use":
189
- print(f'{colorful.green(f"✅ Using existing {label.lower()} files")}')
206
+ print(f'{colorful.green(f"✅ Using existing {label} files")}')
190
207
  elif choice == "overwrite":
191
- print(f'{colorful.yellow(f"🔄 Overwriting {label.lower()} files")}')
208
+ print(f'{colorful.yellow(f"🔄 Overwriting {label} files")}')
192
209
  elif choice == "cancel":
193
210
  print(f'{colorful.red("❌ Operation cancelled")}')
194
211
 
@@ -196,19 +213,23 @@ class FileExistenceManager:
196
213
 
197
214
  @staticmethod
198
215
  def prompt_file_selection(
199
- file_type: str, files: List[str], operation: str = "use", enable_gemini: bool = False
216
+ file_type: str,
217
+ files: List[str],
218
+ operation: str = "use",
219
+ transcriber_name: str = None,
200
220
  ) -> str:
201
221
  """
202
222
  Prompt user to select a specific file from a list, or choose to overwrite/cancel
203
223
 
204
224
  Args:
205
- file_type: Type of file (e.g., 'gemini transcript', 'subtitle')
225
+ file_type: Type of file (e.g., 'gemini transcript', 'caption')
206
226
  files: List of existing files to choose from
207
227
  operation: Type of operation (e.g., "transcribe", "download")
208
- enable_gemini: If True, adds "Transcribe with Gemini" option
228
+ transcriber_name: Name of the transcriber to display (e.g., "Gemini_2.5_Pro").
229
+ If provided, adds transcribe option for the transcriber.
209
230
 
210
231
  Returns:
211
- Selected file path, 'overwrite' to regenerate, 'gemini' to transcribe with Gemini, or 'cancel' to abort
232
+ Selected file path, 'overwrite' to regenerate, 'gemini' to transcribe with transcriber, or 'cancel' to abort
212
233
  """
213
234
  if not files:
214
235
  return "proceed"
@@ -228,16 +249,17 @@ class FileExistenceManager:
228
249
 
229
250
  # Create options with full file paths
230
251
  options = []
231
- for i, file_path in enumerate(files, 1):
252
+ for i, file_path in enumerate(sorted(files), 1):
232
253
  # Display full path for clarity
233
254
  options.append((f"{colorful.cyan(file_path)}", file_path))
234
255
 
235
- # Add Gemini transcription option if enabled
236
- if enable_gemini:
237
- options.append((colorful.magenta("✨ Transcribe with Gemini 2.5 Pro"), "gemini"))
238
-
239
- # Add overwrite and cancel options
240
- options.append((colorful.yellow(f"Overwrite (re-{operation} or download)"), "overwrite"))
256
+ # Add transcription or overwrite option
257
+ if transcriber_name:
258
+ transcribe_text = f"✨ Transcribe with {transcriber_name}"
259
+ options.append((colorful.magenta(transcribe_text), TRANSCRIBE_CHOICE))
260
+ else:
261
+ overwrite_text = f"Overwrite (re-{operation} or download)"
262
+ options.append((colorful.yellow(overwrite_text), "overwrite"))
241
263
  options.append((colorful.red("Cancel operation"), "cancel"))
242
264
 
243
265
  prompt_message = colorful.bold_black_on_cyan(f"Select which {file_type} to use:")
@@ -246,9 +268,11 @@ class FileExistenceManager:
246
268
  if choice == "cancel":
247
269
  print(f'{colorful.red("❌ Operation cancelled")}')
248
270
  elif choice == "overwrite":
249
- print(f'{colorful.yellow(f"🔄 Overwriting all {file_type} files")}')
250
- elif choice == "gemini":
251
- print(f'{colorful.magenta("✨ Will transcribe with Gemini 2.5 Pro")}')
271
+ overwrite_msg = f"🔄 Overwriting all {file_type} files"
272
+ print(f"{colorful.yellow(overwrite_msg)}")
273
+ elif choice == TRANSCRIBE_CHOICE:
274
+ transcribe_msg = f"✨ Will transcribe with {transcriber_name}"
275
+ print(f"{colorful.magenta(transcribe_msg)}")
252
276
  else:
253
277
  print(f'{colorful.green(f"✅ Using: {choice}")}')
254
278
 
@@ -269,7 +293,7 @@ class FileExistenceManager:
269
293
  Dictionary mapping file type to user choice ('use', 'overwrite', 'proceed', or 'cancel')
270
294
  """
271
295
  ordered_types = []
272
- for preferred in ["media", "audio", "video", "subtitle"]:
296
+ for preferred in ["media", "audio", "video", "caption"]:
273
297
  if preferred not in ordered_types:
274
298
  ordered_types.append(preferred)
275
299
  for file_type in existing_files.keys():