lattifai 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lattifai/__init__.py +10 -0
  2. lattifai/alignment/lattice1_aligner.py +64 -15
  3. lattifai/alignment/lattice1_worker.py +135 -50
  4. lattifai/alignment/segmenter.py +3 -2
  5. lattifai/alignment/tokenizer.py +14 -13
  6. lattifai/audio2.py +269 -70
  7. lattifai/caption/caption.py +213 -19
  8. lattifai/cli/__init__.py +2 -0
  9. lattifai/cli/alignment.py +2 -1
  10. lattifai/cli/app_installer.py +35 -33
  11. lattifai/cli/caption.py +9 -19
  12. lattifai/cli/diarization.py +108 -0
  13. lattifai/cli/server.py +3 -1
  14. lattifai/cli/transcribe.py +55 -38
  15. lattifai/cli/youtube.py +1 -0
  16. lattifai/client.py +42 -121
  17. lattifai/config/alignment.py +37 -2
  18. lattifai/config/caption.py +1 -1
  19. lattifai/config/media.py +23 -3
  20. lattifai/config/transcription.py +4 -0
  21. lattifai/diarization/lattifai.py +18 -7
  22. lattifai/errors.py +7 -3
  23. lattifai/mixin.py +45 -16
  24. lattifai/server/app.py +2 -1
  25. lattifai/transcription/__init__.py +1 -1
  26. lattifai/transcription/base.py +21 -2
  27. lattifai/transcription/gemini.py +127 -1
  28. lattifai/transcription/lattifai.py +30 -2
  29. lattifai/utils.py +96 -28
  30. lattifai/workflow/file_manager.py +15 -13
  31. lattifai/workflow/youtube.py +16 -1
  32. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/METADATA +86 -22
  33. lattifai-1.1.0.dist-info/RECORD +57 -0
  34. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/entry_points.txt +2 -0
  35. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/licenses/LICENSE +1 -1
  36. lattifai-1.0.4.dist-info/RECORD +0 -56
  37. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/WHEEL +0 -0
  38. {lattifai-1.0.4.dist-info → lattifai-1.1.0.dist-info}/top_level.txt +0 -0
@@ -4,17 +4,19 @@ import json
4
4
  import re
5
5
  from dataclasses import dataclass, field
6
6
  from pathlib import Path
7
- from typing import Any, Dict, List, Optional, Union
7
+ from typing import Any, Dict, List, Optional, TypeVar
8
8
 
9
9
  from lhotse.supervision import AlignmentItem
10
10
  from lhotse.utils import Pathlike
11
11
  from tgt import TextGrid
12
12
 
13
- from ..config.caption import InputCaptionFormat, OutputCaptionFormat
13
+ from ..config.caption import InputCaptionFormat, OutputCaptionFormat # noqa: F401
14
14
  from .supervision import Supervision
15
15
  from .text_parser import normalize_text as normalize_text_fn
16
16
  from .text_parser import parse_speaker_text, parse_timestamp_text
17
17
 
18
+ DiarizationOutput = TypeVar("DiarizationOutput")
19
+
18
20
 
19
21
  @dataclass
20
22
  class Caption:
@@ -40,7 +42,7 @@ class Caption:
40
42
  # Audio Event Detection results
41
43
  audio_events: Optional[TextGrid] = None
42
44
  # Speaker Diarization results
43
- speaker_diarization: Optional[TextGrid] = None
45
+ speaker_diarization: Optional[DiarizationOutput] = None
44
46
  # Alignment results
45
47
  alignments: List[Supervision] = field(default_factory=list)
46
48
 
@@ -272,7 +274,7 @@ class Caption:
272
274
  cls,
273
275
  transcription: List[Supervision],
274
276
  audio_events: Optional[TextGrid] = None,
275
- speaker_diarization: Optional[TextGrid] = None,
277
+ speaker_diarization: Optional[DiarizationOutput] = None,
276
278
  language: Optional[str] = None,
277
279
  source_path: Optional[Pathlike] = None,
278
280
  metadata: Optional[Dict[str, str]] = None,
@@ -283,7 +285,7 @@ class Caption:
283
285
  Args:
284
286
  transcription: List of transcription supervision segments
285
287
  audio_events: Optional TextGrid with audio event detection results
286
- speaker_diarization: Optional TextGrid with speaker diarization results
288
+ speaker_diarization: Optional DiarizationOutput with speaker diarization results
287
289
  language: Language code
288
290
  source_path: Source file path
289
291
  metadata: Additional metadata
@@ -307,7 +309,7 @@ class Caption:
307
309
  cls,
308
310
  path: Pathlike,
309
311
  format: Optional[str] = None,
310
- normalize_text: bool = False,
312
+ normalize_text: bool = True,
311
313
  ) -> "Caption":
312
314
  """
313
315
  Read caption file and return Caption object.
@@ -384,9 +386,9 @@ class Caption:
384
386
  """
385
387
  Read speaker diarization TextGrid from file.
386
388
  """
387
- from tgt import read_textgrid
389
+ from lattifai_core.diarization import DiarizationOutput
388
390
 
389
- self.speaker_diarization = read_textgrid(path)
391
+ self.speaker_diarization = DiarizationOutput.read(path)
390
392
  return self.speaker_diarization
391
393
 
392
394
  def write_speaker_diarization(
@@ -399,9 +401,7 @@ class Caption:
399
401
  if not self.speaker_diarization:
400
402
  raise ValueError("No speaker diarization data to write.")
401
403
 
402
- from tgt import write_to_file
403
-
404
- write_to_file(self.speaker_diarization, path, format="long")
404
+ self.speaker_diarization.write(path)
405
405
  return path
406
406
 
407
407
  @staticmethod
@@ -451,7 +451,10 @@ class Caption:
451
451
  else:
452
452
  if include_speaker_in_text and sup.speaker is not None:
453
453
  # Use [SPEAKER]: format for consistency with parsing
454
- text = f"[{sup.speaker}]: {sup.text}"
454
+ if not sup.has_custom("original_speaker") or sup.custom["original_speaker"]:
455
+ text = f"[{sup.speaker}]: {sup.text}"
456
+ else:
457
+ text = f"{sup.text}"
455
458
  else:
456
459
  text = sup.text
457
460
  f.write(f"[{sup.start:.2f}-{sup.end:.2f}] {text}\n")
@@ -471,7 +474,12 @@ class Caption:
471
474
  tg = TextGrid()
472
475
  supervisions, words, scores = [], [], {"utterances": [], "words": []}
473
476
  for supervision in sorted(alignments, key=lambda x: x.start):
474
- if include_speaker_in_text and supervision.speaker is not None:
477
+ # Respect `original_speaker` custom flag: default to include speaker when missing
478
+ if (
479
+ include_speaker_in_text
480
+ and supervision.speaker is not None
481
+ and (not supervision.has_custom("original_speaker") or supervision.custom["original_speaker"])
482
+ ):
475
483
  text = f"{supervision.speaker} {supervision.text}"
476
484
  else:
477
485
  text = supervision.text
@@ -505,6 +513,8 @@ class Caption:
505
513
  cls._write_csv(alignments, output_path, include_speaker_in_text)
506
514
  elif str(output_path)[-4:].lower() == ".aud":
507
515
  cls._write_aud(alignments, output_path, include_speaker_in_text)
516
+ elif str(output_path)[-4:].lower() == ".sbv":
517
+ cls._write_sbv(alignments, output_path, include_speaker_in_text)
508
518
  else:
509
519
  import pysubs2
510
520
 
@@ -524,7 +534,10 @@ class Caption:
524
534
  )
525
535
  else:
526
536
  if include_speaker_in_text and sup.speaker is not None:
527
- text = f"{sup.speaker} {sup.text}"
537
+ if not sup.has_custom("original_speaker") or sup.custom["original_speaker"]:
538
+ text = f"{sup.speaker} {sup.text}"
539
+ else:
540
+ text = f"{sup.text}"
528
541
  else:
529
542
  text = sup.text
530
543
  subs.append(
@@ -535,7 +548,14 @@ class Caption:
535
548
  name=sup.speaker or "",
536
549
  )
537
550
  )
538
- subs.save(output_path)
551
+
552
+ # MicroDVD format requires framerate to be specified
553
+ output_ext = str(output_path).lower().split(".")[-1]
554
+ if output_ext == "sub":
555
+ # Default to 25 fps for MicroDVD format if not specified
556
+ subs.save(output_path, fps=25.0)
557
+ else:
558
+ subs.save(output_path)
539
559
 
540
560
  return output_path
541
561
 
@@ -821,7 +841,16 @@ class Caption:
821
841
  if cls._is_youtube_vtt_with_word_timestamps(content):
822
842
  return cls._parse_youtube_vtt_with_word_timestamps(content, normalize_text)
823
843
 
824
- if format == "gemini" or str(caption).endswith("Gemini.md"):
844
+ # Match Gemini format: explicit format, or files ending with Gemini.md/Gemini3.md,
845
+ # or files containing "gemini" in the name with .md extension
846
+ caption_str = str(caption).lower()
847
+ is_gemini_format = (
848
+ format == "gemini"
849
+ or str(caption).endswith("Gemini.md")
850
+ or str(caption).endswith("Gemini3.md")
851
+ or ("gemini" in caption_str and caption_str.endswith(".md"))
852
+ )
853
+ if is_gemini_format:
825
854
  from .gemini_reader import GeminiReader
826
855
 
827
856
  supervisions = GeminiReader.extract_for_alignment(caption)
@@ -850,6 +879,8 @@ class Caption:
850
879
  supervisions = cls._parse_csv(caption, normalize_text)
851
880
  elif format == "aud" or str(caption)[-4:].lower() == ".aud":
852
881
  supervisions = cls._parse_aud(caption, normalize_text)
882
+ elif format == "sbv" or str(caption)[-4:].lower() == ".sbv":
883
+ supervisions = cls._parse_sbv(caption, normalize_text)
853
884
  elif format == "txt" or (format == "auto" and str(caption)[-4:].lower() == ".txt"):
854
885
  if not Path(str(caption)).exists(): # str
855
886
  lines = [line.strip() for line in str(caption).split("\n")]
@@ -1113,6 +1144,101 @@ class Caption:
1113
1144
 
1114
1145
  return supervisions
1115
1146
 
1147
+ @classmethod
1148
+ def _parse_sbv(cls, caption: Pathlike, normalize_text: Optional[bool] = False) -> List[Supervision]:
1149
+ """
1150
+ Parse SubViewer (SBV) format caption file.
1151
+
1152
+ Format:
1153
+ 0:00:00.000,0:00:02.000
1154
+ Text line 1
1155
+
1156
+ 0:00:02.000,0:00:04.000
1157
+ Text line 2
1158
+
1159
+ Args:
1160
+ caption: Caption file path
1161
+ normalize_text: Whether to normalize text
1162
+
1163
+ Returns:
1164
+ List of Supervision objects
1165
+ """
1166
+ caption_path = Path(str(caption))
1167
+ if not caption_path.exists():
1168
+ raise FileNotFoundError(f"Caption file not found: {caption}")
1169
+
1170
+ supervisions = []
1171
+
1172
+ with open(caption_path, "r", encoding="utf-8") as f:
1173
+ content = f.read()
1174
+
1175
+ # Split by double newlines to separate entries
1176
+ entries = content.strip().split("\n\n")
1177
+
1178
+ for entry in entries:
1179
+ lines = entry.strip().split("\n")
1180
+ if len(lines) < 2:
1181
+ continue
1182
+
1183
+ # First line: timestamp (H:MM:SS.mmm,H:MM:SS.mmm)
1184
+ timestamp_line = lines[0].strip()
1185
+ # Remaining lines: text
1186
+ text_lines = lines[1:]
1187
+
1188
+ try:
1189
+ # Parse timestamp: 0:00:00.000,0:00:02.000
1190
+ if "," not in timestamp_line:
1191
+ continue
1192
+
1193
+ start_str, end_str = timestamp_line.split(",", 1)
1194
+
1195
+ # Parse start time
1196
+ start_parts = start_str.strip().split(":")
1197
+ if len(start_parts) == 3:
1198
+ h, m, s = start_parts
1199
+ s_parts = s.split(".")
1200
+ start = int(h) * 3600 + int(m) * 60 + int(s_parts[0])
1201
+ if len(s_parts) > 1:
1202
+ start += int(s_parts[1]) / 1000.0
1203
+ else:
1204
+ continue
1205
+
1206
+ # Parse end time
1207
+ end_parts = end_str.strip().split(":")
1208
+ if len(end_parts) == 3:
1209
+ h, m, s = end_parts
1210
+ s_parts = s.split(".")
1211
+ end = int(h) * 3600 + int(m) * 60 + int(s_parts[0])
1212
+ if len(s_parts) > 1:
1213
+ end += int(s_parts[1]) / 1000.0
1214
+ else:
1215
+ continue
1216
+
1217
+ # Parse text and speaker
1218
+ text = " ".join(text_lines).strip()
1219
+ speaker, text = parse_speaker_text(text)
1220
+
1221
+ if normalize_text:
1222
+ text = normalize_text_fn(text)
1223
+
1224
+ duration = end - start
1225
+ if duration < 0:
1226
+ continue
1227
+
1228
+ supervisions.append(
1229
+ Supervision(
1230
+ text=text,
1231
+ start=start,
1232
+ duration=duration,
1233
+ speaker=speaker,
1234
+ )
1235
+ )
1236
+ except (ValueError, IndexError):
1237
+ # Skip malformed entries
1238
+ continue
1239
+
1240
+ return supervisions
1241
+
1116
1242
  @classmethod
1117
1243
  def _write_tsv(
1118
1244
  cls,
@@ -1136,7 +1262,11 @@ class Caption:
1136
1262
  if include_speaker_in_text:
1137
1263
  file.write("speaker\tstart\tend\ttext\n")
1138
1264
  for supervision in alignments:
1139
- speaker = supervision.speaker or ""
1265
+ # Respect `original_speaker` custom flag: default to True when missing
1266
+ include_speaker = supervision.speaker and (
1267
+ not supervision.has_custom("original_speaker") or supervision.custom["original_speaker"]
1268
+ )
1269
+ speaker = supervision.speaker if include_speaker else ""
1140
1270
  start_ms = round(1000 * supervision.start)
1141
1271
  end_ms = round(1000 * supervision.end)
1142
1272
  text = supervision.text.strip().replace("\t", " ")
@@ -1174,7 +1304,10 @@ class Caption:
1174
1304
  writer = csv.writer(file)
1175
1305
  writer.writerow(["speaker", "start", "end", "text"])
1176
1306
  for supervision in alignments:
1177
- speaker = supervision.speaker or ""
1307
+ include_speaker = supervision.speaker and (
1308
+ not supervision.has_custom("original_speaker") or supervision.custom["original_speaker"]
1309
+ )
1310
+ speaker = supervision.speaker if include_speaker else ""
1178
1311
  start_ms = round(1000 * supervision.start)
1179
1312
  end_ms = round(1000 * supervision.end)
1180
1313
  text = supervision.text.strip()
@@ -1212,11 +1345,72 @@ class Caption:
1212
1345
  end = supervision.end
1213
1346
  text = supervision.text.strip().replace("\t", " ")
1214
1347
 
1215
- if include_speaker_in_text and supervision.speaker:
1348
+ # Respect `original_speaker` custom flag when adding speaker prefix
1349
+ if (
1350
+ include_speaker_in_text
1351
+ and supervision.speaker
1352
+ and (not supervision.has_custom("original_speaker") or supervision.custom["original_speaker"])
1353
+ ):
1216
1354
  text = f"[[{supervision.speaker}]]{text}"
1217
1355
 
1218
1356
  file.write(f"{start}\t{end}\t{text}\n")
1219
1357
 
1358
+ @classmethod
1359
+ def _write_sbv(
1360
+ cls,
1361
+ alignments: List[Supervision],
1362
+ output_path: Pathlike,
1363
+ include_speaker_in_text: bool = True,
1364
+ ) -> None:
1365
+ """
1366
+ Write caption to SubViewer (SBV) format.
1367
+
1368
+ Format:
1369
+ 0:00:00.000,0:00:02.000
1370
+ Text line 1
1371
+
1372
+ 0:00:02.000,0:00:04.000
1373
+ Text line 2
1374
+
1375
+ Args:
1376
+ alignments: List of supervision segments to write
1377
+ output_path: Path to output SBV file
1378
+ include_speaker_in_text: Whether to include speaker in text
1379
+ """
1380
+ with open(output_path, "w", encoding="utf-8") as file:
1381
+ for i, supervision in enumerate(alignments):
1382
+ # Format timestamps as H:MM:SS.mmm
1383
+ start_h = int(supervision.start // 3600)
1384
+ start_m = int((supervision.start % 3600) // 60)
1385
+ start_s = int(supervision.start % 60)
1386
+ start_ms = int((supervision.start % 1) * 1000)
1387
+
1388
+ end_h = int(supervision.end // 3600)
1389
+ end_m = int((supervision.end % 3600) // 60)
1390
+ end_s = int(supervision.end % 60)
1391
+ end_ms = int((supervision.end % 1) * 1000)
1392
+
1393
+ start_time = f"{start_h}:{start_m:02d}:{start_s:02d}.{start_ms:03d}"
1394
+ end_time = f"{end_h}:{end_m:02d}:{end_s:02d}.{end_ms:03d}"
1395
+
1396
+ # Write timestamp line
1397
+ file.write(f"{start_time},{end_time}\n")
1398
+
1399
+ # Write text (with optional speaker). Respect `original_speaker` custom flag.
1400
+ text = supervision.text.strip()
1401
+ if (
1402
+ include_speaker_in_text
1403
+ and supervision.speaker
1404
+ and (not supervision.has_custom("original_speaker") or supervision.custom["original_speaker"])
1405
+ ):
1406
+ text = f"{supervision.speaker}: {text}"
1407
+
1408
+ file.write(f"{text}\n")
1409
+
1410
+ # Add blank line between entries (except after last one)
1411
+ if i < len(alignments) - 1:
1412
+ file.write("\n")
1413
+
1220
1414
  @classmethod
1221
1415
  def _parse_caption(
1222
1416
  cls, caption: Pathlike, format: Optional[OutputCaptionFormat], normalize_text: Optional[bool] = False
lattifai/cli/__init__.py CHANGED
@@ -5,12 +5,14 @@ import nemo_run as run # noqa: F401
5
5
  # Import and re-export entrypoints at package level so NeMo Run can find them
6
6
  from lattifai.cli.alignment import align
7
7
  from lattifai.cli.caption import convert
8
+ from lattifai.cli.diarization import diarize
8
9
  from lattifai.cli.transcribe import transcribe, transcribe_align
9
10
  from lattifai.cli.youtube import youtube
10
11
 
11
12
  __all__ = [
12
13
  "align",
13
14
  "convert",
15
+ "diarize",
14
16
  "transcribe",
15
17
  "transcribe_align",
16
18
  "youtube",
lattifai/cli/alignment.py CHANGED
@@ -81,7 +81,7 @@ def align(
81
81
  caption.word_level=true \\
82
82
  caption.normalize_text=true \\
83
83
  alignment.device=mps \\
84
- alignment.model_name=Lattifai/Lattice-1-Alpha
84
+ alignment.model_name=LattifAI/Lattice-1-Alpha
85
85
  """
86
86
  media_config = media or MediaConfig()
87
87
 
@@ -142,6 +142,7 @@ def align(
142
142
  output_caption_path=caption_config.output_path,
143
143
  split_sentence=caption_config.split_sentence,
144
144
  channel_selector=media_config.channel_selector,
145
+ streaming_chunk_secs=media_config.streaming_chunk_secs,
145
146
  )
146
147
 
147
148
 
@@ -5,6 +5,8 @@ import subprocess
5
5
  import sys
6
6
  from pathlib import Path
7
7
 
8
+ from lattifai.utils import safe_print
9
+
8
10
 
9
11
  def check_command_exists(cmd: str) -> bool:
10
12
  """Check if a command exists in PATH."""
@@ -19,17 +21,17 @@ def install_nodejs():
19
21
  """Install Node.js based on the operating system."""
20
22
  system = platform.system().lower()
21
23
 
22
- print("📦 Node.js not found. Installing Node.js...\n")
24
+ safe_print("📦 Node.js not found. Installing Node.js...\n")
23
25
 
24
26
  try:
25
27
  if system == "darwin": # macOS
26
28
  # Check if Homebrew is installed
27
29
  if check_command_exists("brew"):
28
- print("🍺 Using Homebrew to install Node.js...")
30
+ safe_print("🍺 Using Homebrew to install Node.js...")
29
31
  subprocess.run(["brew", "install", "node"], check=True)
30
- print("✓ Node.js installed via Homebrew\n")
32
+ safe_print("✓ Node.js installed via Homebrew\n")
31
33
  else:
32
- print("❌ Homebrew not found.")
34
+ safe_print("❌ Homebrew not found.")
33
35
  print(" Please install Homebrew first:")
34
36
  print(
35
37
  ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
@@ -40,46 +42,46 @@ def install_nodejs():
40
42
  elif system == "linux":
41
43
  # Try common package managers
42
44
  if check_command_exists("apt"):
43
- print("🐧 Using apt to install Node.js...")
45
+ safe_print("🐧 Using apt to install Node.js...")
44
46
  subprocess.run(["sudo", "apt", "update"], check=True)
45
47
  subprocess.run(["sudo", "apt", "install", "-y", "nodejs", "npm"], check=True)
46
- print("✓ Node.js installed via apt\n")
48
+ safe_print("✓ Node.js installed via apt\n")
47
49
  elif check_command_exists("yum"):
48
- print("🐧 Using yum to install Node.js...")
50
+ safe_print("🐧 Using yum to install Node.js...")
49
51
  subprocess.run(["sudo", "yum", "install", "-y", "nodejs", "npm"], check=True)
50
- print("✓ Node.js installed via yum\n")
52
+ safe_print("✓ Node.js installed via yum\n")
51
53
  elif check_command_exists("dnf"):
52
- print("🐧 Using dnf to install Node.js...")
54
+ safe_print("🐧 Using dnf to install Node.js...")
53
55
  subprocess.run(["sudo", "dnf", "install", "-y", "nodejs", "npm"], check=True)
54
- print("✓ Node.js installed via dnf\n")
56
+ safe_print("✓ Node.js installed via dnf\n")
55
57
  elif check_command_exists("pacman"):
56
- print("🐧 Using pacman to install Node.js...")
58
+ safe_print("🐧 Using pacman to install Node.js...")
57
59
  subprocess.run(["sudo", "pacman", "-S", "--noconfirm", "nodejs", "npm"], check=True)
58
- print("✓ Node.js installed via pacman\n")
60
+ safe_print("✓ Node.js installed via pacman\n")
59
61
  else:
60
- print("❌ No supported package manager found (apt/yum/dnf/pacman).")
62
+ safe_print("❌ No supported package manager found (apt/yum/dnf/pacman).")
61
63
  print(" Please install Node.js manually from: https://nodejs.org/")
62
64
  sys.exit(1)
63
65
 
64
66
  elif system == "windows":
65
- print("❌ Automatic installation on Windows is not supported.")
67
+ safe_print("❌ Automatic installation on Windows is not supported.")
66
68
  print(" Please download and install Node.js from: https://nodejs.org/")
67
69
  print(" Then run this command again.")
68
70
  sys.exit(1)
69
71
 
70
72
  else:
71
- print(f"❌ Unsupported operating system: {system}")
73
+ safe_print(f"❌ Unsupported operating system: {system}")
72
74
  print(" Please install Node.js manually from: https://nodejs.org/")
73
75
  sys.exit(1)
74
76
 
75
77
  # Verify installation
76
78
  if not check_command_exists("npm"):
77
- print("❌ Node.js installation verification failed.")
79
+ safe_print("❌ Node.js installation verification failed.")
78
80
  print(" Please restart your terminal and try again.")
79
81
  sys.exit(1)
80
82
 
81
83
  except subprocess.CalledProcessError as e:
82
- print(f"\n❌ Error during Node.js installation: {e}")
84
+ safe_print(f"\n❌ Error during Node.js installation: {e}")
83
85
  print(" Please install Node.js manually from: https://nodejs.org/")
84
86
  sys.exit(1)
85
87
 
@@ -90,49 +92,49 @@ def main():
90
92
  app_dir = Path(__file__).parent.parent.parent.parent / "app"
91
93
 
92
94
  if not app_dir.exists():
93
- print(f"❌ Error: app directory not found at {app_dir}")
95
+ safe_print(f"❌ Error: app directory not found at {app_dir}")
94
96
  print(" Make sure you're in the lattifai-python repository.")
95
97
  sys.exit(1)
96
98
 
97
- print("🚀 Installing lai-app (LattifAI Web Application)...\n")
99
+ safe_print("🚀 Installing lai-app (LattifAI Web Application)...\n")
98
100
 
99
101
  # Check if npm is installed, if not, install Node.js
100
102
  if not check_command_exists("npm"):
101
103
  install_nodejs()
102
104
  else:
103
105
  npm_version = subprocess.run(["npm", "--version"], capture_output=True, text=True, check=True).stdout.strip()
104
- print(f"✓ npm is already installed (v{npm_version})\n")
106
+ safe_print(f"✓ npm is already installed (v{npm_version})\n")
105
107
 
106
108
  # Change to app directory and run installation
107
109
  try:
108
- print(f"📁 Working directory: {app_dir}\n")
110
+ safe_print(f"📁 Working directory: {app_dir}\n")
109
111
 
110
112
  # Install dependencies
111
- print("📦 Installing dependencies...")
113
+ safe_print("📦 Installing dependencies...")
112
114
  subprocess.run(["npm", "install"], cwd=app_dir, check=True)
113
- print("✓ Dependencies installed\n")
115
+ safe_print("✓ Dependencies installed\n")
114
116
 
115
117
  # Build the application
116
- print("🔨 Building application...")
118
+ safe_print("🔨 Building application...")
117
119
  subprocess.run(["npm", "run", "build"], cwd=app_dir, check=True)
118
- print("✓ Application built\n")
120
+ safe_print("✓ Application built\n")
119
121
 
120
122
  # Link globally
121
- print("🔗 Linking lai-app command globally...")
123
+ safe_print("🔗 Linking lai-app command globally...")
122
124
  subprocess.run(["npm", "link"], cwd=app_dir, check=True)
123
- print("✓ lai-app command linked globally\n")
125
+ safe_print("✓ lai-app command linked globally\n")
124
126
 
125
- print("=" * 60)
126
- print("✅ lai-app installed successfully!")
127
- print("=" * 60)
128
- print("\n🎉 You can now run:")
127
+ safe_print("=" * 60)
128
+ safe_print("✅ lai-app installed successfully!")
129
+ safe_print("=" * 60)
130
+ safe_print("\n🎉 You can now run:")
129
131
  print(" lai-app # Start the web application")
130
132
  print(" lai-app --help # Show help")
131
133
  print(" lai-app --port 8080 # Use custom port")
132
- print("\n📖 For more information, see app/CLI_USAGE.md\n")
134
+ safe_print("\n📖 For more information, see app/CLI_USAGE.md\n")
133
135
 
134
136
  except subprocess.CalledProcessError as e:
135
- print(f"\n❌ Error during installation: {e}")
137
+ safe_print(f"\n❌ Error during installation: {e}")
136
138
  sys.exit(1)
137
139
 
138
140
 
lattifai/cli/caption.py CHANGED
@@ -7,13 +7,14 @@ from lhotse.utils import Pathlike
7
7
  from typing_extensions import Annotated
8
8
 
9
9
  from lattifai.config import CaptionConfig
10
+ from lattifai.utils import safe_print
10
11
 
11
12
 
12
13
  @run.cli.entrypoint(name="convert", namespace="caption")
13
14
  def convert(
14
15
  input_path: Pathlike,
15
16
  output_path: Pathlike,
16
- include_speaker_in_text: bool = True,
17
+ include_speaker_in_text: bool = False,
17
18
  normalize_text: bool = False,
18
19
  ):
19
20
  """
@@ -55,7 +56,7 @@ def convert(
55
56
  caption = Caption.read(input_path, normalize_text=normalize_text)
56
57
  caption.write(output_path, include_speaker_in_text=include_speaker_in_text)
57
58
 
58
- print(f"✅ Converted {input_path} -> {output_path}")
59
+ safe_print(f"✅ Converted {input_path} -> {output_path}")
59
60
  return output_path
60
61
 
61
62
 
@@ -63,7 +64,6 @@ def convert(
63
64
  def normalize(
64
65
  input_path: Pathlike,
65
66
  output_path: Pathlike,
66
- caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
67
67
  ):
68
68
  """
69
69
  Normalize caption text by cleaning HTML entities and whitespace.
@@ -81,9 +81,6 @@ def normalize(
81
81
  Args:
82
82
  input_path: Path to input caption file to normalize
83
83
  output_path: Path to output caption file (defaults to overwriting input file)
84
- caption: Caption configuration for text normalization.
85
- Fields: input_format, output_format, normalize_text (automatically enabled),
86
- encoding
87
84
 
88
85
  Examples:
89
86
  # Normalize and save to new file (positional arguments)
@@ -92,13 +89,9 @@ def normalize(
92
89
  # Normalize with format conversion
93
90
  lai caption normalize input.vtt output.srt
94
91
 
95
- # Normalize with custom caption config
96
- lai caption normalize input.srt output.srt \\
97
- caption.encoding=utf-8
98
-
99
92
  # Using keyword arguments (traditional syntax)
100
- lai caption normalize \\
101
- input_path=input.srt \\
93
+ lai caption normalize \
94
+ input_path=input.srt \
102
95
  output_path=output.srt
103
96
  """
104
97
  from pathlib import Path
@@ -112,9 +105,9 @@ def normalize(
112
105
  caption_obj.write(output_path, include_speaker_in_text=True)
113
106
 
114
107
  if output_path == input_path:
115
- print(f"✅ Normalized {input_path} (in-place)")
108
+ safe_print(f"✅ Normalized {input_path} (in-place)")
116
109
  else:
117
- print(f"✅ Normalized {input_path} -> {output_path}")
110
+ safe_print(f"✅ Normalized {input_path} -> {output_path}")
118
111
 
119
112
  return output_path
120
113
 
@@ -124,7 +117,6 @@ def shift(
124
117
  input_path: Pathlike,
125
118
  output_path: Pathlike,
126
119
  seconds: float,
127
- caption: Annotated[Optional[CaptionConfig], run.Config[CaptionConfig]] = None,
128
120
  ):
129
121
  """
130
122
  Shift caption timestamps by a specified number of seconds.
@@ -140,8 +132,6 @@ def shift(
140
132
  output_path: Path to output caption file (can be same as input for in-place modification)
141
133
  seconds: Number of seconds to shift timestamps. Positive values delay captions,
142
134
  negative values advance them earlier.
143
- caption: Caption configuration for reading/writing.
144
- Fields: input_format, output_format, encoding
145
135
 
146
136
  Examples:
147
137
  # Delay captions by 2 seconds (positional arguments)
@@ -181,9 +171,9 @@ def shift(
181
171
  direction = f"advanced by {abs(seconds)}s"
182
172
 
183
173
  if output_path == input_path:
184
- print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
174
+ safe_print(f"✅ Shifted timestamps {direction} in {input_path} (in-place)")
185
175
  else:
186
- print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
176
+ safe_print(f"✅ Shifted timestamps {direction}: {input_path} -> {output_path}")
187
177
 
188
178
  return output_path
189
179