omni-captions-skills 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/.github/workflows/test.yml +4 -2
  2. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/.gitignore +1 -0
  3. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/PKG-INFO +14 -3
  4. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/README.md +9 -0
  5. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/README_zh.md +9 -0
  6. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/pyproject.toml +6 -3
  7. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/skills/omnicaptions-LaiCut/SKILL.md +2 -5
  8. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/skills/omnicaptions-convert/SKILL.md +1 -2
  9. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/skills/omnicaptions-download/SKILL.md +4 -2
  10. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/skills/omnicaptions-transcribe/SKILL.md +1 -2
  11. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/skills/omnicaptions-translate/SKILL.md +1 -2
  12. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/src/omnicaptions/caption.py +43 -3
  13. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/src/omnicaptions/cli.py +52 -5
  14. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/.claude-plugin/marketplace.json +0 -0
  15. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/.claude-plugin/plugin.json +0 -0
  16. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/.github/workflows/publish.yml +0 -0
  17. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/.github/workflows/skills-install.yml +0 -0
  18. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/CLAUDE.md +0 -0
  19. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/LICENSE +0 -0
  20. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/build.sh +0 -0
  21. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/packages/lattifai-1.2.2.tar.gz +0 -0
  22. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/packages/lattifai_captions-0.1.0.tar.gz +0 -0
  23. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/packages/lattifai_core-0.6.1.tar.gz +0 -0
  24. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/packages/omnicaptions-0.1.0.tar.gz +0 -0
  25. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/src/omnicaptions/__init__.py +0 -0
  26. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/src/omnicaptions/__main__.py +0 -0
  27. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/src/omnicaptions/config.py +0 -0
  28. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/src/omnicaptions/prompts/transcription_dotey.md +0 -0
  29. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/tests/data/SA1.mp3 +0 -0
  30. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/tests/data/SA1.vtt +0 -0
  31. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/tests/data/karaoke_test.json +0 -0
  32. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/tests/test_cli_convert.py +0 -0
  33. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/tests/test_cli_entrypoint.py +0 -0
  34. {omni_captions_skills-0.1.0 → omni_captions_skills-0.1.2}/tests/test_import.py +0 -0
@@ -33,14 +33,16 @@ jobs:
33
33
  - name: Install dependencies
34
34
  run: |
35
35
  pip install --upgrade pip
36
- pip install -e ".[all]"
36
+ pip install -e ".[all]" --extra-index-url https://lattifai.github.io/pypi/simple
37
37
 
38
38
  - name: Test CLI entry point
39
+ env:
40
+ LATTIFAI_API_KEY: ${{ secrets.LATTIFAI_API_KEY }}
39
41
  run: |
40
42
  python -m omnicaptions --help
41
43
  omnicaptions --help
42
44
  # test LaiCut
43
- lai alignment align tests/data/SA1.mp3 tests/data/SA1.vtt tests/data/SA1_LaiCut.vtt
45
+ lai alignment align -Y tests/data/SA1.mp3 tests/data/SA1.vtt tests/data/SA1_LaiCut.vtt
44
46
 
45
47
  - name: Run tests
46
48
  run: pytest tests/ -v
@@ -210,3 +210,4 @@ __marimo__/
210
210
  .worktrees/
211
211
 
212
212
  .DS_Store
213
+ tests/data/SA1_LaiCut.vtt
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omni-captions-skills
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: AI-powered media transcription with Claude Code skills
5
5
  Author: LattifAI
6
6
  License-Expression: MIT
@@ -20,12 +20,14 @@ Requires-Dist: google-genai>=1.0.0
20
20
  Requires-Dist: lattifai-captions>=0.1.6
21
21
  Requires-Dist: yt-dlp>=2025.12.08
22
22
  Provides-Extra: all
23
- Requires-Dist: omnicaptions[dev,laicut]; extra == 'all'
23
+ Requires-Dist: lattifai[diarization]>=1.3.4; extra == 'all'
24
+ Requires-Dist: pytest>=8.0.0; extra == 'all'
25
+ Requires-Dist: ruff>=0.4.0; extra == 'all'
24
26
  Provides-Extra: dev
25
27
  Requires-Dist: pytest>=8.0.0; extra == 'dev'
26
28
  Requires-Dist: ruff>=0.4.0; extra == 'dev'
27
29
  Provides-Extra: laicut
28
- Requires-Dist: lattifai[diarization]>=1.3.0; extra == 'laicut'
30
+ Requires-Dist: lattifai[diarization]>=1.3.4; extra == 'laicut'
29
31
  Description-Content-Type: text/markdown
30
32
 
31
33
  # omni-captions-skills
@@ -94,6 +96,15 @@ Vibe coding 就是不用自己写代码的编程方式。
94
96
 
95
97
  Standard transcription gives "approximate" timestamps. LaiCut uses [LattifAI](https://lattifai.com/) Lattice-1 model to match text precisely to audio waveforms, achieving **word-level accuracy**.
96
98
 
99
+ **Install LaiCut:**
100
+ ```bash
101
+ # Using uv (recommended); the LattifAI package index is passed explicitly
102
+ uv pip install "omni-captions-skills[laicut]" --extra-index-url https://lattifai.github.io/pypi/simple/
103
+
104
+ # Using pip
105
+ pip install "omni-captions-skills[laicut]" --extra-index-url https://lattifai.github.io/pypi/simple/
106
+ ```
107
+
97
108
  **Supported languages:** English, Chinese, German, and mixed
98
109
 
99
110
  **Recommended workflow:** Align before translate (translated text doesn't match original audio)
@@ -64,6 +64,15 @@ Vibe coding 就是不用自己写代码的编程方式。
64
64
 
65
65
  Standard transcription gives "approximate" timestamps. LaiCut uses [LattifAI](https://lattifai.com/) Lattice-1 model to match text precisely to audio waveforms, achieving **word-level accuracy**.
66
66
 
67
+ **Install LaiCut:**
68
+ ```bash
69
+ # Using uv (recommended); the LattifAI package index is passed explicitly
70
+ uv pip install "omni-captions-skills[laicut]" --extra-index-url https://lattifai.github.io/pypi/simple/
71
+
72
+ # Using pip
73
+ pip install "omni-captions-skills[laicut]" --extra-index-url https://lattifai.github.io/pypi/simple/
74
+ ```
75
+
67
76
  **Supported languages:** English, Chinese, German, and mixed
68
77
 
69
78
  **Recommended workflow:** Align before translate (translated text doesn't match original audio)
@@ -64,6 +64,15 @@ Vibe coding 就是不用自己写代码的编程方式。
64
64
 
65
65
  普通转录的时间戳只是"大概",LaiCut 使用 [LattifAI](https://lattifai.com/) Lattice-1 模型将文本与音频波形精确匹配,实现**词级精度**。
66
66
 
67
+ **安装 LaiCut:**
68
+ ```bash
69
+ # 使用 uv(推荐,自动配置包索引)
70
+ uv pip install "omni-captions-skills[laicut]"
71
+
72
+ # 使用 pip
73
+ pip install "omni-captions-skills[laicut]" --extra-index-url https://lattifai.github.io/pypi/simple/
74
+ ```
75
+
67
76
  **支持语言:** 英语、中文、德语及混合
68
77
 
69
78
  **推荐工作流:** 先对齐再翻译(翻译文本与原始音频不匹配,无法对齐)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "omni-captions-skills"
7
- version = "0.1.0"
7
+ version = "0.1.2"
8
8
  description = "AI-powered media transcription with Claude Code skills"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -37,10 +37,10 @@ dev = [
37
37
  "ruff>=0.4.0",
38
38
  ]
39
39
  laicut = [
40
- "lattifai[diarization]>=1.3.0",
40
+ "lattifai[diarization]>=1.3.4",
41
41
  ]
42
42
  all = [
43
- "omnicaptions[dev,laicut]",
43
+ "omni-captions-skills[dev,laicut]",
44
44
  ]
45
45
 
46
46
  [project.scripts]
@@ -60,3 +60,6 @@ target-version = "py310"
60
60
  [tool.ruff.lint]
61
61
  select = ["E", "F", "I"]
62
62
  ignore = ["E501"]
63
+
64
+ [tool.uv]
65
+ extra-index-url = ["https://lattifai.github.io/pypi/simple/"]
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: omnicaptions-LaiCut
3
3
  description: Use when user needs accurate/precise caption timing, or aligning captions with audio/video using forced alignment. Corrects caption timing to match actual speech. Uses LattifAI Lattice-1 model.
4
- allowed-tools: Bash(omnicaptions:*)
4
+ allowed-tools: Read, Bash(omnicaptions:*), Bash(lai:*)
5
5
  ---
6
6
 
7
7
  # LaiCut
@@ -27,10 +27,7 @@ LattifAI's audio-text processing toolkit. Currently supports forced alignment, w
27
27
  ## Setup
28
28
 
29
29
  ```bash
30
- pip install "lattifai-captions[splitting] @ https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai_captions-0.1.0.tar.gz"
31
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/omnicaptions-0.1.0.tar.gz
32
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai_core-0.6.1.tar.gz
33
- pip install "lattifai[alignment] @ https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai-1.2.2.tar.gz"
30
+ pip install "omni-captions-skills[laicut]" --extra-index-url https://lattifai.github.io/pypi/simple/
34
31
  ```
35
32
 
36
33
  ## API Key
@@ -35,8 +35,7 @@ omnicaptions convert transcript.md -o output.vtt
35
35
  ## Setup
36
36
 
37
37
  ```bash
38
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai_captions-0.1.0.tar.gz
39
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/omnicaptions-0.1.0.tar.gz
38
+ pip install omni-captions-skills
40
39
  ```
41
40
 
42
41
  ## Quick Reference
@@ -43,8 +43,7 @@ Only proceed with the download command after user confirms.
43
43
  ## Setup
44
44
 
45
45
  ```bash
46
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai_captions-0.1.0.tar.gz
47
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/omnicaptions-0.1.0.tar.gz
46
+ pip install omni-captions-skills
48
47
  ```
49
48
 
50
49
  ## CLI Usage
@@ -55,6 +54,9 @@ pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/o
55
54
  # Download audio only (default, saves to current directory)
56
55
  omnicaptions download "https://www.youtube.com/watch?v=VIDEO_ID"
57
56
 
57
+ # Also accepts a bare YouTube video ID (checked with yt-dlp before downloading)
58
+ omnicaptions download e882eXLtwkI
59
+
58
60
  # Download video (1080p recommended)
59
61
  omnicaptions download "https://youtube.com/watch?v=VIDEO_ID" -q 1080p
60
62
 
@@ -60,8 +60,7 @@ omnicaptions transcribe video.mp4
60
60
  ## Setup
61
61
 
62
62
  ```bash
63
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai_captions-0.1.0.tar.gz
64
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/omnicaptions-0.1.0.tar.gz
63
+ pip install omni-captions-skills
65
64
  ```
66
65
 
67
66
  ## API Key
@@ -41,8 +41,7 @@ Output: `input_Gemini_zh.srt`
41
41
  ## Setup
42
42
 
43
43
  ```bash
44
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/lattifai_captions-0.1.0.tar.gz
45
- pip install https://github.com/lattifai/omni-captions-skills/raw/main/packages/omnicaptions-0.1.0.tar.gz
44
+ pip install omni-captions-skills
46
45
  ```
47
46
 
48
47
  ## API Key
@@ -50,6 +50,45 @@ VIDEO_PLATFORM_PATTERNS = [
50
50
  r"x\.com/.*/status",
51
51
  ]
52
52
 
53
+ # YouTube video ID pattern: exactly 11 characters, each a letter, digit, hyphen, or underscore
54
+ YOUTUBE_VIDEO_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{11}$")
55
+
56
+
57
+ def is_youtube_video_id(value: str) -> bool:
58
+ """Check if a string looks like a bare YouTube video ID."""
59
+ return bool(YOUTUBE_VIDEO_ID_RE.match(value))
60
+
61
+
62
+ def resolve_video_input(url_or_id: str) -> str:
63
+ """Resolve a video URL or bare YouTube video ID to a full URL.
64
+
65
+ If the input is a bare YouTube video ID, validates it via yt-dlp
66
+ extract_info and converts to a full URL. Otherwise returns as-is.
67
+
68
+ Raises:
69
+ ValueError: If the video ID does not exist or is unavailable.
70
+ """
71
+ if not is_youtube_video_id(url_or_id):
72
+ return url_or_id
73
+
74
+ import yt_dlp
75
+
76
+ full_url = f"https://www.youtube.com/watch?v={url_or_id}"
77
+ ydl_opts = {"quiet": True, "no_warnings": True, "skip_download": True}
78
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
79
+ try:
80
+ info = ydl.extract_info(full_url, download=False)
81
+ except yt_dlp.utils.DownloadError as e:
82
+ raise ValueError(f"YouTube video '{url_or_id}' not found or unavailable: {e}") from e
83
+ if not info:
84
+ raise ValueError(f"YouTube video '{url_or_id}' not found or unavailable")
85
+
86
+ logging.info(
87
+ "Resolved video ID '%s' → %s (title: %s)", url_or_id, full_url, info.get("title", "")
88
+ )
89
+ return full_url
90
+
91
+
53
92
  LANGUAGE_NAMES = {
54
93
  "zh": "Chinese (Simplified)",
55
94
  "zh-TW": "Chinese (Traditional)",
@@ -313,9 +352,9 @@ class GeminiCaption:
313
352
  video_path = None
314
353
  caption_path = None
315
354
 
316
- audio_exts = (".webm", ".m4a", ".mp3", ".opus", ".ogg", ".wav")
355
+ audio_exts = (".mp3", ".m4a", ".opus", ".ogg", ".wav")
317
356
  video_exts = (".mp4", ".mkv", ".avi", ".mov", ".flv")
318
- caption_exts = (".vtt", ".srt", ".ass")
357
+ caption_exts = (".vtt", ".srt", ".ass", ".srv3")
319
358
 
320
359
  for f in output_dir.iterdir():
321
360
  # Check if filename starts with video_id
@@ -341,10 +380,11 @@ class GeminiCaption:
341
380
  if caption_path:
342
381
  self.logger.info(f"Downloaded caption: {caption_path}")
343
382
 
344
- # Save metadata to .meta.json for later use (e.g., ASS font scaling)
383
+ # Save metadata to .meta.json for later use (e.g., ASS font scaling, video_url tracking)
345
384
  meta_path = output_dir / f"{video_id}.meta.json"
346
385
  meta_data = {
347
386
  "video_id": video_id,
387
+ "video_url": url,
348
388
  "title": title,
349
389
  "width": video_width,
350
390
  "height": video_height,
@@ -6,6 +6,13 @@ import subprocess
6
6
  import sys
7
7
  from pathlib import Path
8
8
 
9
+ try:
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+ except ImportError:
14
+ pass
15
+
9
16
  from lattifai.caption import Caption, GeminiReader
10
17
 
11
18
  from .caption import GeminiCaption, GeminiCaptionConfig
@@ -343,8 +350,12 @@ def ensure_api_key(api_key: str | None = None) -> bool:
343
350
 
344
351
 
345
352
  def is_url(path: str) -> bool:
346
- """Check if path is a URL."""
347
- return path.startswith(("http://", "https://", "youtube.com", "youtu.be"))
353
+ """Check if path is a URL or a bare YouTube video ID."""
354
+ from .caption import is_youtube_video_id
355
+
356
+ return path.startswith(
357
+ ("http://", "https://", "youtube.com", "youtu.be")
358
+ ) or is_youtube_video_id(path)
348
359
 
349
360
 
350
361
  def get_default_output_dir(input_path: str) -> Path:
@@ -363,8 +374,13 @@ def get_stem_from_input(input_path: str) -> str:
363
374
  """Extract stem (filename without extension) from input.
364
375
 
365
376
  For URLs, extract video ID or use 'output'.
377
+ For bare YouTube video IDs, return the ID directly.
366
378
  For files, use the file stem.
367
379
  """
380
+ from .caption import is_youtube_video_id
381
+
382
+ if is_youtube_video_id(input_path):
383
+ return input_path
368
384
  if is_url(input_path):
369
385
  # Try to extract YouTube video ID
370
386
  import re
@@ -461,6 +477,8 @@ def cmd_transcribe(args):
461
477
 
462
478
  def cmd_download(args):
463
479
  """Download audio/video and captions from video platforms."""
480
+ from .caption import resolve_video_input
481
+
464
482
  config = GeminiCaptionConfig(verbose=args.verbose)
465
483
  gc = GeminiCaption(config=config)
466
484
 
@@ -469,8 +487,15 @@ def cmd_download(args):
469
487
 
470
488
  quality = getattr(args, "quality", "audio")
471
489
 
490
+ # Resolve bare video ID to full URL (validates via yt-dlp)
472
491
  try:
473
- result = gc.download(args.url, output_dir, quality)
492
+ url = resolve_video_input(args.url)
493
+ except ValueError as e:
494
+ print(f"Error: {e}", file=sys.stderr)
495
+ sys.exit(1)
496
+
497
+ try:
498
+ result = gc.download(url, output_dir, quality)
474
499
  if result.video_path:
475
500
  print(f"Video: {result.video_path}")
476
501
  if result.audio_path:
@@ -691,6 +716,21 @@ def cmd_translate(args):
691
716
  sys.exit(1)
692
717
 
693
718
 
719
+ def _detect_metadata(audio_path: Path) -> dict | None:
720
+ """Detect metadata from .meta.json in the same directory."""
721
+ import json
722
+
723
+ # Try to find meta.json based on audio filename (e.g., e882eXLtwkI.m4a -> e882eXLtwkI.meta.json)
724
+ stem = audio_path.stem
725
+ meta_path = audio_path.parent / f"{stem}.meta.json"
726
+ if meta_path.exists():
727
+ try:
728
+ return json.loads(meta_path.read_text())
729
+ except Exception:
730
+ pass
731
+ return None
732
+
733
+
694
734
  def cmd_laicut_align(args):
695
735
  """Align audio with caption using LattifAI forced alignment."""
696
736
  # Get API key
@@ -709,6 +749,9 @@ def cmd_laicut_align(args):
709
749
  audio_path = Path(args.audio)
710
750
  caption_path = Path(args.caption)
711
751
 
752
+ # Auto-detect metadata from .meta.json
753
+ metadata = _detect_metadata(audio_path)
754
+
712
755
  if not audio_path.exists():
713
756
  print(f"Error: Audio file not found: {audio_path}", file=sys.stderr)
714
757
  sys.exit(1)
@@ -718,7 +761,7 @@ def cmd_laicut_align(args):
718
761
  sys.exit(1)
719
762
 
720
763
  from lattifai.client import LattifAI
721
- from lattifai.config import AlignmentConfig, CaptionConfig, ClientConfig
764
+ from lattifai.config import AlignmentConfig, CaptionConfig, ClientConfig, EventConfig
722
765
 
723
766
  # Determine output path and format
724
767
  if args.output:
@@ -746,12 +789,16 @@ def cmd_laicut_align(args):
746
789
  split_sentence=getattr(args, "split_sentence", False),
747
790
  word_level=word_level,
748
791
  ),
792
+ event_config=EventConfig(
793
+ enabled=True,
794
+ ),
749
795
  )
750
796
 
751
797
  client.alignment(
752
798
  input_media=str(audio_path),
753
799
  input_caption=str(caption_path),
754
800
  output_caption_path=str(output_path),
801
+ metadata=metadata,
755
802
  )
756
803
  print(f"LaiCut aligned: {output_path}")
757
804
  except Exception as e:
@@ -793,7 +840,7 @@ def main():
793
840
  p_download = subparsers.add_parser(
794
841
  "download", help="Download audio/video and captions from URL"
795
842
  )
796
- p_download.add_argument("url", help="Video URL (YouTube, Bilibili, etc.)")
843
+ p_download.add_argument("url", help="Video URL or YouTube video ID (e.g. e882eXLtwkI)")
797
844
  p_download.add_argument("-o", "--output", help="Output directory (default: current)")
798
845
  p_download.add_argument(
799
846
  "-q",