videonut 1.2.7 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +272 -272
  2. package/USER_GUIDE.md +90 -90
  3. package/agents/core/eic.md +771 -771
  4. package/agents/creative/director.md +246 -246
  5. package/agents/creative/scriptwriter.md +207 -207
  6. package/agents/research/investigator.md +394 -394
  7. package/agents/technical/archivist.md +288 -288
  8. package/agents/technical/scavenger.md +247 -247
  9. package/bin/videonut.js +37 -21
  10. package/config.yaml +61 -61
  11. package/docs/scriptwriter.md +42 -42
  12. package/file_validator.py +186 -186
  13. package/memory/short_term/asset_manifest.md +64 -64
  14. package/memory/short_term/investigation_dossier.md +31 -31
  15. package/memory/short_term/master_script.md +51 -51
  16. package/package.json +61 -64
  17. package/requirements.txt +8 -8
  18. package/setup.js +33 -15
  19. package/tools/check_env.py +76 -76
  20. package/tools/downloaders/caption_reader.py +237 -237
  21. package/tools/downloaders/clip_grabber.py +82 -82
  22. package/tools/downloaders/image_grabber.py +105 -105
  23. package/tools/downloaders/pdf_reader.py +163 -163
  24. package/tools/downloaders/screenshotter.py +58 -58
  25. package/tools/downloaders/web_reader.py +69 -69
  26. package/tools/validators/link_checker.py +45 -45
  27. package/workflow_orchestrator.py +336 -336
  28. package/.claude/commands/archivist.toml +0 -12
  29. package/.claude/commands/director.toml +0 -12
  30. package/.claude/commands/eic.toml +0 -12
  31. package/.claude/commands/investigator.toml +0 -12
  32. package/.claude/commands/prompt.toml +0 -12
  33. package/.claude/commands/scavenger.toml +0 -12
  34. package/.claude/commands/scout.toml +0 -12
  35. package/.claude/commands/scriptwriter.toml +0 -12
  36. package/.claude/commands/seo.toml +0 -12
  37. package/.claude/commands/thumbnail.toml +0 -12
  38. package/.claude/commands/topic_scout.toml +0 -12
  39. package/.gemini/commands/archivist.toml +0 -12
  40. package/.gemini/commands/director.toml +0 -12
  41. package/.gemini/commands/eic.toml +0 -12
  42. package/.gemini/commands/investigator.toml +0 -12
  43. package/.gemini/commands/prompt.toml +0 -12
  44. package/.gemini/commands/scavenger.toml +0 -12
  45. package/.gemini/commands/scout.toml +0 -12
  46. package/.gemini/commands/scriptwriter.toml +0 -12
  47. package/.gemini/commands/seo.toml +0 -12
  48. package/.gemini/commands/thumbnail.toml +0 -12
  49. package/.gemini/commands/topic_scout.toml +0 -12
  50. package/.qwen/commands/archivist.toml +0 -12
  51. package/.qwen/commands/director.toml +0 -12
  52. package/.qwen/commands/eic.toml +0 -12
  53. package/.qwen/commands/investigator.toml +0 -12
  54. package/.qwen/commands/prompt.toml +0 -12
  55. package/.qwen/commands/scavenger.toml +0 -12
  56. package/.qwen/commands/scout.toml +0 -12
  57. package/.qwen/commands/scriptwriter.toml +0 -12
  58. package/.qwen/commands/seo.toml +0 -12
  59. package/.qwen/commands/thumbnail.toml +0 -12
  60. package/.qwen/commands/topic_scout.toml +0 -12
@@ -1,77 +1,77 @@
1
- import shutil
2
- import sys
3
- import os
4
- import subprocess
5
-
6
- def check_command(cmd, name):
7
- path = shutil.which(cmd)
8
- if path:
9
- print(f"✅ {name} found at: {path}")
10
- return True
11
- else:
12
- print(f"❌ {name} NOT found in PATH.")
13
- return False
14
-
15
- def check_import(module_name):
16
- try:
17
- __import__(module_name)
18
- print(f"✅ Python module '{module_name}' is installed.")
19
- return True
20
- except ImportError:
21
- print(f"❌ Python module '{module_name}' is MISSING.")
22
- return False
23
-
24
- def main():
25
- print("🔍 VideoNut Environment Check...")
26
- print("-" * 30)
27
-
28
- all_good = True
29
-
30
- # 1. Check Python version
31
- if sys.version_info < (3, 8):
32
- print("❌ Python 3.8+ is required.")
33
- all_good = False
34
- else:
35
- print(f"✅ Python Version: {sys.version}")
36
-
37
- # 2. Check FFmpeg
38
- if not check_command("ffmpeg", "FFmpeg"):
39
- # Check local bin fallback
40
- local_bin = os.path.join(os.path.dirname(__file__), "bin", "ffmpeg.exe")
41
- if os.path.exists(local_bin):
42
- print(f"✅ FFmpeg found in local bin: {local_bin}")
43
- else:
44
- print(" (Please install FFmpeg or place it in tools/bin/)")
45
- all_good = False
46
-
47
- # 3. Check Python Packages
48
- if not check_import("yt_dlp"): all_good = False
49
- if not check_import("playwright"): all_good = False
50
- if not check_import("requests"): all_good = False
51
- if not check_import("bs4"): all_good = False
52
- if not check_import("youtube_transcript_api"): all_good = False
53
- if not check_import("pypdf"): all_good = False
54
-
55
- # 4. Check for new tools
56
- tools_dir = os.path.join(os.path.dirname(__file__), "downloaders")
57
- new_tools = [
58
- ("caption_reader.py", os.path.join(tools_dir, "caption_reader.py")),
59
- ]
60
-
61
- for tool_name, tool_path in new_tools:
62
- if os.path.exists(tool_path):
63
- print(f"✅ Tool found: {tool_name}")
64
- else:
65
- print(f"❌ Tool missing: {tool_name} at {tool_path}")
66
- all_good = False
67
-
68
- print("-" * 30)
69
- if all_good:
70
- print("🚀 System is READY for VideoNut Agents.")
71
- sys.exit(0)
72
- else:
73
- print("⚠️ System has ISSUES. Please fix missing dependencies.")
74
- sys.exit(1)
75
-
76
- if __name__ == "__main__":
1
+ import shutil
2
+ import sys
3
+ import os
4
+ import subprocess
5
+
6
+ def check_command(cmd, name):
7
+ path = shutil.which(cmd)
8
+ if path:
9
+ print(f"✅ {name} found at: {path}")
10
+ return True
11
+ else:
12
+ print(f"❌ {name} NOT found in PATH.")
13
+ return False
14
+
15
+ def check_import(module_name):
16
+ try:
17
+ __import__(module_name)
18
+ print(f"✅ Python module '{module_name}' is installed.")
19
+ return True
20
+ except ImportError:
21
+ print(f"❌ Python module '{module_name}' is MISSING.")
22
+ return False
23
+
24
+ def main():
25
+ print("🔍 VideoNut Environment Check...")
26
+ print("-" * 30)
27
+
28
+ all_good = True
29
+
30
+ # 1. Check Python version
31
+ if sys.version_info < (3, 8):
32
+ print("❌ Python 3.8+ is required.")
33
+ all_good = False
34
+ else:
35
+ print(f"✅ Python Version: {sys.version}")
36
+
37
+ # 2. Check FFmpeg
38
+ if not check_command("ffmpeg", "FFmpeg"):
39
+ # Check local bin fallback
40
+ local_bin = os.path.join(os.path.dirname(__file__), "bin", "ffmpeg.exe")
41
+ if os.path.exists(local_bin):
42
+ print(f"✅ FFmpeg found in local bin: {local_bin}")
43
+ else:
44
+ print(" (Please install FFmpeg or place it in tools/bin/)")
45
+ all_good = False
46
+
47
+ # 3. Check Python Packages
48
+ if not check_import("yt_dlp"): all_good = False
49
+ if not check_import("playwright"): all_good = False
50
+ if not check_import("requests"): all_good = False
51
+ if not check_import("bs4"): all_good = False
52
+ if not check_import("youtube_transcript_api"): all_good = False
53
+ if not check_import("pypdf"): all_good = False
54
+
55
+ # 4. Check for new tools
56
+ tools_dir = os.path.join(os.path.dirname(__file__), "downloaders")
57
+ new_tools = [
58
+ ("caption_reader.py", os.path.join(tools_dir, "caption_reader.py")),
59
+ ]
60
+
61
+ for tool_name, tool_path in new_tools:
62
+ if os.path.exists(tool_path):
63
+ print(f"✅ Tool found: {tool_name}")
64
+ else:
65
+ print(f"❌ Tool missing: {tool_name} at {tool_path}")
66
+ all_good = False
67
+
68
+ print("-" * 30)
69
+ if all_good:
70
+ print("🚀 System is READY for VideoNut Agents.")
71
+ sys.exit(0)
72
+ else:
73
+ print("⚠️ System has ISSUES. Please fix missing dependencies.")
74
+ sys.exit(1)
75
+
76
+ if __name__ == "__main__":
77
77
  main()
@@ -1,238 +1,238 @@
1
- #!/usr/bin/env python3
2
- """
3
- YouTube Caption/Transcript Reader for VideoNut
4
- Extracts captions from YouTube videos with optional timestamp display.
5
- """
6
-
7
- import sys
8
- import argparse
9
- import json
10
- from youtube_transcript_api import YouTubeTranscriptApi
11
- from youtube_transcript_api.formatters import TextFormatter, JSONFormatter
12
- import re
13
-
14
-
15
- def extract_video_id(url):
16
- """
17
- Extract YouTube video ID from various URL formats
18
- """
19
- # Patterns for different YouTube URL formats
20
- patterns = [
21
- r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11,12})',
22
- r'(?:https?:\/\/)?(?:www\.)?youtu\.be\/([a-zA-Z0-9_-]{11,12})',
23
- r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/embed\/([a-zA-Z0-9_-]{11,12})',
24
- r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/v\/([a-zA-Z0-9_-]{11,12})',
25
- ]
26
-
27
- for pattern in patterns:
28
- match = re.search(pattern, url)
29
- if match:
30
- return match.group(1)
31
-
32
- return None
33
-
34
-
35
- def format_timestamp(seconds):
36
- """Convert seconds to HH:MM:SS format"""
37
- hours = int(seconds // 3600)
38
- minutes = int((seconds % 3600) // 60)
39
- secs = int(seconds % 60)
40
-
41
- if hours > 0:
42
- return f"{hours:02d}:{minutes:02d}:{secs:02d}"
43
- else:
44
- return f"{minutes:02d}:{secs:02d}"
45
-
46
-
47
- def get_youtube_captions(url, languages=None, with_timestamps=False, search_term=None):
48
- """
49
- Get YouTube video captions/transcript
50
-
51
- Args:
52
- url: YouTube video URL
53
- languages: List of preferred language codes
54
- with_timestamps: If True, include timestamps with each line
55
- search_term: If provided, only return lines containing this term (with timestamps)
56
-
57
- Returns:
58
- Formatted transcript string
59
- """
60
- if languages is None:
61
- # Default to English and other common languages
62
- languages = ['en', 'en-US', 'en-GB', 'hi', 'te', 'ta', 'mr', 'es', 'fr', 'de']
63
-
64
- video_id = extract_video_id(url)
65
-
66
- if not video_id:
67
- print(f"Error: Could not extract video ID from URL: {url}", file=sys.stderr)
68
- sys.exit(1)
69
-
70
- try:
71
- # Instantiate the API class
72
- api = YouTubeTranscriptApi()
73
-
74
- # Fetch the transcript data directly using the instance method
75
- transcript_data = api.fetch(video_id, languages=languages)
76
-
77
- # If searching for a term, filter and return with timestamps
78
- if search_term:
79
- search_lower = search_term.lower()
80
- matches = []
81
- for entry in transcript_data:
82
- if search_lower in entry.text.lower():
83
- timestamp = format_timestamp(entry.start)
84
- duration = getattr(entry, 'duration', 0)
85
- end_timestamp = format_timestamp(entry.start + duration)
86
- matches.append({
87
- 'timestamp': timestamp,
88
- 'end_timestamp': end_timestamp,
89
- 'start_seconds': entry.start,
90
- 'text': entry.text
91
- })
92
-
93
- if not matches:
94
- return f"No matches found for '{search_term}' in transcript."
95
-
96
- output = [f"\n🔍 Found {len(matches)} matches for '{search_term}':\n"]
97
- for match in matches:
98
- output.append(f"[{match['timestamp']}] {match['text']}")
99
- output.append(f"\n📋 Suggested clip range: {matches[0]['timestamp']} - {matches[-1]['end_timestamp']}")
100
- return '\n'.join(output)
101
-
102
- # If with_timestamps, format each line with timestamp
103
- if with_timestamps:
104
- output = []
105
- output.append(f"\n📝 Transcript with Timestamps:\n")
106
- output.append("=" * 60)
107
- for entry in transcript_data:
108
- timestamp = format_timestamp(entry.start)
109
- output.append(f"[{timestamp}] {entry.text}")
110
- return '\n'.join(output)
111
-
112
- # Default: plain text format - join all text entries
113
- plain_text = ' '.join([entry.text for entry in transcript_data])
114
-
115
- return plain_text
116
-
117
- except Exception as e:
118
- print(f"Error retrieving captions: {str(e)}", file=sys.stderr)
119
- sys.exit(1)
120
-
121
-
122
- def find_timestamp_for_quote(url, quote, context_seconds=30):
123
- """
124
- Find the timestamp where a specific quote appears in the video.
125
- Returns the start and end timestamps for a clip containing that quote.
126
-
127
- Args:
128
- url: YouTube video URL
129
- quote: The quote to search for
130
- context_seconds: How many seconds of context to include before/after
131
-
132
- Returns:
133
- Dict with start_time, end_time, and surrounding text
134
- """
135
- video_id = extract_video_id(url)
136
- if not video_id:
137
- return None
138
-
139
- try:
140
- api = YouTubeTranscriptApi()
141
- # Use fetch to get the default transcript or specify languages
142
- transcript_data = api.fetch(video_id)
143
-
144
- quote_lower = quote.lower()
145
-
146
- for i, entry in enumerate(transcript_data):
147
- if quote_lower in entry.text.lower():
148
- # Found the quote
149
- start_time = max(0, entry.start - context_seconds)
150
- end_time = entry.start + getattr(entry, 'duration', 5) + context_seconds
151
-
152
- # Get surrounding context
153
- context_entries = []
154
- for j in range(max(0, i-3), min(len(transcript_data), i+4)):
155
- context_entries.append({
156
- 'timestamp': format_timestamp(transcript_data[j].start),
157
- 'text': transcript_data[j].text
158
- })
159
-
160
- return {
161
- 'found': True,
162
- 'quote': entry.text,
163
- 'timestamp': format_timestamp(entry.start),
164
- 'clip_start': format_timestamp(start_time),
165
- 'clip_end': format_timestamp(end_time),
166
- 'context': context_entries
167
- }
168
-
169
- return {'found': False, 'message': f"Quote not found: {quote}"}
170
-
171
- except Exception as e:
172
- return {'found': False, 'message': str(e)}
173
-
174
-
175
- def main():
176
- parser = argparse.ArgumentParser(
177
- description="Extract captions from YouTube videos with optional timestamps.",
178
- formatter_class=argparse.RawDescriptionHelpFormatter,
179
- epilog="""
180
- Examples:
181
- # Get plain transcript
182
- python caption_reader.py --url "https://youtube.com/watch?v=xxx"
183
-
184
- # Get transcript with timestamps
185
- python caption_reader.py --url "https://youtube.com/watch?v=xxx" --timestamps
186
-
187
- # Search for specific term and get timestamps
188
- python caption_reader.py --url "https://youtube.com/watch?v=xxx" --search "electoral bonds"
189
-
190
- # Find timestamp for a specific quote
191
- python caption_reader.py --url "https://youtube.com/watch?v=xxx" --find-quote "corruption" --json
192
- """
193
- )
194
-
195
- parser.add_argument("--url", required=True, help="YouTube video URL")
196
- parser.add_argument("--languages", nargs="*", default=None,
197
- help="Preferred language codes (e.g., en hi te). Default: en and Indian languages")
198
- parser.add_argument("--timestamps", "-t", action="store_true",
199
- help="Include timestamps with each line")
200
- parser.add_argument("--search", "-s", help="Search for specific term and show timestamps")
201
- parser.add_argument("--find-quote", "-f", help="Find exact timestamp for a quote")
202
- parser.add_argument("--context", "-c", type=int, default=30,
203
- help="Seconds of context around found quote (default: 30)")
204
- parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
205
-
206
- args = parser.parse_args()
207
-
208
- if args.find_quote:
209
- # Find timestamp for specific quote
210
- result = find_timestamp_for_quote(args.url, args.find_quote, args.context)
211
- if args.json:
212
- print(json.dumps(result, indent=2, ensure_ascii=False))
213
- else:
214
- if result.get('found'):
215
- print(f"\n✅ Quote Found!")
216
- print(f" Timestamp: {result['timestamp']}")
217
- print(f" Text: {result['quote']}")
218
- print(f"\n🎬 Suggested Clip:")
219
- print(f" Start: {result['clip_start']}")
220
- print(f" End: {result['clip_end']}")
221
- print(f"\n📄 Context:")
222
- for entry in result['context']:
223
- print(f" [{entry['timestamp']}] {entry['text']}")
224
- else:
225
- print(f"❌ {result.get('message', 'Quote not found')}")
226
- else:
227
- # Get transcript
228
- captions = get_youtube_captions(
229
- args.url,
230
- args.languages,
231
- with_timestamps=args.timestamps,
232
- search_term=args.search
233
- )
234
- print(captions)
235
-
236
-
237
- if __name__ == "__main__":
1
+ #!/usr/bin/env python3
2
+ """
3
+ YouTube Caption/Transcript Reader for VideoNut
4
+ Extracts captions from YouTube videos with optional timestamp display.
5
+ """
6
+
7
+ import sys
8
+ import argparse
9
+ import json
10
+ from youtube_transcript_api import YouTubeTranscriptApi
11
+ from youtube_transcript_api.formatters import TextFormatter, JSONFormatter
12
+ import re
13
+
14
+
15
+ def extract_video_id(url):
16
+ """
17
+ Extract YouTube video ID from various URL formats
18
+ """
19
+ # Patterns for different YouTube URL formats
20
+ patterns = [
21
+ r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11,12})',
22
+ r'(?:https?:\/\/)?(?:www\.)?youtu\.be\/([a-zA-Z0-9_-]{11,12})',
23
+ r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/embed\/([a-zA-Z0-9_-]{11,12})',
24
+ r'(?:https?:\/\/)?(?:www\.)?youtube\.com\/v\/([a-zA-Z0-9_-]{11,12})',
25
+ ]
26
+
27
+ for pattern in patterns:
28
+ match = re.search(pattern, url)
29
+ if match:
30
+ return match.group(1)
31
+
32
+ return None
33
+
34
+
35
+ def format_timestamp(seconds):
36
+ """Convert seconds to HH:MM:SS format"""
37
+ hours = int(seconds // 3600)
38
+ minutes = int((seconds % 3600) // 60)
39
+ secs = int(seconds % 60)
40
+
41
+ if hours > 0:
42
+ return f"{hours:02d}:{minutes:02d}:{secs:02d}"
43
+ else:
44
+ return f"{minutes:02d}:{secs:02d}"
45
+
46
+
47
+ def get_youtube_captions(url, languages=None, with_timestamps=False, search_term=None):
48
+ """
49
+ Get YouTube video captions/transcript
50
+
51
+ Args:
52
+ url: YouTube video URL
53
+ languages: List of preferred language codes
54
+ with_timestamps: If True, include timestamps with each line
55
+ search_term: If provided, only return lines containing this term (with timestamps)
56
+
57
+ Returns:
58
+ Formatted transcript string
59
+ """
60
+ if languages is None:
61
+ # Default to English and other common languages
62
+ languages = ['en', 'en-US', 'en-GB', 'hi', 'te', 'ta', 'mr', 'es', 'fr', 'de']
63
+
64
+ video_id = extract_video_id(url)
65
+
66
+ if not video_id:
67
+ print(f"Error: Could not extract video ID from URL: {url}", file=sys.stderr)
68
+ sys.exit(1)
69
+
70
+ try:
71
+ # Instantiate the API class
72
+ api = YouTubeTranscriptApi()
73
+
74
+ # Fetch the transcript data directly using the instance method
75
+ transcript_data = api.fetch(video_id, languages=languages)
76
+
77
+ # If searching for a term, filter and return with timestamps
78
+ if search_term:
79
+ search_lower = search_term.lower()
80
+ matches = []
81
+ for entry in transcript_data:
82
+ if search_lower in entry.text.lower():
83
+ timestamp = format_timestamp(entry.start)
84
+ duration = getattr(entry, 'duration', 0)
85
+ end_timestamp = format_timestamp(entry.start + duration)
86
+ matches.append({
87
+ 'timestamp': timestamp,
88
+ 'end_timestamp': end_timestamp,
89
+ 'start_seconds': entry.start,
90
+ 'text': entry.text
91
+ })
92
+
93
+ if not matches:
94
+ return f"No matches found for '{search_term}' in transcript."
95
+
96
+ output = [f"\n🔍 Found {len(matches)} matches for '{search_term}':\n"]
97
+ for match in matches:
98
+ output.append(f"[{match['timestamp']}] {match['text']}")
99
+ output.append(f"\n📋 Suggested clip range: {matches[0]['timestamp']} - {matches[-1]['end_timestamp']}")
100
+ return '\n'.join(output)
101
+
102
+ # If with_timestamps, format each line with timestamp
103
+ if with_timestamps:
104
+ output = []
105
+ output.append(f"\n📝 Transcript with Timestamps:\n")
106
+ output.append("=" * 60)
107
+ for entry in transcript_data:
108
+ timestamp = format_timestamp(entry.start)
109
+ output.append(f"[{timestamp}] {entry.text}")
110
+ return '\n'.join(output)
111
+
112
+ # Default: plain text format - join all text entries
113
+ plain_text = ' '.join([entry.text for entry in transcript_data])
114
+
115
+ return plain_text
116
+
117
+ except Exception as e:
118
+ print(f"Error retrieving captions: {str(e)}", file=sys.stderr)
119
+ sys.exit(1)
120
+
121
+
122
+ def find_timestamp_for_quote(url, quote, context_seconds=30):
123
+ """
124
+ Find the timestamp where a specific quote appears in the video.
125
+ Returns the start and end timestamps for a clip containing that quote.
126
+
127
+ Args:
128
+ url: YouTube video URL
129
+ quote: The quote to search for
130
+ context_seconds: How many seconds of context to include before/after
131
+
132
+ Returns:
133
+ Dict with start_time, end_time, and surrounding text
134
+ """
135
+ video_id = extract_video_id(url)
136
+ if not video_id:
137
+ return None
138
+
139
+ try:
140
+ api = YouTubeTranscriptApi()
141
+ # Use fetch to get the default transcript or specify languages
142
+ transcript_data = api.fetch(video_id)
143
+
144
+ quote_lower = quote.lower()
145
+
146
+ for i, entry in enumerate(transcript_data):
147
+ if quote_lower in entry.text.lower():
148
+ # Found the quote
149
+ start_time = max(0, entry.start - context_seconds)
150
+ end_time = entry.start + getattr(entry, 'duration', 5) + context_seconds
151
+
152
+ # Get surrounding context
153
+ context_entries = []
154
+ for j in range(max(0, i-3), min(len(transcript_data), i+4)):
155
+ context_entries.append({
156
+ 'timestamp': format_timestamp(transcript_data[j].start),
157
+ 'text': transcript_data[j].text
158
+ })
159
+
160
+ return {
161
+ 'found': True,
162
+ 'quote': entry.text,
163
+ 'timestamp': format_timestamp(entry.start),
164
+ 'clip_start': format_timestamp(start_time),
165
+ 'clip_end': format_timestamp(end_time),
166
+ 'context': context_entries
167
+ }
168
+
169
+ return {'found': False, 'message': f"Quote not found: {quote}"}
170
+
171
+ except Exception as e:
172
+ return {'found': False, 'message': str(e)}
173
+
174
+
175
+ def main():
176
+ parser = argparse.ArgumentParser(
177
+ description="Extract captions from YouTube videos with optional timestamps.",
178
+ formatter_class=argparse.RawDescriptionHelpFormatter,
179
+ epilog="""
180
+ Examples:
181
+ # Get plain transcript
182
+ python caption_reader.py --url "https://youtube.com/watch?v=xxx"
183
+
184
+ # Get transcript with timestamps
185
+ python caption_reader.py --url "https://youtube.com/watch?v=xxx" --timestamps
186
+
187
+ # Search for specific term and get timestamps
188
+ python caption_reader.py --url "https://youtube.com/watch?v=xxx" --search "electoral bonds"
189
+
190
+ # Find timestamp for a specific quote
191
+ python caption_reader.py --url "https://youtube.com/watch?v=xxx" --find-quote "corruption" --json
192
+ """
193
+ )
194
+
195
+ parser.add_argument("--url", required=True, help="YouTube video URL")
196
+ parser.add_argument("--languages", nargs="*", default=None,
197
+ help="Preferred language codes (e.g., en hi te). Default: en and Indian languages")
198
+ parser.add_argument("--timestamps", "-t", action="store_true",
199
+ help="Include timestamps with each line")
200
+ parser.add_argument("--search", "-s", help="Search for specific term and show timestamps")
201
+ parser.add_argument("--find-quote", "-f", help="Find exact timestamp for a quote")
202
+ parser.add_argument("--context", "-c", type=int, default=30,
203
+ help="Seconds of context around found quote (default: 30)")
204
+ parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
205
+
206
+ args = parser.parse_args()
207
+
208
+ if args.find_quote:
209
+ # Find timestamp for specific quote
210
+ result = find_timestamp_for_quote(args.url, args.find_quote, args.context)
211
+ if args.json:
212
+ print(json.dumps(result, indent=2, ensure_ascii=False))
213
+ else:
214
+ if result.get('found'):
215
+ print(f"\n✅ Quote Found!")
216
+ print(f" Timestamp: {result['timestamp']}")
217
+ print(f" Text: {result['quote']}")
218
+ print(f"\n🎬 Suggested Clip:")
219
+ print(f" Start: {result['clip_start']}")
220
+ print(f" End: {result['clip_end']}")
221
+ print(f"\n📄 Context:")
222
+ for entry in result['context']:
223
+ print(f" [{entry['timestamp']}] {entry['text']}")
224
+ else:
225
+ print(f"❌ {result.get('message', 'Quote not found')}")
226
+ else:
227
+ # Get transcript
228
+ captions = get_youtube_captions(
229
+ args.url,
230
+ args.languages,
231
+ with_timestamps=args.timestamps,
232
+ search_term=args.search
233
+ )
234
+ print(captions)
235
+
236
+
237
+ if __name__ == "__main__":
238
238
  main()