videonut 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.antigravity/config.toml +8 -0
- package/.claude/commands/archivist.toml +12 -0
- package/.claude/commands/director.toml +12 -0
- package/.claude/commands/eic.toml +12 -0
- package/.claude/commands/investigator.toml +12 -0
- package/.claude/commands/prompt.toml +12 -0
- package/.claude/commands/scavenger.toml +12 -0
- package/.claude/commands/scout.toml +12 -0
- package/.claude/commands/scriptwriter.toml +12 -0
- package/.claude/commands/seo.toml +12 -0
- package/.claude/commands/thumbnail.toml +12 -0
- package/.claude/commands/topic_scout.toml +12 -0
- package/.gemini/commands/archivist.toml +12 -0
- package/.gemini/commands/director.toml +12 -0
- package/.gemini/commands/eic.toml +12 -0
- package/.gemini/commands/investigator.toml +12 -0
- package/.gemini/commands/prompt.toml +12 -0
- package/.gemini/commands/scavenger.toml +12 -0
- package/.gemini/commands/scout.toml +12 -0
- package/.gemini/commands/scriptwriter.toml +12 -0
- package/.gemini/commands/seo.toml +12 -0
- package/.gemini/commands/thumbnail.toml +12 -0
- package/.gemini/commands/topic_scout.toml +12 -0
- package/.qwen/commands/archivist.toml +12 -0
- package/.qwen/commands/director.toml +12 -0
- package/.qwen/commands/eic.toml +12 -0
- package/.qwen/commands/investigator.toml +12 -0
- package/.qwen/commands/prompt.toml +12 -0
- package/.qwen/commands/scavenger.toml +12 -0
- package/.qwen/commands/scout.toml +12 -0
- package/.qwen/commands/scriptwriter.toml +12 -0
- package/.qwen/commands/seo.toml +12 -0
- package/.qwen/commands/thumbnail.toml +12 -0
- package/.qwen/commands/topic_scout.toml +12 -0
- package/USER_GUIDE.md +90 -0
- package/agents/core/eic.md +772 -0
- package/agents/core/prompt_agent.md +264 -0
- package/agents/core/self_review_protocol.md +143 -0
- package/agents/creative/director.md +247 -0
- package/agents/creative/scriptwriter.md +208 -0
- package/agents/creative/seo.md +316 -0
- package/agents/creative/thumbnail.md +285 -0
- package/agents/research/investigator.md +395 -0
- package/agents/research/topic_scout.md +419 -0
- package/agents/technical/archivist.md +289 -0
- package/agents/technical/scavenger.md +248 -0
- package/bin/videonut.js +389 -107
- package/config.yaml +62 -0
- package/docs/AUDIT_REPORT.md +364 -0
- package/docs/LIFECYCLE.md +651 -0
- package/docs/scriptwriter.md +43 -0
- package/file_validator.py +187 -0
- package/memory/short_term/asset_manifest.md +64 -0
- package/memory/short_term/investigation_dossier.md +31 -0
- package/memory/short_term/master_script.md +51 -0
- package/package.json +16 -3
- package/requirements.txt +9 -0
- package/scripts/setup.js +8 -0
- package/tools/check_env.py +77 -0
- package/tools/downloaders/__pycache__/caption_reader.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/image_grabber.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/pdf_reader.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/screenshotter.cpython-312.pyc +0 -0
- package/tools/downloaders/__pycache__/web_reader.cpython-312.pyc +0 -0
- package/tools/downloaders/article_screenshotter.py +388 -0
- package/tools/downloaders/caption_reader.py +238 -0
- package/tools/downloaders/clip_grabber.py +83 -0
- package/tools/downloaders/image_grabber.py +106 -0
- package/tools/downloaders/pdf_reader.py +163 -0
- package/tools/downloaders/pdf_screenshotter.py +240 -0
- package/tools/downloaders/screenshotter.py +58 -0
- package/tools/downloaders/web_reader.py +69 -0
- package/tools/downloaders/youtube_search.py +174 -0
- package/tools/logging/search_logger.py +334 -0
- package/tools/validators/__pycache__/archive_url.cpython-312.pyc +0 -0
- package/tools/validators/__pycache__/link_checker.cpython-312.pyc +0 -0
- package/tools/validators/archive_url.py +269 -0
- package/tools/validators/link_checker.py +45 -0
- package/workflow_orchestrator.py +337 -0
package/config.yaml
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# VideoNut Configuration
|
|
2
|
+
# This file is managed by the Topic Scout agent. All other agents READ ONLY, except the EIC, which sets the correction-tracking fields.
|
|
3
|
+
|
|
4
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
5
|
+
# USER SETTINGS
|
|
6
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
7
|
+
user_name: "Producer"
|
|
8
|
+
communication_language: "Telugu"
|
|
9
|
+
|
|
10
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
11
|
+
# PROJECT SETTINGS (Set by Topic Scout)
|
|
12
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
13
|
+
projects_folder: "./Projects"
|
|
14
|
+
current_project: "gemini_2025-12-30_SEBI-Hindenburg_004"
|
|
15
|
+
|
|
16
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
17
|
+
# VIDEO PRODUCTION SETTINGS (Set by Topic Scout)
|
|
18
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
19
|
+
video_format: "Investigative Documentary"
|
|
20
|
+
target_duration: 20
|
|
21
|
+
target_line_count: 2700
|
|
22
|
+
audio_language: "Telugu"
|
|
23
|
+
|
|
24
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
25
|
+
# SCOPE & REGION (Set by Topic Scout - User Selected)
|
|
26
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
27
|
+
# scope: international | national | regional
|
|
28
|
+
scope: "national"
|
|
29
|
+
|
|
30
|
+
# country: Only set if scope is "national" (e.g., India, USA, UK)
|
|
31
|
+
country: "India"
|
|
32
|
+
|
|
33
|
+
# region: User selected region (NOT auto-derived from language)
|
|
34
|
+
# Examples: "Telangana", "Andhra Pradesh", "Maharashtra", "Tamil Nadu", "Pan-India"
|
|
35
|
+
region: "Pan-India"
|
|
36
|
+
|
|
37
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
38
|
+
# INDUSTRY TAG (Set by Topic Scout)
|
|
39
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
40
|
+
# Helps agents stay in context and use appropriate sources
|
|
41
|
+
# Options: Finance, Stock Market, Political, Crime, Social Awareness,
|
|
42
|
+
# Technology, Entertainment, Sports, Health, Environment, Business, Other
|
|
43
|
+
industry_tag: "Political"
|
|
44
|
+
|
|
45
|
+
# Industry-specific sources (auto-populated based on industry_tag)
|
|
46
|
+
# Finance: RBI, SEBI, Economic Times, Mint
|
|
47
|
+
# Stock Market: NSE, BSE, MoneyControl, TradingView
|
|
48
|
+
# Political: Election Commission, PRS Legislative, Parliament videos
|
|
49
|
+
# Crime: Court records, Police statements, NCRB data
|
|
50
|
+
# Social Awareness: NGO reports, Government schemes, RTI data
|
|
51
|
+
|
|
52
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
53
|
+
# EIC CORRECTION TRACKING (Set by EIC after review)
|
|
54
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
55
|
+
# correction_log: Path to the correction log file (relative to project folder)
|
|
56
|
+
# Status: pending_review | corrections_needed | approved
|
|
57
|
+
correction_log: ""
|
|
58
|
+
correction_status: "pending_review"
|
|
59
|
+
|
|
60
|
+
# Agents with pending corrections (comma-separated)
|
|
61
|
+
# Example: "investigator,scriptwriter,director"
|
|
62
|
+
agents_with_errors: ""
|
|
package/docs/AUDIT_REPORT.md
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# VideoNut System Audit Report
|
|
2
|
+
|
|
3
|
+
**Date:** 2026-01-04
|
|
4
|
+
**Purpose:** Verify that the complete asset workflow works the way a human would perform it
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## 🎯 AUDIT SCOPE
|
|
9
|
+
|
|
10
|
+
Testing if the system can:
|
|
11
|
+
1. Find YouTube video → Get captions → Find timestamp → Download specific clip
|
|
12
|
+
2. Find PDF → Search for keywords → Screenshot relevant page
|
|
13
|
+
3. Find News Article → Scroll to quote → Highlight and screenshot
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## ✅ AUDIT RESULTS SUMMARY
|
|
18
|
+
|
|
19
|
+
| Workflow | Status | Tool Chain |
|
|
20
|
+
|----------|--------|------------|
|
|
21
|
+
| YouTube Clip Extraction | ✅ WORKING | `youtube_search.py` → `caption_reader.py` → `clip_grabber.py` |
|
|
22
|
+
| PDF Search & Screenshot | ✅ WORKING (ENHANCED) | `pdf_reader.py` → `pdf_screenshotter.py` (NEW) |
|
|
23
|
+
| Article Quote Screenshot | ✅ WORKING (FIXED) | `link_checker.py` → `article_screenshotter.py` |
|
|
24
|
+
| Link Validation | ✅ WORKING | `link_checker.py` |
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 📹 WORKFLOW 1: YOUTUBE VIDEO CLIP EXTRACTION
|
|
29
|
+
|
|
30
|
+
### How a Human Would Do It:
|
|
31
|
+
1. Search YouTube for "PM Modi speech on electoral bonds"
|
|
32
|
+
2. Find a video
|
|
33
|
+
3. Watch/skim to find the relevant quote
|
|
34
|
+
4. Note the timestamp (e.g., 5:23 to 5:45)
|
|
35
|
+
5. Download that specific clip
|
|
36
|
+
|
|
37
|
+
### How Our System Does It:
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
41
|
+
│ STEP 1: Director/Investigator searches for videos │
|
|
42
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
43
|
+
│ │
|
|
44
|
+
│ COMMAND: │
|
|
45
|
+
│ python youtube_search.py --query "PM Modi electoral bonds speech" ─► │
|
|
46
|
+
│ │
|
|
47
|
+
│ OUTPUT: │
|
|
48
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
49
|
+
│ │ 1. PM Modi addresses Parliament on Electoral Bonds ││
|
|
50
|
+
│ │ URL: https://youtube.com/watch?v=abc123 ││
|
|
51
|
+
│ │ Views: 1.2M | Duration: 15:30 ││
|
|
52
|
+
│ │ ││
|
|
53
|
+
│ │ 2. Full Press Conference - Electoral Bonds Explained ││
|
|
54
|
+
│ │ URL: https://youtube.com/watch?v=def456 ││
|
|
55
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
56
|
+
│ │
|
|
57
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
58
|
+
│
|
|
59
|
+
▼
|
|
60
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
61
|
+
│ STEP 2: Get transcript and search for quote │
|
|
62
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
63
|
+
│ │
|
|
64
|
+
│ COMMAND: │
|
|
65
|
+
│ python caption_reader.py --url "https://youtube.com/watch?v=abc123" │
|
|
66
|
+
│ --search "transparency" │
|
|
67
|
+
│ │
|
|
68
|
+
│ OUTPUT: │
|
|
69
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
70
|
+
│ │ 🔍 Found 3 matches for 'transparency': ││
|
|
71
|
+
│ │ ││
|
|
72
|
+
│ │ [05:23] "Electoral bonds ensure complete transparency" ││
|
|
73
|
+
│ │ [08:45] "We brought transparency to political funding" ││
|
|
74
|
+
│ │ [12:10] "Maximum transparency has been achieved" ││
|
|
75
|
+
│ │ ││
|
|
76
|
+
│ │ 📋 Suggested clip range: 05:23 - 12:25 ││
|
|
77
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
78
|
+
│ │
|
|
79
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
80
|
+
│
|
|
81
|
+
▼
|
|
82
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
83
|
+
│ STEP 3: Get EXACT timestamp for specific quote │
|
|
84
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
85
|
+
│ │
|
|
86
|
+
│ COMMAND: │
|
|
87
|
+
│ python caption_reader.py --url "https://youtube.com/watch?v=abc123" │
|
|
88
|
+
│ --find-quote "complete transparency" --json │
|
|
89
|
+
│ │
|
|
90
|
+
│ OUTPUT (JSON): │
|
|
91
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
92
|
+
│ │ { ││
|
|
93
|
+
│ │ "found": true, ││
|
|
94
|
+
│ │ "quote": "Electoral bonds ensure complete transparency", ││
|
|
95
|
+
│ │ "timestamp": "05:23", ││
|
|
96
|
+
│ │ "clip_start": "04:53", ◄── 30 sec before ││
|
|
97
|
+
│ │ "clip_end": "05:58", ◄── 35 sec after ││
|
|
98
|
+
│ │ "context": [...] ││
|
|
99
|
+
│ │ } ││
|
|
100
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
101
|
+
│ │
|
|
102
|
+
│ ✅ NOW WE KNOW: Download from 04:53 to 05:58 │
|
|
103
|
+
│ │
|
|
104
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
105
|
+
│
|
|
106
|
+
▼
|
|
107
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
108
|
+
│ STEP 4: Scavenger adds to asset_manifest.md │
|
|
109
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
110
|
+
│ │
|
|
111
|
+
│ MANIFEST ENTRY: │
|
|
112
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
113
|
+
│ │ | Scene | URL | Type | Timestamp | Quote | ││
|
|
114
|
+
│ │ |-------|-----|------|-----------|-------| ││
|
|
115
|
+
│ │ | 5 | youtube.com/watch?v=abc123 | Video Clip | 04:53-05:58 | ││
|
|
116
|
+
│ │ | | | | | "Electoral bonds ensure complete transparency" |││
|
|
117
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
118
|
+
│ │
|
|
119
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
120
|
+
│
|
|
121
|
+
▼
|
|
122
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
123
|
+
│ STEP 5: Archivist downloads the EXACT clip │
|
|
124
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
125
|
+
│ │
|
|
126
|
+
│ COMMAND: │
|
|
127
|
+
│ python clip_grabber.py --url "https://youtube.com/watch?v=abc123" │
|
|
128
|
+
│ --start "04:53" --end "05:58" │
|
|
129
|
+
│ --output "assets/005_PM_transparency.mp4" │
|
|
130
|
+
│ │
|
|
131
|
+
│ OUTPUT: │
|
|
132
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
133
|
+
│ │ ✅ Download successful. ││
|
|
134
|
+
│ │ ✅ File validation: 005_PM_transparency.mp4 (2.3 MB) ││
|
|
135
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
136
|
+
│ │
|
|
137
|
+
│ Result: 65 second clip (4:53 to 5:58) downloaded, NOT the full 15min!│
|
|
138
|
+
│ │
|
|
139
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### What If Captions Are Not Available?
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
146
|
+
│ FALLBACK: No Captions Available │
|
|
147
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
148
|
+
│ │
|
|
149
|
+
│ caption_reader.py OUTPUT: │
|
|
150
|
+
│ "Error: No captions available for this video" │
|
|
151
|
+
│ │
|
|
152
|
+
│ ARCHIVIST ACTION: │
|
|
153
|
+
│ 1. Download 30-second preview: --start "00:00" --end "00:30" │
|
|
154
|
+
│ 2. Log to MANUAL_REQUIRED.txt: │
|
|
155
|
+
│ "Scene 5: No captions available. Downloaded preview. │
|
|
156
|
+
│ Human must watch and find correct timestamp." │
|
|
157
|
+
│ │
|
|
158
|
+
│ ✅ System doesn't fail - gracefully degrades to manual review │
|
|
159
|
+
│ │
|
|
160
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## 📄 WORKFLOW 2: PDF SEARCH & SCREENSHOT
|
|
166
|
+
|
|
167
|
+
### How a Human Would Do It:
|
|
168
|
+
1. Download or open PDF (e.g., Supreme Court judgment)
|
|
169
|
+
2. Press Ctrl+F, search for "electoral bonds"
|
|
170
|
+
3. Go to the page where it's found
|
|
171
|
+
4. Take screenshot of that page
|
|
172
|
+
|
|
173
|
+
### How Our System Does It (ENHANCED):
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
177
|
+
│ STEP 1: Search for keyword in PDF │
|
|
178
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
179
|
+
│ │
|
|
180
|
+
│ COMMAND: │
|
|
181
|
+
│ python pdf_reader.py --url "https://sci.gov.in/judgment.pdf" │
|
|
182
|
+
│ --search "electoral bonds" │
|
|
183
|
+
│ │
|
|
184
|
+
│ OUTPUT: │
|
|
185
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
186
|
+
│ │ 📄 PDF loaded: 156 pages ││
|
|
187
|
+
│ │ 🔍 Searching for: 'electoral bonds' ││
|
|
188
|
+
│ │ ││
|
|
189
|
+
│ │ ✅ Found 8 matches: ││
|
|
190
|
+
│ │ ││
|
|
191
|
+
│ │ ════════════════════════════════════════════════════════════ ││
|
|
192
|
+
│ │ 📍 Match 1 - Page 23 ││
|
|
193
|
+
│ │ ════════════════════════════════════════════════════════════ ││
|
|
194
|
+
│ │ Line: The electoral bonds scheme was introduced in 2017... ││
|
|
195
|
+
│ │ ││
|
|
196
|
+
│ │ Context: ││
|
|
197
|
+
│ │ The Finance Act of 2017 made several amendments... ││
|
|
198
|
+
│ │ The electoral bonds scheme was introduced in 2017... ││
|
|
199
|
+
│ │ These bonds could only be purchased from SBI... ││
|
|
200
|
+
│ │ ││
|
|
201
|
+
│ │ 📸 Suggested page for screenshot: Page 23 ││
|
|
202
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
203
|
+
│ │
|
|
204
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
205
|
+
│
|
|
206
|
+
▼
|
|
207
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
208
|
+
│ STEP 2: Screenshot that specific page (NEW TOOL) │
|
|
209
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
210
|
+
│ │
|
|
211
|
+
│ COMMAND: │
|
|
212
|
+
│ python pdf_screenshotter.py --url "https://sci.gov.in/judgment.pdf" │
|
|
213
|
+
│ --search "electoral bonds" │
|
|
214
|
+
│ --output "assets/sc_judgment_page23.png" │
|
|
215
|
+
│ │
|
|
216
|
+
│ OUTPUT: │
|
|
217
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
218
|
+
│ │ 📥 Downloading PDF... ││
|
|
219
|
+
│ │ ✅ PDF downloaded ││
|
|
220
|
+
│ │ 🔍 Searching for 'electoral bonds' in PDF... ││
|
|
221
|
+
│ │ ✅ Found 'electoral bonds' on page 23 ││
|
|
222
|
+
│ │ 🌐 Opening PDF in browser... ││
|
|
223
|
+
│ │ 📸 Taking screenshot of page 23... ││
|
|
224
|
+
│ │ ✅ Screenshot saved: assets/sc_judgment_page23.png ││
|
|
225
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
226
|
+
│ │
|
|
227
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## 📰 WORKFLOW 3: NEWS ARTICLE SCREENSHOT
|
|
233
|
+
|
|
234
|
+
### How a Human Would Do It:
|
|
235
|
+
1. Open the news article
|
|
236
|
+
2. Scroll down to find the quote "PM Modi said..."
|
|
237
|
+
3. Take screenshot with that quote visible
|
|
238
|
+
|
|
239
|
+
### How Our System Does It:
|
|
240
|
+
|
|
241
|
+
```
|
|
242
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
243
|
+
│ STEP 1: Director identifies the quote to capture │
|
|
244
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
245
|
+
│ │
|
|
246
|
+
│ In master_script.md: │
|
|
247
|
+
│ [Source: https://timesofindia.com/article/123] │
|
|
248
|
+
│ [Screenshot-Quote: "₹560 crore donated after ED raid"] │
|
|
249
|
+
│ │
|
|
250
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
251
|
+
│
|
|
252
|
+
▼
|
|
253
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
254
|
+
│ STEP 2: Archivist takes screenshot with highlight │
|
|
255
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
256
|
+
│ │
|
|
257
|
+
│ COMMAND: │
|
|
258
|
+
│ python article_screenshotter.py │
|
|
259
|
+
│ --url "https://timesofindia.com/article/123" │
|
|
260
|
+
│ --quote "₹560 crore donated after ED raid" │
|
|
261
|
+
│ --output "assets/007_quid_pro_quo.png" │
|
|
262
|
+
│ │
|
|
263
|
+
│ PROCESS (INSIDE THE TOOL): │
|
|
264
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
265
|
+
│ │ 1. 🌐 Open article URL ││
|
|
266
|
+
│ │ 2. ⏳ Wait for page to fully load (networkidle) ││
|
|
267
|
+
│ │ 3. 🧹 Close cookie popups ││
|
|
268
|
+
│ │ 4. 🔍 Search for quote using 3 strategies: ││
|
|
269
|
+
│ │ - Strategy 1: Playwright exact match ││
|
|
270
|
+
│ │ - Strategy 2: First 5 words match ││
|
|
271
|
+
│ │ - Strategy 3: JavaScript deep search ││
|
|
272
|
+
│ │ 5. ✅ Quote found! ││
|
|
273
|
+
│ │ 6. 📍 Scroll to CENTER quote in viewport ││
|
|
274
|
+
│ │ - Method 1: scrollIntoView({ block: 'center' }) ││
|
|
275
|
+
│ │ - Method 2: scrollTo() calculation (fallback) ││
|
|
276
|
+
│ │ - Method 3: Playwright scroll_into_view_if_needed() ││
|
|
277
|
+
│ │ 7. 🎨 Highlight with yellow background + orange border ││
|
|
278
|
+
│ │ - Uses !important to override site CSS ││
|
|
279
|
+
│ │ 8. 📸 Take screenshot ││
|
|
280
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
281
|
+
│ │
|
|
282
|
+
│ OUTPUT: │
|
|
283
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
284
|
+
│ │ 🌐 Navigating to https://timesofindia.com/article/123... ││
|
|
285
|
+
│ │ ⏳ Waiting for dynamic content to load... ││
|
|
286
|
+
│ │ 🧹 Closing popups... ││
|
|
287
|
+
│ │ 🔍 Searching for quote: '₹560 crore donated after ED raid' ││
|
|
288
|
+
│ │ Strategy 1: Exact text match... ││
|
|
289
|
+
│ │ ✅ Found with Strategy 1 ││
|
|
290
|
+
│ │ 📍 Scrolling quote to center of viewport... ││
|
|
291
|
+
│ │ ✅ Scroll method 1 (scrollIntoView) succeeded ││
|
|
292
|
+
│ │ 🎨 Highlighting quote... ││
|
|
293
|
+
│ │ ✅ Quote highlighted with yellow background + orange border ││
|
|
294
|
+
│ │ 📸 Taking screenshot... ││
|
|
295
|
+
│ │ ✅ Screenshot saved: assets/007_quid_pro_quo.png (245,320 bytes) ││
|
|
296
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
297
|
+
│ │
|
|
298
|
+
│ RESULT: Screenshot shows the exact quote highlighted, not page header!│
|
|
299
|
+
│ │
|
|
300
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## 🔗 WORKFLOW 4: LINK VALIDATION
|
|
306
|
+
|
|
307
|
+
### Every URL is checked before use:
|
|
308
|
+
|
|
309
|
+
```
|
|
310
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
311
|
+
│ Link Validation in Every Step │
|
|
312
|
+
├─────────────────────────────────────────────────────────────────────────┤
|
|
313
|
+
│ │
|
|
314
|
+
│ COMMAND: │
|
|
315
|
+
│ python link_checker.py "https://example.com/article" │
|
|
316
|
+
│ │
|
|
317
|
+
│ POSSIBLE OUTPUTS: │
|
|
318
|
+
│ ┌────────────────────────────────────────────────────────────────────┐│
|
|
319
|
+
│ │ ✅ VALID: URL is accessible (HTTP 200) ││
|
|
320
|
+
│ │ ⚠️ REDIRECT: URL redirects to another location ││
|
|
321
|
+
│ │ ❌ INVALID: URL returns 404 or timeout ││
|
|
322
|
+
│ │ ⚠️ REQUIRES AUTH: URL needs login ││
|
|
323
|
+
│ └────────────────────────────────────────────────────────────────────┘│
|
|
324
|
+
│ │
|
|
325
|
+
│ WHO USES IT: │
|
|
326
|
+
│ - Scavenger: Before adding URL to manifest │
|
|
327
|
+
│ - Archivist: Before attempting download │
|
|
328
|
+
│ - EIC: To verify all URLs still work │
|
|
329
|
+
│ │
|
|
330
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
335
|
+
## 📊 TOOL CAPABILITY MATRIX
|
|
336
|
+
|
|
337
|
+
| Capability | Tool | Status |
|
|
338
|
+
|------------|------|--------|
|
|
339
|
+
| Search YouTube videos | `youtube_search.py` | ✅ Working |
|
|
340
|
+
| Get video captions | `caption_reader.py` | ✅ Working |
|
|
341
|
+
| Search keywords in captions | `caption_reader.py --search` | ✅ Working |
|
|
342
|
+
| Find exact quote timestamp | `caption_reader.py --find-quote` | ✅ Working |
|
|
343
|
+
| Download specific clip | `clip_grabber.py --start --end` | ✅ Working |
|
|
344
|
+
| Validate URLs | `link_checker.py` | ✅ Working |
|
|
345
|
+
| Read PDF content | `pdf_reader.py` | ✅ Working |
|
|
346
|
+
| **Search keywords in PDF** | `pdf_reader.py --search` | ✅ **ENHANCED** |
|
|
347
|
+
| **Screenshot PDF page** | `pdf_screenshotter.py` | ✅ **NEW** |
|
|
348
|
+
| Screenshot article | `article_screenshotter.py` | ✅ Working |
|
|
349
|
+
| **Scroll to quote** | `article_screenshotter.py` | ✅ **FIXED** |
|
|
350
|
+
| **Highlight quote** | `article_screenshotter.py` | ✅ **FIXED** |
|
|
351
|
+
| Download images | `image_grabber.py` | ✅ Working |
|
|
352
|
+
| Archive URLs | `archive_url.py` | ✅ Working |
|
|
353
|
+
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
## ✅ CONCLUSION
|
|
357
|
+
|
|
358
|
+
The system now works the way a human would:
|
|
359
|
+
|
|
360
|
+
1. **YouTube clips**: Searches → Reads captions → Finds timestamp → Downloads exact clip
|
|
361
|
+
2. **PDFs**: Downloads → Searches for keyword → Screenshots the relevant page
|
|
362
|
+
3. **Articles**: Opens → Scrolls to quote → Highlights → Screenshots
|
|
363
|
+
|
|
364
|
+
All agents are properly using these tools in the correct workflow order.
|