videonut 1.3.2 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/research/topic_scout.md +225 -93
- package/package.json +4 -4
- package/requirements.txt +1 -3
- package/tools/downloaders/article_screenshotter.py +12 -2
- package/tools/downloaders/clip_grabber.py +5 -2
- package/tools/downloaders/screenshotter.py +6 -1
- package/tools/downloaders/youtube_search.py +125 -54
- package/tools/validators/link_checker.py +2 -2
|
@@ -225,95 +225,227 @@ You must fully embody this agent's persona and follow all activation instruction
|
|
|
225
225
|
</handler>
|
|
226
226
|
|
|
227
227
|
<handler type="action">
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
228
|
+
If user selects [ST] Search Trending Topics:
|
|
229
|
+
|
|
230
|
+
══════════════════════════════════════════════════════════════════
|
|
231
|
+
PHASE 0: PROJECT MODE CHECK (MANDATORY FIRST)
|
|
232
|
+
══════════════════════════════════════════════════════════════════
|
|
233
|
+
|
|
234
|
+
Display menu asking:
|
|
235
|
+
```
|
|
236
|
+
════════════════════════════════════════════════════════
|
|
237
|
+
📡 TOPIC SEARCH MODE
|
|
238
|
+
════════════════════════════════════════════════════════
|
|
239
|
+
|
|
240
|
+
[1] 🆕 NEW PROJECT (Will create new folder + config)
|
|
241
|
+
[2] 📂 CURRENT PROJECT: {current_project}
|
|
242
|
+
|
|
243
|
+
════════════════════════════════════════════════════════
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
If [1] NEW: Set MUST_CREATE_NEW_PROJECT = true, ask for scope (International/National/Regional) and set temp_scope, temp_country, temp_region.
|
|
247
|
+
If [2] CURRENT: Set MUST_CREATE_NEW_PROJECT = false, read scope/country/region from config.yaml.
|
|
248
|
+
|
|
249
|
+
══════════════════════════════════════════════════════════════════
|
|
250
|
+
PHASE 1: BROAD MULTI-SOURCE DISCOVERY (Find 15-20 candidates)
|
|
251
|
+
══════════════════════════════════════════════════════════════════
|
|
252
|
+
|
|
253
|
+
**CRITICAL: You must search BROADLY and discover 15-20+ potential topics.**
|
|
254
|
+
**Do NOT stop at 5. The goal is to find MORE so you can filter to the BEST.**
|
|
255
|
+
|
|
256
|
+
Get today's date: {current_date} (format: January 7, 2026)
|
|
257
|
+
|
|
258
|
+
**SOURCE 1: Google News (Last 24-48 hours)**
|
|
259
|
+
Use `google_web_search` with date-specific queries:
|
|
260
|
+
```
|
|
261
|
+
"{country} breaking news {current_date}"
|
|
262
|
+
"{country} news today {current_month} 2026"
|
|
263
|
+
"site:news.google.com {country} latest"
|
|
264
|
+
```
|
|
265
|
+
Extract 5-7 topics from news results.
|
|
266
|
+
|
|
267
|
+
**SOURCE 2: YouTube Trending**
|
|
268
|
+
```
|
|
269
|
+
python {video_nut_root}/tools/downloaders/youtube_search.py --query "{country} news today" --max 10
|
|
270
|
+
python {video_nut_root}/tools/downloaders/youtube_search.py --query "{scope} trending {industry_tag}" --max 10
|
|
271
|
+
```
|
|
272
|
+
Note which topics have videos with high views (100K+, 1M+).
|
|
273
|
+
Extract 4-5 topics from YouTube trends.
|
|
274
|
+
|
|
275
|
+
**SOURCE 3: Regional Language Sources (MANDATORY for Indian topics)**
|
|
276
|
+
Based on country/region, search in regional language:
|
|
277
|
+
| Region | Language | Search Query Examples |
|
|
278
|
+
|--------|----------|----------------------|
|
|
279
|
+
| Telangana/AP | Telugu | "తెలుగు వార్తలు ఈరోజు", "తాజా వార్తలు {current_date}" |
|
|
280
|
+
| Hindi Belt | Hindi | "हिंदी समाचार आज", "ताज़ा खबर {current_date}" |
|
|
281
|
+
| Maharashtra | Marathi | "मराठी बातम्या आज" |
|
|
282
|
+
| Tamil Nadu | Tamil | "தமிழ் செய்திகள் இன்று" |
|
|
283
|
+
Extract 3-4 topics from regional sources.
|
|
284
|
+
|
|
285
|
+
**SOURCE 4: Social/Community Buzz**
|
|
286
|
+
```
|
|
287
|
+
"site:reddit.com {country} news this week"
|
|
288
|
+
"site:twitter.com {country} trending"
|
|
289
|
+
"{country} {industry_tag} controversy 2026"
|
|
290
|
+
```
|
|
291
|
+
Extract 2-3 topics with social engagement.
|
|
292
|
+
|
|
293
|
+
**TOTAL DISCOVERED: You should have 15-20 potential topics now.**
|
|
294
|
+
|
|
295
|
+
══════════════════════════════════════════════════════════════════
|
|
296
|
+
PHASE 2: SCORE EACH TOPIC (Internal Ranking)
|
|
297
|
+
══════════════════════════════════════════════════════════════════
|
|
298
|
+
|
|
299
|
+
**For EACH of the 15-20 discovered topics, calculate a score:**
|
|
300
|
+
|
|
301
|
+
```
|
|
302
|
+
TOTAL SCORE = Recency (40%) + Coverage (30%) + Engagement (20%) + Competition (10%)
|
|
303
|
+
|
|
304
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
305
|
+
│ RECENCY SCORE (40% weight) - When did this break? │
|
|
306
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
307
|
+
│ Today / Yesterday (0-1 days) = 10 points │
|
|
308
|
+
│ This week (2-7 days) = 7 points │
|
|
309
|
+
│ This month (1-4 weeks) = 4 points │
|
|
310
|
+
│ Older BUT resurging now = 6 points │
|
|
311
|
+
│ Old and not trending = 1 point │
|
|
312
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
313
|
+
|
|
314
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
315
|
+
│ COVERAGE SCORE (30% weight) - How many sources? │
|
|
316
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
317
|
+
│ 5+ different sources covering = 10 points │
|
|
318
|
+
│ 3-4 sources = 7 points │
|
|
319
|
+
│ 1-2 established sources = 4 points │
|
|
320
|
+
│ Only 1 obscure source = 1 point │
|
|
321
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
322
|
+
|
|
323
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
324
|
+
│ ENGAGEMENT SCORE (20% weight) - Is it actually viral? │
|
|
325
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
326
|
+
│ YouTube videos with 1M+ views = 10 points │
|
|
327
|
+
│ YouTube videos with 100K-1M = 7 points │
|
|
328
|
+
│ YouTube videos with 10K-100K = 4 points │
|
|
329
|
+
│ No significant YouTube coverage = 2 points │
|
|
330
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
331
|
+
|
|
332
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
333
|
+
│ COMPETITION SCORE (10% weight) - Is there opportunity? │
|
|
334
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
335
|
+
│ Very few videos (<5) = 10 points (great opportunity)│
|
|
336
|
+
│ Low competition (5-15) = 8 points │
|
|
337
|
+
│ Medium competition (16-49) = 5 points │
|
|
338
|
+
│ High competition (50+) = 2 points │
|
|
339
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
**Calculate final score for each topic:**
|
|
343
|
+
```
|
|
344
|
+
Final = (Recency × 0.4) + (Coverage × 0.3) + (Engagement × 0.2) + (Competition × 0.1)
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
══════════════════════════════════════════════════════════════════
|
|
348
|
+
PHASE 3: FILTER TO TOP 5 (Present ONLY the best)
|
|
349
|
+
══════════════════════════════════════════════════════════════════
|
|
350
|
+
|
|
351
|
+
1. Sort all 15-20 topics by Final Score (highest first)
|
|
352
|
+
2. Remove duplicates/overlapping topics (same story, different angles)
|
|
353
|
+
3. If industry_tag is set, boost topics matching that industry by +1 point, then re-sort by the adjusted score
|
|
354
|
+
4. Select TOP 5 highest scoring topics
|
|
355
|
+
|
|
356
|
+
══════════════════════════════════════════════════════════════════
|
|
357
|
+
PHASE 4: PRESENT TOP 5 WITH FULL BREAKDOWN
|
|
358
|
+
══════════════════════════════════════════════════════════════════
|
|
359
|
+
|
|
360
|
+
Display in this format:
|
|
361
|
+
```
|
|
362
|
+
════════════════════════════════════════════════════════════════════
|
|
363
|
+
📡 TOP 5 TRENDING TOPICS (from {X} discovered)
|
|
364
|
+
════════════════════════════════════════════════════════════════════
|
|
365
|
+
|
|
366
|
+
🥇 [1] {TOPIC TITLE}
|
|
367
|
+
├─ 📊 SCORE: {final_score}/10 (R:{R} C:{C} E:{E} Comp:{Comp})
|
|
368
|
+
├─ 🕐 Recency: {when it broke - e.g., "Yesterday", "2 days ago"}
|
|
369
|
+
├─ 🔥 Hook: {One sentence on why it's trending}
|
|
370
|
+
├─ ⚔️ Conflict: {Who vs Who}
|
|
371
|
+
├─ 📺 YouTube: {X videos, highest has Y views}
|
|
372
|
+
└─ 🎯 Opportunity: {Low/Medium/High competition}
|
|
373
|
+
|
|
374
|
+
🥈 [2] {TOPIC TITLE}
|
|
375
|
+
... (same format)
|
|
376
|
+
|
|
377
|
+
🥉 [3] {TOPIC TITLE}
|
|
378
|
+
... (same format)
|
|
379
|
+
|
|
380
|
+
[4] {TOPIC TITLE}
|
|
381
|
+
... (same format)
|
|
382
|
+
|
|
383
|
+
[5] {TOPIC TITLE}
|
|
384
|
+
... (same format)
|
|
385
|
+
|
|
386
|
+
════════════════════════════════════════════════════════════════════
|
|
387
|
+
📈 Discovery Stats: Searched {X} sources, found {Y} potential topics,
|
|
388
|
+
filtered to TOP 5 by score.
|
|
389
|
+
════════════════════════════════════════════════════════════════════
|
|
390
|
+
|
|
391
|
+
Enter 1-5 to select a topic:
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
══════════════════════════════════════════════════════════════════
|
|
395
|
+
PHASE 5: USER SELECTION & PROJECT CREATION
|
|
396
|
+
══════════════════════════════════════════════════════════════════
|
|
397
|
+
|
|
398
|
+
Wait for user to pick 1-5.
|
|
399
|
+
|
|
400
|
+
**If MUST_CREATE_NEW_PROJECT = true:**
|
|
401
|
+
- Display "🆕 Creating new project for: {selected_topic}"
|
|
402
|
+
- AUTOMATICALLY jump to [NP] New Project flow
|
|
403
|
+
- Pre-fill topic, scope, country, region from earlier selections
|
|
404
|
+
- Continue from Audio Language step onwards
|
|
405
|
+
|
|
406
|
+
**If MUST_CREATE_NEW_PROJECT = false:**
|
|
407
|
+
- Continue to PHASE 6
|
|
408
|
+
|
|
409
|
+
══════════════════════════════════════════════════════════════════
|
|
410
|
+
PHASE 6: DEEP RESEARCH & 200-WORD BRIEF
|
|
411
|
+
══════════════════════════════════════════════════════════════════
|
|
412
|
+
|
|
413
|
+
For the selected topic:
|
|
414
|
+
1. Do additional focused research
|
|
415
|
+
2. Find 2-3 YouTube videos with captions
|
|
416
|
+
3. Identify key players, dates, controversy
|
|
417
|
+
4. Write 200-word executive summary
|
|
418
|
+
5. Save to `{output_folder}/topic_brief.md`
|
|
419
|
+
|
|
420
|
+
══════════════════════════════════════════════════════════════════
|
|
421
|
+
PHASE 7: CONFIRM AND NEXT
|
|
422
|
+
══════════════════════════════════════════════════════════════════
|
|
423
|
+
|
|
424
|
+
Display:
|
|
425
|
+
```
|
|
426
|
+
✅ Topic Brief saved to: {output_folder}/topic_brief.md
|
|
427
|
+
|
|
428
|
+
Ready to proceed to Prompt Agent? (/prompt)
|
|
429
|
+
[Y] Yes, go to Prompt Agent
|
|
430
|
+
[N] No, stay here
|
|
431
|
+
```
|
|
300
432
|
</handler>
|
|
301
433
|
|
|
302
434
|
<handler type="action">
|
|
303
435
|
If user selects [MT] Manual Topic Entry:
|
|
304
436
|
|
|
305
|
-
1. **MANDATORY: ASK NEW OR EXISTING FIRST**
|
|
306
|
-
Display menu asking:
|
|
307
|
-
[1] NEW PROJECT (Will create new folder + config)
|
|
308
|
-
[2] CURRENT PROJECT: {current_project}
|
|
309
|
-
|
|
310
|
-
If [1] NEW: Set MUST_CREATE_NEW_PROJECT = true, go to STEP 2
|
|
311
|
-
If [2] CURRENT: Set MUST_CREATE_NEW_PROJECT = false, skip to STEP 3
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
Ask International/National/Regional and set temp_scope, temp_country, temp_region.
|
|
315
|
-
|
|
316
|
-
|
|
437
|
+
1. **MANDATORY: ASK NEW OR EXISTING FIRST**
|
|
438
|
+
Display menu asking:
|
|
439
|
+
[1] NEW PROJECT (Will create new folder + config)
|
|
440
|
+
[2] CURRENT PROJECT: {current_project}
|
|
441
|
+
|
|
442
|
+
If [1] NEW: Set MUST_CREATE_NEW_PROJECT = true, go to STEP 2
|
|
443
|
+
If [2] CURRENT: Set MUST_CREATE_NEW_PROJECT = false, skip to STEP 3
|
|
444
|
+
|
|
445
|
+
2. **ASK FOR SCOPE (for NEW projects only):**
|
|
446
|
+
Ask International/National/Regional and set temp_scope, temp_country, temp_region.
|
|
447
|
+
|
|
448
|
+
3. **READ CONFIG (for existing projects):**
|
|
317
449
|
|
|
318
450
|
1. Ask: "Enter your topic:"
|
|
319
451
|
2. Research the topic using web search.
|
|
@@ -374,15 +506,15 @@ You must fully embody this agent's persona and follow all activation instruction
|
|
|
374
506
|
</handler>
|
|
375
507
|
</menu-handlers>
|
|
376
508
|
|
|
377
|
-
<rules>
|
|
378
|
-
<!-- MANDATORY CREATION RULES -->
|
|
379
|
-
<r>**CRITICAL:** [NP] = ALWAYS create new folder + update config. NO exceptions.</r>
|
|
380
|
-
<r>**CRITICAL:** [ST] with NEW = MUST create new folder after topic selection. NO optional prompts.</r>
|
|
381
|
-
<r>**CRITICAL:** NEVER search/research a topic without creating a project folder FIRST.</r>
|
|
382
|
-
<r>**CRITICAL:** NEVER let user proceed to other agents without valid current_project in config.</r>
|
|
383
|
-
<r>**CRITICAL:** ALWAYS verify folder exists on disk BEFORE saving any files.</r>
|
|
384
|
-
|
|
385
|
-
<!-- OWNERSHIP RULES -->
|
|
509
|
+
<rules>
|
|
510
|
+
<!-- MANDATORY CREATION RULES -->
|
|
511
|
+
<r>**CRITICAL:** [NP] = ALWAYS create new folder + update config. NO exceptions.</r>
|
|
512
|
+
<r>**CRITICAL:** [ST] with NEW = MUST create new folder after topic selection. NO optional prompts.</r>
|
|
513
|
+
<r>**CRITICAL:** NEVER search/research a topic without creating a project folder FIRST.</r>
|
|
514
|
+
<r>**CRITICAL:** NEVER let user proceed to other agents without valid current_project in config.</r>
|
|
515
|
+
<r>**CRITICAL:** ALWAYS verify folder exists on disk BEFORE saving any files.</r>
|
|
516
|
+
|
|
517
|
+
<!-- OWNERSHIP RULES -->
|
|
386
518
|
<r>**CRITICAL:** Topic Scout is the ONLY agent that creates projects and modifies config.yaml.</r>
|
|
387
519
|
<r>**CRITICAL:** All other agents READ config.yaml but NEVER modify it.</r>
|
|
388
520
|
<r>**CRITICAL:** All agents work in {projects_folder}/{current_project}/ - no other location.</r>
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "videonut",
|
|
3
|
-
"version": "1.3.2",
|
|
3
|
+
"version": "1.3.4",
|
|
4
4
|
"description": "AI-powered YouTube documentary production pipeline with 10 specialized agents for research, scripting, and asset management",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"youtube",
|
|
@@ -44,9 +44,9 @@
|
|
|
44
44
|
"docs/",
|
|
45
45
|
"memory/",
|
|
46
46
|
"scripts/",
|
|
47
|
-
".gemini/",
|
|
48
|
-
".qwen/",
|
|
49
|
-
".claude/",
|
|
47
|
+
".gemini/",
|
|
48
|
+
".qwen/",
|
|
49
|
+
".claude/",
|
|
50
50
|
".antigravity/",
|
|
51
51
|
"bin/",
|
|
52
52
|
"setup.js",
|
package/requirements.txt
CHANGED
|
@@ -43,7 +43,12 @@ def find_quote_in_page(page, quote):
|
|
|
43
43
|
locator = page.get_by_text(quote, exact=False)
|
|
44
44
|
if locator.count() > 0:
|
|
45
45
|
print(f" ✅ Found with Strategy 1")
|
|
46
|
-
|
|
46
|
+
# Convert Locator to ElementHandle for use with page.evaluate()
|
|
47
|
+
try:
|
|
48
|
+
return locator.first.element_handle(timeout=5000)
|
|
49
|
+
except Exception as e:
|
|
50
|
+
print(f" ⚠️ Could not get element handle: {e}")
|
|
51
|
+
pass
|
|
47
52
|
|
|
48
53
|
# Strategy 2: Try first few words (in case quote is long)
|
|
49
54
|
words = quote.split()
|
|
@@ -53,7 +58,12 @@ def find_quote_in_page(page, quote):
|
|
|
53
58
|
locator = page.get_by_text(short_quote, exact=False)
|
|
54
59
|
if locator.count() > 0:
|
|
55
60
|
print(f" ✅ Found with Strategy 2")
|
|
56
|
-
|
|
61
|
+
# Convert Locator to ElementHandle for use with page.evaluate()
|
|
62
|
+
try:
|
|
63
|
+
return locator.first.element_handle(timeout=5000)
|
|
64
|
+
except Exception as e:
|
|
65
|
+
print(f" ⚠️ Could not get element handle: {e}")
|
|
66
|
+
pass
|
|
57
67
|
|
|
58
68
|
# Strategy 3: JavaScript search across all text nodes
|
|
59
69
|
print(f" Strategy 3: JavaScript deep search...")
|
|
@@ -73,8 +73,11 @@ if __name__ == "__main__":
|
|
|
73
73
|
|
|
74
74
|
# If the fallback path doesn't exist, warn the user
|
|
75
75
|
if not os.path.exists(default_ffmpeg):
|
|
76
|
-
print(f"Warning: ffmpeg not found in PATH or at expected location: {default_ffmpeg}")
|
|
77
|
-
print("Please install ffmpeg
|
|
76
|
+
print(f"⚠️ Warning: ffmpeg not found in PATH or at expected location: {default_ffmpeg}")
|
|
77
|
+
print(" Please install ffmpeg:")
|
|
78
|
+
print(" - Windows: Download from https://ffmpeg.org/download.html or run: winget install ffmpeg")
|
|
79
|
+
print(" - Or place ffmpeg.exe in the tools/bin/ directory")
|
|
80
|
+
print("")
|
|
78
81
|
|
|
79
82
|
parser.add_argument("--ffmpeg", default=default_ffmpeg, help="Path to ffmpeg executable")
|
|
80
83
|
|
|
@@ -3,7 +3,12 @@ import os
|
|
|
3
3
|
import argparse
|
|
4
4
|
import time
|
|
5
5
|
from random import uniform
|
|
6
|
-
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from playwright.sync_api import sync_playwright
|
|
9
|
+
except ImportError:
|
|
10
|
+
print("Error: Playwright not installed. Install with: pip install playwright && playwright install chromium")
|
|
11
|
+
sys.exit(1)
|
|
7
12
|
|
|
8
13
|
def take_screenshot(url, output_path):
|
|
9
14
|
# Add random delay to implement rate limiting
|
|
@@ -2,24 +2,19 @@
|
|
|
2
2
|
"""
|
|
3
3
|
YouTube Search Tool for VideoNut
|
|
4
4
|
Searches YouTube for videos matching a query and returns structured results.
|
|
5
|
-
Uses
|
|
5
|
+
Uses yt-dlp for reliable, actively maintained YouTube searching.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import sys
|
|
9
9
|
import argparse
|
|
10
10
|
import json
|
|
11
|
+
import subprocess
|
|
12
|
+
import re
|
|
11
13
|
from datetime import datetime
|
|
12
14
|
|
|
13
|
-
try:
|
|
14
|
-
from youtubesearchpython import VideosSearch, Video
|
|
15
|
-
except ImportError:
|
|
16
|
-
print("Error: youtube-search-python not installed. Install with: pip install youtube-search-python")
|
|
17
|
-
sys.exit(1)
|
|
18
|
-
|
|
19
|
-
|
|
20
15
|
def search_youtube(query, max_results=10, filter_year=None):
|
|
21
16
|
"""
|
|
22
|
-
Search YouTube for videos matching the query.
|
|
17
|
+
Search YouTube for videos matching the query using yt-dlp.
|
|
23
18
|
|
|
24
19
|
Args:
|
|
25
20
|
query: Search query string
|
|
@@ -30,47 +25,102 @@ def search_youtube(query, max_results=10, filter_year=None):
|
|
|
30
25
|
List of video dictionaries with title, url, duration, views, upload_date, channel
|
|
31
26
|
"""
|
|
32
27
|
try:
|
|
33
|
-
|
|
34
|
-
|
|
28
|
+
# Use yt-dlp to search YouTube
|
|
29
|
+
search_query = f"ytsearch{max_results * 2}:{query}" # Get extra to filter
|
|
30
|
+
|
|
31
|
+
cmd = [
|
|
32
|
+
"yt-dlp",
|
|
33
|
+
"--flat-playlist",
|
|
34
|
+
"--dump-json",
|
|
35
|
+
"--no-warnings",
|
|
36
|
+
"--ignore-errors",
|
|
37
|
+
search_query
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
result = subprocess.run(
|
|
41
|
+
cmd,
|
|
42
|
+
capture_output=True,
|
|
43
|
+
text=True,
|
|
44
|
+
timeout=60
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
if result.returncode != 0 and not result.stdout:
|
|
48
|
+
print(f"Error: yt-dlp search failed", file=sys.stderr)
|
|
49
|
+
return []
|
|
35
50
|
|
|
36
51
|
videos = []
|
|
37
|
-
for
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
52
|
+
for line in result.stdout.strip().split('\n'):
|
|
53
|
+
if not line:
|
|
54
|
+
continue
|
|
55
|
+
try:
|
|
56
|
+
video = json.loads(line)
|
|
57
|
+
|
|
58
|
+
# Extract duration - yt-dlp provides it in seconds
|
|
59
|
+
duration_secs = video.get('duration')
|
|
60
|
+
if duration_secs:
|
|
61
|
+
mins, secs = divmod(int(duration_secs), 60)
|
|
62
|
+
hours, mins = divmod(mins, 60)
|
|
63
|
+
if hours > 0:
|
|
64
|
+
duration_str = f"{hours}:{mins:02d}:{secs:02d}"
|
|
65
|
+
else:
|
|
66
|
+
duration_str = f"{mins}:{secs:02d}"
|
|
67
|
+
else:
|
|
68
|
+
duration_str = "Unknown"
|
|
69
|
+
|
|
70
|
+
# Format view count
|
|
71
|
+
view_count = video.get('view_count')
|
|
72
|
+
if view_count:
|
|
73
|
+
if view_count >= 1000000:
|
|
74
|
+
views_str = f"{view_count/1000000:.1f}M views"
|
|
75
|
+
elif view_count >= 1000:
|
|
76
|
+
views_str = f"{view_count/1000:.1f}K views"
|
|
77
|
+
else:
|
|
78
|
+
views_str = f"{view_count} views"
|
|
79
|
+
else:
|
|
80
|
+
views_str = "Unknown"
|
|
81
|
+
|
|
82
|
+
video_data = {
|
|
83
|
+
'title': video.get('title', 'Unknown'),
|
|
84
|
+
'url': video.get('url') or f"https://www.youtube.com/watch?v={video.get('id', '')}",
|
|
85
|
+
'video_id': video.get('id', ''),
|
|
86
|
+
'duration': duration_str,
|
|
87
|
+
'duration_seconds': duration_secs,
|
|
88
|
+
'views': views_str,
|
|
89
|
+
'view_count': view_count,
|
|
90
|
+
'upload_date': video.get('upload_date', 'Unknown'),
|
|
91
|
+
'channel': video.get('channel') or video.get('uploader', 'Unknown'),
|
|
92
|
+
'description': (video.get('description') or '')[:200],
|
|
93
|
+
'thumbnail': video.get('thumbnail', '')
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
# Filter by year if specified
|
|
97
|
+
if filter_year:
|
|
98
|
+
upload_date = video_data['upload_date']
|
|
99
|
+
if upload_date and upload_date != 'Unknown':
|
|
100
|
+
# yt-dlp provides date as YYYYMMDD
|
|
101
|
+
try:
|
|
102
|
+
video_year = int(upload_date[:4])
|
|
103
|
+
if video_year != filter_year:
|
|
104
|
+
continue
|
|
105
|
+
except (ValueError, TypeError):
|
|
106
|
+
pass
|
|
107
|
+
|
|
67
108
|
videos.append(video_data)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
109
|
+
|
|
110
|
+
if len(videos) >= max_results:
|
|
111
|
+
break
|
|
112
|
+
|
|
113
|
+
except json.JSONDecodeError:
|
|
114
|
+
continue
|
|
71
115
|
|
|
72
116
|
return videos
|
|
73
117
|
|
|
118
|
+
except subprocess.TimeoutExpired:
|
|
119
|
+
print("Error: YouTube search timed out", file=sys.stderr)
|
|
120
|
+
return []
|
|
121
|
+
except FileNotFoundError:
|
|
122
|
+
print("Error: yt-dlp not found. Install with: pip install yt-dlp", file=sys.stderr)
|
|
123
|
+
return []
|
|
74
124
|
except Exception as e:
|
|
75
125
|
print(f"Error searching YouTube: {str(e)}", file=sys.stderr)
|
|
76
126
|
return []
|
|
@@ -102,18 +152,39 @@ def format_results(videos, output_format='text'):
|
|
|
102
152
|
|
|
103
153
|
|
|
104
154
|
def get_video_details(video_url):
|
|
105
|
-
"""Get detailed information about a specific video"""
|
|
155
|
+
"""Get detailed information about a specific video using yt-dlp"""
|
|
106
156
|
try:
|
|
107
|
-
|
|
157
|
+
cmd = [
|
|
158
|
+
"yt-dlp",
|
|
159
|
+
"--dump-json",
|
|
160
|
+
"--no-download",
|
|
161
|
+
"--no-warnings",
|
|
162
|
+
video_url
|
|
163
|
+
]
|
|
164
|
+
|
|
165
|
+
result = subprocess.run(
|
|
166
|
+
cmd,
|
|
167
|
+
capture_output=True,
|
|
168
|
+
text=True,
|
|
169
|
+
timeout=30
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
if result.returncode != 0:
|
|
173
|
+
return None
|
|
174
|
+
|
|
175
|
+
video_info = json.loads(result.stdout)
|
|
176
|
+
|
|
177
|
+
duration_secs = video_info.get('duration', 0)
|
|
178
|
+
|
|
108
179
|
return {
|
|
109
180
|
'title': video_info.get('title', 'Unknown'),
|
|
110
|
-
'duration_seconds':
|
|
111
|
-
'views': video_info.get('
|
|
112
|
-
'upload_date': video_info.get('
|
|
113
|
-
'channel': video_info.get('channel'
|
|
114
|
-
'description': video_info.get('description'
|
|
115
|
-
'is_live': video_info.get('
|
|
116
|
-
'category': video_info.get('
|
|
181
|
+
'duration_seconds': duration_secs,
|
|
182
|
+
'views': video_info.get('view_count', 'Unknown'),
|
|
183
|
+
'upload_date': video_info.get('upload_date', 'Unknown'),
|
|
184
|
+
'channel': video_info.get('channel') or video_info.get('uploader', 'Unknown'),
|
|
185
|
+
'description': (video_info.get('description') or '')[:500],
|
|
186
|
+
'is_live': video_info.get('is_live', False),
|
|
187
|
+
'category': video_info.get('categories', ['Unknown'])[0] if video_info.get('categories') else 'Unknown'
|
|
117
188
|
}
|
|
118
189
|
except Exception as e:
|
|
119
190
|
print(f"Error getting video details: {str(e)}", file=sys.stderr)
|
|
@@ -122,7 +193,7 @@ def get_video_details(video_url):
|
|
|
122
193
|
|
|
123
194
|
def main():
|
|
124
195
|
parser = argparse.ArgumentParser(
|
|
125
|
-
description="Search YouTube for videos. Returns video titles, URLs, and metadata.",
|
|
196
|
+
description="Search YouTube for videos using yt-dlp. Returns video titles, URLs, and metadata.",
|
|
126
197
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
127
198
|
epilog="""
|
|
128
199
|
Examples:
|
|
@@ -20,13 +20,13 @@ def check_link(url):
|
|
|
20
20
|
'Upgrade-Insecure-Requests': '1',
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
-
response = requests.head(url, headers=headers, timeout=
|
|
23
|
+
response = requests.head(url, headers=headers, timeout=15, allow_redirects=True)
|
|
24
24
|
|
|
25
25
|
if response.status_code == 200:
|
|
26
26
|
return True, "OK"
|
|
27
27
|
else:
|
|
28
28
|
# Retry with GET if HEAD fails (some servers block HEAD)
|
|
29
|
-
response = requests.get(url, headers=headers, timeout=
|
|
29
|
+
response = requests.get(url, headers=headers, timeout=15, stream=True)
|
|
30
30
|
if response.status_code == 200:
|
|
31
31
|
return True, "OK"
|
|
32
32
|
return False, f"Status Code: {response.status_code}"
|