@biggora/claude-plugins 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +13 -0
- package/CLAUDE.md +55 -0
- package/LICENSE +1 -1
- package/README.md +208 -39
- package/bin/cli.js +39 -0
- package/package.json +30 -17
- package/registry/registry.json +166 -1
- package/registry/schema.json +10 -0
- package/src/commands/skills/add.js +194 -0
- package/src/commands/skills/list.js +52 -0
- package/src/commands/skills/remove.js +27 -0
- package/src/commands/skills/update.js +74 -0
- package/src/config.js +5 -0
- package/src/skills/codex-cli/SKILL.md +265 -0
- package/src/skills/commafeed-api/SKILL.md +1012 -0
- package/src/skills/gemini-cli/SKILL.md +379 -0
- package/src/skills/gemini-cli/references/commands.md +145 -0
- package/src/skills/gemini-cli/references/configuration.md +182 -0
- package/src/skills/gemini-cli/references/headless-and-scripting.md +181 -0
- package/src/skills/gemini-cli/references/mcp-and-extensions.md +254 -0
- package/src/skills/n8n-api/SKILL.md +623 -0
- package/src/skills/notebook-lm/SKILL.md +217 -0
- package/src/skills/notebook-lm/references/artifact-options.md +168 -0
- package/src/skills/notebook-lm/references/auth.md +58 -0
- package/src/skills/notebook-lm/references/workflows.md +144 -0
- package/src/skills/screen-recording/SKILL.md +309 -0
- package/src/skills/screen-recording/references/approach1-programmatic.md +311 -0
- package/src/skills/screen-recording/references/approach2-xvfb.md +232 -0
- package/src/skills/screen-recording/references/design-patterns.md +168 -0
- package/src/skills/test-mobile-app/SKILL.md +212 -0
- package/src/skills/test-mobile-app/references/report-template.md +95 -0
- package/src/skills/test-mobile-app/references/setup-appium.md +154 -0
- package/src/skills/test-mobile-app/scripts/analyze_apk.py +164 -0
- package/src/skills/test-mobile-app/scripts/check_environment.py +116 -0
- package/src/skills/test-mobile-app/scripts/generate_report.py +250 -0
- package/src/skills/test-mobile-app/scripts/run_tests.py +326 -0
- package/src/skills/test-web-ui/SKILL.md +232 -0
- package/src/skills/test-web-ui/references/test_case_schema.md +102 -0
- package/src/skills/test-web-ui/scripts/discover.py +176 -0
- package/src/skills/test-web-ui/scripts/generate_report.py +237 -0
- package/src/skills/test-web-ui/scripts/run_tests.py +296 -0
- package/src/skills/text-to-speech/SKILL.md +236 -0
- package/src/skills/text-to-speech/references/espeak-cli.md +277 -0
- package/src/skills/text-to-speech/references/kokoro-onnx.md +124 -0
- package/src/skills/text-to-speech/references/online-engines.md +128 -0
- package/src/skills/text-to-speech/references/pyttsx3-espeak.md +143 -0
- package/src/skills/tm-search/SKILL.md +240 -0
- package/src/skills/tm-search/references/field-guide.md +79 -0
- package/src/skills/tm-search/references/scraping-fallback.md +140 -0
- package/src/skills/tm-search/scripts/tm_search.py +375 -0
- package/src/skills/wp-rest-api/SKILL.md +114 -0
- package/src/skills/wp-rest-api/references/authentication.md +18 -0
- package/src/skills/wp-rest-api/references/custom-content-types.md +20 -0
- package/src/skills/wp-rest-api/references/discovery-and-params.md +20 -0
- package/src/skills/wp-rest-api/references/responses-and-fields.md +30 -0
- package/src/skills/wp-rest-api/references/routes-and-endpoints.md +36 -0
- package/src/skills/wp-rest-api/references/schema.md +22 -0
- package/src/skills/youtube-search/SKILL.md +412 -0
- package/src/skills/youtube-search/references/parsing-examples.md +159 -0
- package/src/skills/youtube-search/references/youtube-api-quota.md +85 -0
- package/src/skills/youtube-thumbnail/SKILL.md +1060 -0
- package/tests/commands/info.test.js +49 -0
- package/tests/commands/install.test.js +36 -0
- package/tests/commands/list.test.js +66 -0
- package/tests/commands/publish.test.js +182 -0
- package/tests/commands/search.test.js +45 -0
- package/tests/commands/uninstall.test.js +29 -0
- package/tests/commands/update.test.js +59 -0
- package/tests/functional/skills-lifecycle.test.js +293 -0
- package/tests/helpers/fixtures.js +63 -0
- package/tests/integration/cli.test.js +83 -0
- package/tests/skills/add.test.js +138 -0
- package/tests/skills/list.test.js +63 -0
- package/tests/skills/remove.test.js +38 -0
- package/tests/skills/update.test.js +60 -0
- package/tests/unit/config.test.js +31 -0
- package/tests/unit/registry.test.js +79 -0
- package/tests/unit/utils.test.js +150 -0
- package/tests/validation/registry-schema.test.js +112 -0
- package/tests/validation/skills-validation.test.js +96 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: youtube-search
|
|
3
|
+
description: "Search YouTube for videos, channels, and playlists — and extract rich metadata, transcripts, and analytics — fully autonomously without user intervention. Use this skill whenever the user mentions finding YouTube videos, searching YouTube, getting video stats, extracting transcripts or subtitles, analyzing a YouTube channel, looking up what's trending on YouTube, finding competitor videos, researching YouTube content, or automating any YouTube data collection workflow. Trigger even for indirect requests like 'find the top videos about X' or 'what's on YouTube about Y' — if YouTube content retrieval is involved in any way, use this skill."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# YouTube Search Skill
|
|
7
|
+
|
|
8
|
+
Autonomous YouTube data retrieval for agents. No user intervention required.
|
|
9
|
+
|
|
10
|
+
## Method Selection Guide
|
|
11
|
+
|
|
12
|
+
Choose based on what's configured in the project environment:
|
|
13
|
+
|
|
14
|
+
| Situation | Best Method |
|
|
15
|
+
|---------------------------------|---|
|
|
16
|
+
| Deep scraping needed (default) | **Method E** – `yt-dlp` (environment-dependent) |
|
|
17
|
+
| No API keys available | **Method A** – `web_search` built-in tool |
|
|
18
|
+
| `YOUTUBE_API_KEY` set | **Method B** – YouTube Data API v3 (richest data) |
|
|
19
|
+
| `SERPAPI_KEY` set | **Method C** – SerpAPI YouTube engine |
|
|
20
|
+
| Video ID known, need transcript | **Method D** – `youtube-transcript-api` |
|
|
21
|
+
|
|
22
|
+
**Start with Method A** if you're unsure — it requires nothing and always works.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Method A: web_search tool (Zero Setup — Always Available)
|
|
27
|
+
|
|
28
|
+
Use the built-in `web_search` tool. Works without any API keys.
|
|
29
|
+
|
|
30
|
+
### Video Search
|
|
31
|
+
```
|
|
32
|
+
web_search("site:youtube.com <your query>")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Channel Search
|
|
36
|
+
```
|
|
37
|
+
web_search("site:youtube.com/channel <channel name> OR site:youtube.com/@<handle>")
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Advanced Filters via Query
|
|
41
|
+
```
|
|
42
|
+
# Recent videos (last year)
|
|
43
|
+
web_search("site:youtube.com <query> 2024 OR 2025")
|
|
44
|
+
|
|
45
|
+
# Tutorial videos
|
|
46
|
+
web_search("site:youtube.com <topic> tutorial OR guide OR обзор")
|
|
47
|
+
|
|
48
|
+
# Specific language
|
|
49
|
+
web_search("site:youtube.com <query> на русском")
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### What you get from web_search
|
|
53
|
+
- Video title
|
|
54
|
+
- Channel name
|
|
55
|
+
- URL (extract video ID: `youtube.com/watch?v=VIDEO_ID`)
|
|
56
|
+
- Snippet/description excerpt
|
|
57
|
+
- Sometimes view count and publish date (in snippet)
|
|
58
|
+
|
|
59
|
+
### Extract Video ID from URL
|
|
60
|
+
```python
|
|
61
|
+
import re
|
|
62
|
+
url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
|
63
|
+
video_id = re.search(r'v=([^&]+)', url).group(1)
|
|
64
|
+
# or from youtu.be links:
|
|
65
|
+
video_id = re.search(r'youtu\.be/([^?]+)', url).group(1)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Limitation:** No structured JSON, metadata is text-parsed. For richer data, use Method B.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Method B: YouTube Data API v3 (Recommended for Production)
|
|
73
|
+
|
|
74
|
+
**Requires:** `YOUTUBE_API_KEY` environment variable (free, 10,000 units/day quota).
|
|
75
|
+
Get key: https://console.cloud.google.com → Enable "YouTube Data API v3" → Create API key.
|
|
76
|
+
|
|
77
|
+
### Search Videos
|
|
78
|
+
```python
|
|
79
|
+
import requests, os
|
|
80
|
+
|
|
81
|
+
API_KEY = os.environ.get("YOUTUBE_API_KEY")
|
|
82
|
+
BASE = "https://www.googleapis.com/youtube/v3"
|
|
83
|
+
|
|
84
|
+
def youtube_search(query, max_results=10, order="relevance",
                   video_duration=None, published_after=None, lang=None):
    """Search YouTube videos via the Data API v3 search endpoint.

    order: relevance | date | viewCount | rating | title
    video_duration: short (<4min) | medium (4-20min) | long (>20min)
    published_after: ISO 8601 e.g. "2024-01-01T00:00:00Z"
    lang: ISO 639-1 e.g. "ru", "en"
    """
    query_params = {
        "part": "snippet",
        "q": query,
        "maxResults": max_results,
        "type": "video",
        "order": order,
        "key": API_KEY,
    }
    # Optional filters are only sent when the caller actually provided them.
    optional_filters = {
        "videoDuration": video_duration,
        "publishedAfter": published_after,
        "relevanceLanguage": lang,
    }
    query_params.update({k: v for k, v in optional_filters.items() if v})

    response = requests.get(f"{BASE}/search", params=query_params)
    response.raise_for_status()

    videos = []
    for entry in response.json().get("items", []):
        vid = entry["id"].get("videoId")
        if not vid:
            # Non-video results (channels/playlists) carry no videoId — skip.
            continue
        snippet = entry["snippet"]
        videos.append({
            "video_id": vid,
            "title": snippet["title"],
            "channel": snippet["channelTitle"],
            "channel_id": snippet["channelId"],
            "description": snippet["description"],
            "published_at": snippet["publishedAt"],
            "thumbnail": snippet["thumbnails"]["high"]["url"],
            "url": f"https://youtube.com/watch?v={vid}"
        })
    return videos
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Get Video Statistics (views, likes, duration)
|
|
124
|
+
```python
|
|
125
|
+
def get_video_stats(video_ids: list):
    """Fetch views/likes/comments, ISO duration, and tags for up to 50 videos.

    Pass a list of video IDs; one batched videos.list call costs 1 quota unit.
    """
    # The videos endpoint accepts at most 50 comma-separated IDs per request.
    joined_ids = ",".join(video_ids[:50])
    response = requests.get(f"{BASE}/videos", params={
        "part": "statistics,contentDetails,snippet",
        "id": joined_ids,
        "key": API_KEY,
    })
    response.raise_for_status()

    return [
        {
            "video_id": item["id"],
            "title": item["snippet"]["title"],
            "views": int(item.get("statistics", {}).get("viewCount", 0)),
            "likes": int(item.get("statistics", {}).get("likeCount", 0)),
            "comments": int(item.get("statistics", {}).get("commentCount", 0)),
            # ISO 8601 duration, e.g. "PT5M30S"
            "duration_iso": item.get("contentDetails", {}).get("duration"),
            "tags": item["snippet"].get("tags", []),
        }
        for item in response.json().get("items", [])
    ]
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Parse ISO 8601 Duration
|
|
153
|
+
```python
|
|
154
|
+
import re
|
|
155
|
+
def parse_duration(iso_duration):
    """Convert an ISO 8601 YouTube duration (e.g. "PT5M30S") to total seconds.

    Returns 0 for None, empty strings, or strings that do not match the
    PT#H#M#S pattern, so API responses with a missing `duration` field can be
    fed in without a pre-check (the original raised TypeError on None).
    """
    if not iso_duration:
        return 0
    match = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', iso_duration)
    if not match:
        return 0
    hours, minutes, seconds = (int(part or 0) for part in match.groups())
    return hours * 3600 + minutes * 60 + seconds
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Channel Search & Stats
|
|
164
|
+
```python
|
|
165
|
+
def search_channel(channel_name, max_results=5):
    """Find channels by name and return subscriber/view/video counts.

    Costs 100 quota units for the search plus 1 unit for one batched
    channels.list call. Returns [] when no channel matches.
    """
    search_resp = requests.get(f"{BASE}/search", params={
        "part": "snippet",
        "q": channel_name,
        "type": "channel",
        "maxResults": max_results,
        "key": API_KEY,
    })
    # Surface quota-exceeded / invalid-key errors instead of failing later
    # with a confusing KeyError while parsing the body.
    search_resp.raise_for_status()
    channel_ids = [item["id"]["channelId"]
                   for item in search_resp.json().get("items", [])]
    if not channel_ids:
        # Avoid a pointless channels.list request with an empty `id` param.
        return []

    # Get channel stats in one batched call (1 quota unit).
    stats_resp = requests.get(f"{BASE}/channels", params={
        "part": "statistics,snippet",
        "id": ",".join(channel_ids),
        "key": API_KEY,
    })
    stats_resp.raise_for_status()
    return [{
        "channel_id": ch["id"],
        "name": ch["snippet"]["title"],
        "subscribers": int(ch["statistics"].get("subscriberCount", 0)),
        "total_views": int(ch["statistics"].get("viewCount", 0)),
        "video_count": int(ch["statistics"].get("videoCount", 0)),
        "url": f"https://youtube.com/channel/{ch['id']}"
    } for ch in stats_resp.json().get("items", [])]
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Quota Costs (10,000 units/day free)
|
|
190
|
+
| Operation | Cost |
|
|
191
|
+
|---|---|
|
|
192
|
+
| search.list | 100 units |
|
|
193
|
+
| videos.list (stats) | 1 unit |
|
|
194
|
+
| channels.list | 1 unit |
|
|
195
|
+
| playlists.list | 1 unit |
|
|
196
|
+
|
|
197
|
+
**Tip:** Search = 100 units. Get stats for 50 videos = 1 unit. Always batch `videos.list` calls.
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Method C: SerpAPI (Structured Scraping, No Quota Issues)
|
|
202
|
+
|
|
203
|
+
**Requires:** `SERPAPI_KEY` environment variable.
|
|
204
|
+
Free tier: 100 searches/month. Paid plans available.
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
import requests, os
|
|
208
|
+
|
|
209
|
+
def serpapi_youtube_search(query, max_results=10, lang="ru"):
    """Search YouTube through SerpAPI's `youtube` engine.

    Returns structured results (views, duration, publish date) from a single
    call — no follow-up stats request needed as with the official Data API.
    """
    import re  # stdlib; used for the video-ID fallback below

    r = requests.get("https://serpapi.com/search", params={
        "engine": "youtube",
        "search_query": query,
        "api_key": os.environ.get("SERPAPI_KEY"),
        "hl": lang,  # interface language
    })
    r.raise_for_status()

    results = []
    for item in r.json().get("video_results", [])[:max_results]:
        video_id = item.get("id")
        if not video_id:
            # Fallback: parse the ID out of the URL. The previous
            # `link.split("v=")[-1]` kept trailing query params (e.g.
            # "ID&t=30s") and returned the whole URL when "v=" was absent.
            m = re.search(r'(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})',
                          item.get("link", ""))
            video_id = m.group(1) if m else None
        results.append({
            "title": item.get("title"),
            "video_id": video_id,
            "url": item.get("link"),
            "channel": item.get("channel", {}).get("name"),
            "views": item.get("views"),
            "duration": item.get("length"),
            "published": item.get("published_date"),
            "description": item.get("description"),
            "thumbnail": item.get("thumbnail", {}).get("static"),
        })
    return results
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**Advantage over YouTube API:** Returns views, duration, publish date directly from search — no extra API calls needed.
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## Method D: youtube-transcript-api (Transcripts by Video ID)
|
|
240
|
+
|
|
241
|
+
**Requires:** `pip install youtube-transcript-api --break-system-packages`
|
|
242
|
+
No API key needed.
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
|
246
|
+
|
|
247
|
+
def get_transcript(video_id, languages=None):
    """Return a video's full transcript as a dict, or {"error": ...}.

    languages: preference order (defaults to ["ru", "en"]); falls back to any
    auto-generated transcript when no preferred language is available.
    """
    # Avoid a mutable default argument; None sentinel keeps the call
    # signature backward-compatible.
    if languages is None:
        languages = ["ru", "en"]
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Try preferred languages first
        try:
            transcript = transcript_list.find_transcript(languages)
        except NoTranscriptFound:
            # Fall back to any auto-generated transcript. Iterate the public
            # TranscriptList API rather than touching the private
            # _generated_transcripts attribute, which may change between
            # library versions.
            generated_codes = [t.language_code for t in transcript_list
                               if t.is_generated]
            transcript = transcript_list.find_generated_transcript(generated_codes)

        entries = transcript.fetch()
        full_text = " ".join([e["text"] for e in entries])
        return {
            "video_id": video_id,
            "language": transcript.language_code,
            "is_generated": transcript.is_generated,
            "text": full_text,
            "entries": entries  # list of {text, start, duration}
        }
    except TranscriptsDisabled:
        return {"error": "Transcripts disabled for this video"}
    except Exception as e:
        return {"error": str(e)}
|
|
277
|
+
|
|
278
|
+
def get_available_languages(video_id):
    """List every transcript language offered for *video_id*."""
    available = []
    for transcript in YouTubeTranscriptApi.list_transcripts(video_id):
        available.append({
            "code": transcript.language_code,
            "name": transcript.language,
            "generated": transcript.is_generated,
        })
    return available
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
**Use case:** After finding video IDs via Method A or B, extract full text content for analysis, summarization, or content research.
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Method E: yt-dlp (Deep Metadata + Transcripts)
|
|
290
|
+
|
|
291
|
+
**Requires:** `pip install yt-dlp --break-system-packages`
|
|
292
|
+
No API key. May be blocked in sandboxed environments — test first.
|
|
293
|
+
|
|
294
|
+
```python
|
|
295
|
+
import yt_dlp, json
|
|
296
|
+
|
|
297
|
+
def ytdlp_search(query, max_results=10):
    """Search YouTube with yt-dlp. Returns rich metadata per result."""
    options = {
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,  # listing metadata only, no per-video page fetch
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(f"ytsearch{max_results}:{query}",
                                       download=False)

    found = []
    for entry in info.get("entries", []):
        if not entry:
            # yt-dlp can yield None placeholders for unavailable results.
            continue
        found.append({
            "title": entry.get("title"),
            "video_id": entry.get("id"),
            "url": f"https://youtube.com/watch?v={entry.get('id')}",
            "duration": entry.get("duration"),
            "view_count": entry.get("view_count"),
            "channel": entry.get("channel"),
            "upload_date": entry.get("upload_date"),
        })
    return found
|
|
316
|
+
|
|
317
|
+
def ytdlp_get_video_info(video_url):
    """Return yt-dlp's full metadata dict for a single video URL."""
    with yt_dlp.YoutubeDL({"quiet": True, "no_warnings": True}) as downloader:
        return downloader.extract_info(video_url, download=False)
|
|
323
|
+
|
|
324
|
+
def ytdlp_get_subtitles(video_url, lang="ru"):
    """Download and return subtitle text (.vtt/.srt) for a video, or None.

    Prefers *lang*, falling back to English; accepts auto-generated captions.
    """
    import tempfile, os
    with tempfile.TemporaryDirectory() as tmpdir:
        ydl_opts = {
            "quiet": True,
            "writesubtitles": True,
            "writeautomaticsub": True,
            "subtitleslangs": [lang, "en"],
            "skip_download": True,
            "outtmpl": f"{tmpdir}/%(id)s.%(ext)s",
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])

        for f in os.listdir(tmpdir):
            if f.endswith((".vtt", ".srt")):
                # Context manager closes the handle (the original leaked it);
                # explicit UTF-8 avoids locale-dependent decoding of captions.
                with open(os.path.join(tmpdir, f), encoding="utf-8") as sub_file:
                    return sub_file.read()
        return None
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
**Note:** yt-dlp makes direct requests to YouTube — may be blocked in restricted network environments. Always test with a quick `yt-dlp --version` call first.
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
## Recommended Workflow for Automation Projects
|
|
350
|
+
|
|
351
|
+
### Pattern 1: Competitor/Topic Research
|
|
352
|
+
```python
|
|
353
|
+
# 1. Search for videos
|
|
354
|
+
results = youtube_search("AI сервисы обзор", max_results=20,
|
|
355
|
+
order="viewCount", lang="ru")
|
|
356
|
+
|
|
357
|
+
# 2. Enrich with stats
|
|
358
|
+
video_ids = [v["video_id"] for v in results]
|
|
359
|
+
stats = get_video_stats(video_ids)
|
|
360
|
+
|
|
361
|
+
# 3. Get transcripts for top videos
|
|
362
|
+
top_videos = sorted(stats, key=lambda x: x["views"], reverse=True)[:5]
|
|
363
|
+
for v in top_videos:
|
|
364
|
+
transcript = get_transcript(v["video_id"], languages=["ru"])
|
|
365
|
+
# analyze, summarize, extract keywords...
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
### Pattern 2: Zero-Config Content Discovery (web_search only)
|
|
369
|
+
```python
|
|
370
|
+
# No setup required - use built-in web_search tool
|
|
371
|
+
# web_search("site:youtube.com AI сервисы обзор 2025")
|
|
372
|
+
# Parse results, extract video IDs, then use transcript API if needed
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### Pattern 3: Channel Monitoring
|
|
376
|
+
```python
|
|
377
|
+
# Find channel
|
|
378
|
+
channels = search_channel("название канала")
|
|
379
|
+
channel_id = channels[0]["channel_id"]
|
|
380
|
+
|
|
381
|
+
# Get latest videos from channel
|
|
382
|
+
params = {
|
|
383
|
+
"part": "snippet",
|
|
384
|
+
"channelId": channel_id,
|
|
385
|
+
"order": "date",
|
|
386
|
+
"maxResults": 10,
|
|
387
|
+
"type": "video",
|
|
388
|
+
"key": API_KEY,
|
|
389
|
+
}
|
|
390
|
+
r = requests.get(f"{BASE}/search", params=params)
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
|
|
395
|
+
## Environment Setup Checklist
|
|
396
|
+
|
|
397
|
+
```bash
|
|
398
|
+
# Required for Method B (YouTube Data API)
|
|
399
|
+
export YOUTUBE_API_KEY="AIza..."
|
|
400
|
+
|
|
401
|
+
# Required for Method C (SerpAPI)
|
|
402
|
+
export SERPAPI_KEY="..."
|
|
403
|
+
|
|
404
|
+
# Required for Methods D & E (Python libraries)
|
|
405
|
+
pip install youtube-transcript-api yt-dlp --break-system-packages
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
---
|
|
409
|
+
|
|
410
|
+
## See Also
|
|
411
|
+
- `references/youtube-api-quota.md` — Quota optimization strategies
|
|
412
|
+
- `references/parsing-examples.md` — Real-world parsing examples for Russian-language content
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# Parsing Examples — Russian YouTube Content
|
|
2
|
+
|
|
3
|
+
## Example 1: Find top Russian AI channels (Method B)
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import requests, os
|
|
7
|
+
|
|
8
|
+
API_KEY = os.environ.get("YOUTUBE_API_KEY")
|
|
9
|
+
BASE = "https://www.googleapis.com/youtube/v3"
|
|
10
|
+
|
|
11
|
+
queries = ["AI сервисы обзор", "нейросети для работы", "искусственный интеллект обзор"]
|
|
12
|
+
all_channels = {}
|
|
13
|
+
|
|
14
|
+
for q in queries:
|
|
15
|
+
r = requests.get(f"{BASE}/search", params={
|
|
16
|
+
"part": "snippet",
|
|
17
|
+
"q": q,
|
|
18
|
+
"type": "video",
|
|
19
|
+
"maxResults": 20,
|
|
20
|
+
"relevanceLanguage": "ru",
|
|
21
|
+
"order": "viewCount",
|
|
22
|
+
"key": API_KEY,
|
|
23
|
+
})
|
|
24
|
+
for item in r.json().get("items", []):
|
|
25
|
+
ch_id = item["snippet"]["channelId"]
|
|
26
|
+
ch_name = item["snippet"]["channelTitle"]
|
|
27
|
+
if ch_id not in all_channels:
|
|
28
|
+
all_channels[ch_id] = {"name": ch_name, "video_count": 0}
|
|
29
|
+
all_channels[ch_id]["video_count"] += 1
|
|
30
|
+
|
|
31
|
+
# Get subscriber counts for top channels
|
|
32
|
+
top_ids = list(all_channels.keys())[:50]
|
|
33
|
+
r2 = requests.get(f"{BASE}/channels", params={
|
|
34
|
+
"part": "statistics",
|
|
35
|
+
"id": ",".join(top_ids),
|
|
36
|
+
"key": API_KEY,
|
|
37
|
+
})
|
|
38
|
+
for ch in r2.json().get("items", []):
|
|
39
|
+
all_channels[ch["id"]]["subscribers"] = int(
|
|
40
|
+
ch["statistics"].get("subscriberCount", 0)
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# Sort by subscribers
|
|
44
|
+
ranked = sorted(all_channels.values(), key=lambda x: x.get("subscribers", 0), reverse=True)
|
|
45
|
+
for i, ch in enumerate(ranked[:10], 1):
|
|
46
|
+
print(f"{i}. {ch['name']} — {ch.get('subscribers', 0):,} подписчиков")
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Example 2: Video Research for Affiliate Review (Method A + D)
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
# Step 1: Find videos via web_search (no API key)
|
|
53
|
+
# web_search("site:youtube.com midjourney обзор 2025 на русском")
|
|
54
|
+
# → Returns list of YouTube URLs
|
|
55
|
+
|
|
56
|
+
# Step 2: Extract video IDs
|
|
57
|
+
import re
|
|
58
|
+
urls = [
|
|
59
|
+
"https://www.youtube.com/watch?v=ABC123",
|
|
60
|
+
"https://youtu.be/XYZ789",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
def extract_id(url):
    """Return the 11-character YouTube video ID from a watch/youtu.be URL,
    or None when the URL contains no recognizable ID."""
    found = re.search(r'(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})', url)
    if found:
        return found.group(1)
    return None
|
|
66
|
+
|
|
67
|
+
video_ids = [extract_id(u) for u in urls if extract_id(u)]
|
|
68
|
+
|
|
69
|
+
# Step 3: Get transcripts
|
|
70
|
+
from youtube_transcript_api import YouTubeTranscriptApi
|
|
71
|
+
|
|
72
|
+
for vid_id in video_ids:
|
|
73
|
+
try:
|
|
74
|
+
entries = YouTubeTranscriptApi.get_transcript(vid_id, languages=["ru", "en"])
|
|
75
|
+
text = " ".join([e["text"] for e in entries])
|
|
76
|
+
# Now pass to Claude for summarization or keyword extraction
|
|
77
|
+
print(f"Video {vid_id}: {len(text)} chars")
|
|
78
|
+
except Exception as e:
|
|
79
|
+
print(f"Video {vid_id}: {e}")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Example 3: Trending Videos Last 7 Days (Method B)
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from datetime import datetime, timedelta
|
|
86
|
+
|
|
87
|
+
week_ago = (datetime.utcnow() - timedelta(days=7)).isoformat() + "Z"
|
|
88
|
+
|
|
89
|
+
results = []
|
|
90
|
+
for query in ["ChatGPT", "Midjourney", "Stable Diffusion", "Claude AI", "Sora"]:
|
|
91
|
+
r = requests.get(f"{BASE}/search", params={
|
|
92
|
+
"part": "snippet",
|
|
93
|
+
"q": f"{query} обзор OR туториал OR как использовать",
|
|
94
|
+
"type": "video",
|
|
95
|
+
"maxResults": 5,
|
|
96
|
+
"relevanceLanguage": "ru",
|
|
97
|
+
"publishedAfter": week_ago,
|
|
98
|
+
"order": "viewCount",
|
|
99
|
+
"key": API_KEY,
|
|
100
|
+
})
|
|
101
|
+
results.extend(r.json().get("items", []))
|
|
102
|
+
|
|
103
|
+
# Get stats for all found videos in one batch call (1 quota unit!)
|
|
104
|
+
ids = [item["id"]["videoId"] for item in results if item["id"].get("videoId")]
|
|
105
|
+
stats_r = requests.get(f"{BASE}/videos", params={
|
|
106
|
+
"part": "statistics,contentDetails",
|
|
107
|
+
"id": ",".join(ids),
|
|
108
|
+
"fields": "items(id,statistics(viewCount,likeCount),contentDetails(duration))",
|
|
109
|
+
"key": API_KEY,
|
|
110
|
+
})
|
|
111
|
+
|
|
112
|
+
stats_map = {
|
|
113
|
+
item["id"]: item
|
|
114
|
+
for item in stats_r.json().get("items", [])
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
# Combine and sort
|
|
118
|
+
combined = []
|
|
119
|
+
for item in results:
|
|
120
|
+
vid_id = item["id"].get("videoId")
|
|
121
|
+
if vid_id and vid_id in stats_map:
|
|
122
|
+
combined.append({
|
|
123
|
+
"title": item["snippet"]["title"],
|
|
124
|
+
"channel": item["snippet"]["channelTitle"],
|
|
125
|
+
"url": f"https://youtube.com/watch?v={vid_id}",
|
|
126
|
+
"views": int(stats_map[vid_id]["statistics"].get("viewCount", 0)),
|
|
127
|
+
})
|
|
128
|
+
|
|
129
|
+
for v in sorted(combined, key=lambda x: x["views"], reverse=True)[:10]:
|
|
130
|
+
print(f"{v['views']:,} просмотров — {v['title']} ({v['channel']})")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Example 4: SerpAPI for Rich Structured Data (Method C)
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
import requests, os
|
|
137
|
+
|
|
138
|
+
def search_youtube_serp(query, num=10):
|
|
139
|
+
r = requests.get("https://serpapi.com/search", params={
|
|
140
|
+
"engine": "youtube",
|
|
141
|
+
"search_query": query,
|
|
142
|
+
"api_key": os.environ.get("SERPAPI_KEY"),
|
|
143
|
+
"hl": "ru",
|
|
144
|
+
})
|
|
145
|
+
videos = r.json().get("video_results", [])[:num]
|
|
146
|
+
return [{
|
|
147
|
+
"title": v.get("title"),
|
|
148
|
+
"url": v.get("link"),
|
|
149
|
+
"channel": v.get("channel", {}).get("name"),
|
|
150
|
+
"views": v.get("views"), # Already parsed: "1.2M views"
|
|
151
|
+
"duration": v.get("length"), # "12:34"
|
|
152
|
+
"published": v.get("published_date"), # "3 months ago"
|
|
153
|
+
"description": v.get("description"),
|
|
154
|
+
} for v in videos]
|
|
155
|
+
|
|
156
|
+
results = search_youtube_serp("Midjourney v7 обзор", num=10)
|
|
157
|
+
for v in results:
|
|
158
|
+
print(f"{v['title']} | {v['views']} | {v['channel']}")
|
|
159
|
+
```
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# YouTube Data API v3 — Quota Optimization
|
|
2
|
+
|
|
3
|
+
## Daily Quota: 10,000 units (free tier)
|
|
4
|
+
|
|
5
|
+
## Quota Cost Table
|
|
6
|
+
| Method | Quota Cost | Notes |
|
|
7
|
+
|---|---|---|
|
|
8
|
+
| search.list | **100 units** | Most expensive — use sparingly |
|
|
9
|
+
| videos.list | 1 unit per call | Batch up to 50 IDs per call |
|
|
10
|
+
| channels.list | 1 unit per call | Batch up to 50 IDs |
|
|
11
|
+
| playlists.list | 1 unit per call | |
|
|
12
|
+
| playlistItems.list | 1 unit per call | |
|
|
13
|
+
| commentThreads.list | 1 unit per call | |
|
|
14
|
+
|
|
15
|
+
## Optimization Strategies
|
|
16
|
+
|
|
17
|
+
### 1. Minimize search.list calls
|
|
18
|
+
- One `search.list` = 100 units = 1% of daily quota
|
|
19
|
+
- Max 100 searches/day on free tier
|
|
20
|
+
- Cache results when possible
|
|
21
|
+
|
|
22
|
+
### 2. Batch videos.list
|
|
23
|
+
```python
|
|
24
|
+
# BAD: 50 separate calls = 50 units
|
|
25
|
+
for video_id in video_ids:
|
|
26
|
+
get_stats(video_id) # 1 unit each
|
|
27
|
+
|
|
28
|
+
# GOOD: 1 call = 1 unit
|
|
29
|
+
get_stats(",".join(video_ids[:50])) # batch all at once
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### 3. Use `fields` parameter to reduce response size
|
|
33
|
+
```python
|
|
34
|
+
params = {
|
|
35
|
+
"part": "statistics",
|
|
36
|
+
"id": video_ids,
|
|
37
|
+
"fields": "items(id,statistics(viewCount,likeCount))", # only what you need
|
|
38
|
+
"key": API_KEY,
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### 4. Cache channel IDs
|
|
43
|
+
Channel IDs don't change. Store them after first lookup:
|
|
44
|
+
```python
|
|
45
|
+
CHANNEL_CACHE = {} # {"Channel Name": "UCxxxxxxxx"}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### 5. Use `publishedAfter` to limit search scope
|
|
49
|
+
Instead of sorting by date after fetching, filter at API level:
|
|
50
|
+
```python
|
|
51
|
+
from datetime import datetime, timedelta
|
|
52
|
+
week_ago = (datetime.utcnow() - timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
53
|
+
params["publishedAfter"] = week_ago
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Quota Monitoring
|
|
57
|
+
```python
|
|
58
|
+
# Quota usage is visible at:
|
|
59
|
+
# https://console.cloud.google.com/apis/api/youtube.googleapis.com/quotas
|
|
60
|
+
# No programmatic way to check remaining quota via API itself.
|
|
61
|
+
|
|
62
|
+
# Implement local counter:
|
|
63
|
+
quota_used = 0
|
|
64
|
+
def tracked_search(*args, **kwargs):
|
|
65
|
+
global quota_used
|
|
66
|
+
quota_used += 100 # search costs 100
|
|
67
|
+
return youtube_search(*args, **kwargs)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Error Handling
|
|
71
|
+
```python
|
|
72
|
+
from googleapiclient.errors import HttpError
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
results = youtube_search(query)
|
|
76
|
+
except requests.exceptions.HTTPError as e:
|
|
77
|
+
if e.response.status_code == 403:
|
|
78
|
+
error_body = e.response.json()
|
|
79
|
+
if "quotaExceeded" in str(error_body):
|
|
80
|
+
# Switch to Method A (web_search) as fallback
|
|
81
|
+
results = fallback_web_search(query)
|
|
82
|
+
elif "keyInvalid" in str(error_body):
|
|
83
|
+
raise ValueError("Invalid YouTube API key")
|
|
84
|
+
raise
|
|
85
|
+
```
|