vidlens-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/LICENSE +21 -0
  2. package/README.md +448 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +8 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/index.d.ts +8 -0
  7. package/dist/index.js +9 -0
  8. package/dist/index.js.map +1 -0
  9. package/dist/lib/analysis.d.ts +71 -0
  10. package/dist/lib/analysis.js +605 -0
  11. package/dist/lib/analysis.js.map +1 -0
  12. package/dist/lib/cache-store.d.ts +49 -0
  13. package/dist/lib/cache-store.js +180 -0
  14. package/dist/lib/cache-store.js.map +1 -0
  15. package/dist/lib/cli-runtime.d.ts +68 -0
  16. package/dist/lib/cli-runtime.js +601 -0
  17. package/dist/lib/cli-runtime.js.map +1 -0
  18. package/dist/lib/comment-knowledge-base.d.ts +46 -0
  19. package/dist/lib/comment-knowledge-base.js +852 -0
  20. package/dist/lib/comment-knowledge-base.js.map +1 -0
  21. package/dist/lib/embedding-provider.d.ts +20 -0
  22. package/dist/lib/embedding-provider.js +124 -0
  23. package/dist/lib/embedding-provider.js.map +1 -0
  24. package/dist/lib/gemini-visual-describer.d.ts +17 -0
  25. package/dist/lib/gemini-visual-describer.js +109 -0
  26. package/dist/lib/gemini-visual-describer.js.map +1 -0
  27. package/dist/lib/id-parsing.d.ts +19 -0
  28. package/dist/lib/id-parsing.js +122 -0
  29. package/dist/lib/id-parsing.js.map +1 -0
  30. package/dist/lib/install-diagnostics.d.ts +36 -0
  31. package/dist/lib/install-diagnostics.js +249 -0
  32. package/dist/lib/install-diagnostics.js.map +1 -0
  33. package/dist/lib/knowledge-base.d.ts +59 -0
  34. package/dist/lib/knowledge-base.js +1046 -0
  35. package/dist/lib/knowledge-base.js.map +1 -0
  36. package/dist/lib/macos-vision.d.ts +17 -0
  37. package/dist/lib/macos-vision.js +224 -0
  38. package/dist/lib/macos-vision.js.map +1 -0
  39. package/dist/lib/media-downloader.d.ts +35 -0
  40. package/dist/lib/media-downloader.js +205 -0
  41. package/dist/lib/media-downloader.js.map +1 -0
  42. package/dist/lib/media-store.d.ts +80 -0
  43. package/dist/lib/media-store.js +218 -0
  44. package/dist/lib/media-store.js.map +1 -0
  45. package/dist/lib/page-extract-client.d.ts +7 -0
  46. package/dist/lib/page-extract-client.js +133 -0
  47. package/dist/lib/page-extract-client.js.map +1 -0
  48. package/dist/lib/rate-limiter.d.ts +47 -0
  49. package/dist/lib/rate-limiter.js +115 -0
  50. package/dist/lib/rate-limiter.js.map +1 -0
  51. package/dist/lib/retry.d.ts +25 -0
  52. package/dist/lib/retry.js +68 -0
  53. package/dist/lib/retry.js.map +1 -0
  54. package/dist/lib/schema-migration.d.ts +40 -0
  55. package/dist/lib/schema-migration.js +109 -0
  56. package/dist/lib/schema-migration.js.map +1 -0
  57. package/dist/lib/telemetry.d.ts +76 -0
  58. package/dist/lib/telemetry.js +184 -0
  59. package/dist/lib/telemetry.js.map +1 -0
  60. package/dist/lib/thumbnail-extractor.d.ts +39 -0
  61. package/dist/lib/thumbnail-extractor.js +159 -0
  62. package/dist/lib/thumbnail-extractor.js.map +1 -0
  63. package/dist/lib/token-benchmark.d.ts +13 -0
  64. package/dist/lib/token-benchmark.js +8 -0
  65. package/dist/lib/token-benchmark.js.map +1 -0
  66. package/dist/lib/token-controls.d.ts +29 -0
  67. package/dist/lib/token-controls.js +120 -0
  68. package/dist/lib/token-controls.js.map +1 -0
  69. package/dist/lib/types.d.ts +1150 -0
  70. package/dist/lib/types.js +2 -0
  71. package/dist/lib/types.js.map +1 -0
  72. package/dist/lib/visual-search.d.ts +165 -0
  73. package/dist/lib/visual-search.js +640 -0
  74. package/dist/lib/visual-search.js.map +1 -0
  75. package/dist/lib/youtube-api-client.d.ts +36 -0
  76. package/dist/lib/youtube-api-client.js +277 -0
  77. package/dist/lib/youtube-api-client.js.map +1 -0
  78. package/dist/lib/youtube-service.d.ts +95 -0
  79. package/dist/lib/youtube-service.js +2599 -0
  80. package/dist/lib/youtube-service.js.map +1 -0
  81. package/dist/lib/ytdlp-client.d.ts +25 -0
  82. package/dist/lib/ytdlp-client.js +394 -0
  83. package/dist/lib/ytdlp-client.js.map +1 -0
  84. package/dist/server/mcp-server.d.ts +9 -0
  85. package/dist/server/mcp-server.js +1250 -0
  86. package/dist/server/mcp-server.js.map +1 -0
  87. package/package.json +69 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,448 @@
1
+ <p align="center">
2
+ <img src="assets/vidlens-logo.png" alt="VidLens" width="400" />
3
+ </p>
4
+
5
+ <p align="center">
6
+ <strong>The YouTube intelligence layer for AI agents</strong><br/>
7
+ <em>Zero config · 40 tools · Three-tier fallback · Transcript + visual search</em>
8
+ </p>
9
+
10
+ <p align="center">
11
+ <a href="https://www.npmjs.com/package/vidlens-mcp"><img src="https://img.shields.io/npm/v/vidlens-mcp?style=flat-square&color=red" alt="npm" /></a>
12
+ <a href="https://github.com/rajanrengasamy/vidlens-mcp/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue?style=flat-square" alt="License" /></a>
13
+ <a href="https://modelcontextprotocol.io/"><img src="https://img.shields.io/badge/MCP-compatible-green?style=flat-square" alt="MCP" /></a>
14
+ <img src="https://img.shields.io/badge/tools-40-orange?style=flat-square" alt="40 tools" />
15
+ <img src="https://img.shields.io/badge/zero--config-✓-brightgreen?style=flat-square" alt="Zero Config" />
16
+ </p>
17
+
18
+ ---
19
+
20
+ ## 🔍 What is VidLens?
21
+
22
+ VidLens is a [Model Context Protocol](https://modelcontextprotocol.io/) server that gives AI agents deep, reliable access to YouTube. Not just transcripts - full intelligence: sentiment analysis, trend discovery, semantic search, media assets, creator analytics, and image-backed visual search.
23
+
24
+ **No API key required to start.** Every tool has a three-tier fallback chain (YouTube API → yt-dlp → page extraction) so nothing breaks when quota runs out or keys aren't configured.
25
+
26
+ ---
27
+
28
+ ## 🎯 Core Capabilities
29
+
30
+ ### 🔎 Semantic Search Across Playlists
31
+ Import entire playlists or video sets, index every transcript with Gemini embeddings, and search across hundreds of hours of content by meaning — not just keywords.
32
+
33
+ > *"Find every mention of gradient descent across 50 Stanford CS lectures"*
34
+ >
35
+ > *"What did the instructor say about backpropagation in any of these videos?"*
36
+
37
+ ### 👁️ Visual Search — See What's In Videos
38
+ Extract keyframes, describe them with Gemini Vision, run OCR on slides and whiteboards, and search by what you **see** — not just what's said. Three layers: Apple Vision feature prints for image similarity, Gemini frame descriptions for scene understanding, and semantic embeddings for text→visual search.
39
+
40
+ > *"Find the frame where he draws the system architecture diagram"*
41
+ >
42
+ > *"Show me every slide that mentions 'transformer architecture'"*
43
+
44
+ ### 📊 Intelligence Layer — Not Just Data
45
+ Sentiment analysis with themes and risk signals. Niche trend discovery with momentum and saturation scoring. Content gap detection. Hook pattern analysis. Upload timing recommendations. The LLM does the thinking — VidLens gives it the right data.
46
+
47
+ > *"What's the audience sentiment on this video? Any risk signals?"*
48
+ >
49
+ > *"What's trending in the AI coding niche right now?"*
50
+
51
+ ### ⚡ Zero Config, Always Works
52
+ No API key needed to start. Three-tier fallback chain on every tool: YouTube API → yt-dlp → page extraction. Nothing breaks when quota runs out. Keys are optional power-ups, not requirements.
53
+
54
+ ### 🎬 Full Media Pipeline
55
+ Download videos/audio/thumbnails. Extract keyframes. Index comments for semantic search. Build a local knowledge base from any YouTube content — all through natural language.
56
+
57
+ ---
58
+
59
+ ## ⚡ Why VidLens?
60
+
61
+ <table>
62
+ <tr><th></th><th>VidLens</th><th>Other YouTube MCP servers</th></tr>
63
+ <tr><td>🔑 <strong>Setup</strong></td><td>✅ Works immediately - no keys needed</td><td>❌ Most require YouTube API key upfront</td></tr>
64
+ <tr><td>🛡️ <strong>Reliability</strong></td><td>✅ Three-tier fallback on every tool</td><td>❌ Single point of failure - API down = broken</td></tr>
65
+ <tr><td>🧠 <strong>Intelligence</strong></td><td>✅ Sentiment, trends, content gaps, hooks</td><td>❌ Raw data dumps - you do the analysis</td></tr>
66
+ <tr><td>📦 <strong>Token efficiency</strong></td><td>✅ 75-87% smaller responses</td><td>❌ Verbose JSON with thumbnails, etags, junk</td></tr>
67
+ <tr><td>🔬 <strong>Depth</strong></td><td>✅ 40 tools across 9 modules</td><td>⚠️ 1-5 tools, mostly transcripts only</td></tr>
68
+ <tr><td>🖼️ <strong>Visual evidence</strong></td><td>✅ Returns actual frame paths + timestamps, not just text hits</td><td>⚠️ Usually transcript-only or raw frame dumps</td></tr>
69
+ <tr><td>⚖️ <strong>Trademark</strong></td><td>✅ Compliant naming</td><td>⚠️ Most violate YouTube trademark</td></tr>
70
+ </table>
71
+
72
+ ---
73
+
74
+ ## 🚀 Quick Start
75
+
76
+ ### 1. Install
77
+
78
+ ```bash
79
+ npx vidlens-mcp setup
80
+ ```
81
+
82
+ This auto-detects your MCP client (Claude Desktop, Cursor, etc.) and configures it.
83
+
84
+ ### 2. Or configure manually
85
+
86
+ **Claude Desktop** - add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
87
+
88
+ ```json
89
+ {
90
+ "mcpServers": {
91
+ "vidlens-mcp": {
92
+ "command": "npx",
93
+ "args": ["-y", "vidlens-mcp", "serve"]
94
+ }
95
+ }
96
+ }
97
+ ```
98
+
99
+ ### 3. Restart your MCP client
100
+
101
+ Fully quit and reopen Claude Desktop (or your client). VidLens will appear in the tool list.
102
+
103
+ ### 4. Try it
104
+
105
+ > "Import this playlist and search across all videos for mentions of machine learning"
106
+ >
107
+ > "Search this video's visuals for the whiteboard architecture diagram and show me the frame evidence"
108
+ >
109
+ > "What's trending in the AI coding niche right now?"
110
+ >
111
+ > "Build a complete dossier for this video — metadata, transcript, sentiment, hooks, everything"
112
+ >
113
+ > "What's the audience sentiment on this video? Any risk signals?"
114
+ >
115
+ > "Get the transcript of this video: https://youtube.com/watch?v=dQw4w9WgXcQ"
116
+
117
+ ---
118
+
119
+ ## 🧰 Tools - 40 across 9 modules
120
+
121
+ ### 📺 Core - Video & Channel Intelligence
122
+ *Always available, no API key needed*
123
+
124
+ | Tool | What it does |
125
+ |---|---|
126
+ | `findVideos` | Search YouTube by query with metadata |
127
+ | `inspectVideo` | Deep metadata - tags, engagement, language, category |
128
+ | `inspectChannel` | Channel stats, description, recent uploads |
129
+ | `listChannelCatalog` | Browse a channel's full video library |
130
+ | `readTranscript` | Full transcript with timestamps and chapters |
131
+ | `readComments` | Top comments with likes and engagement |
132
+ | `expandPlaylist` | List all videos in any playlist |
133
+
134
+ ### 🔎 Knowledge Base - Semantic Search
135
+ *Index transcripts and search across them with natural language*
136
+
137
+ | Tool | What it does |
138
+ |---|---|
139
+ | `importPlaylist` | Index an entire playlist's transcripts |
140
+ | `importVideos` | Index specific videos by URL/ID |
141
+ | `searchTranscripts` | Natural language search across indexed content |
142
+ | `listCollections` | Browse your indexed collections |
143
+ | `setActiveCollection` | Scope searches to one collection |
144
+ | `clearActiveCollection` | Search across all collections |
145
+ | `removeCollection` | Delete a collection and its index |
146
+
147
+ ### 💬 Sentiment & Analysis
148
+ *Understand what audiences think and feel*
149
+
150
+ | Tool | What it does |
151
+ |---|---|
152
+ | `measureAudienceSentiment` | Comment sentiment with themes and risk signals |
153
+ | `analyzeVideoSet` | Compare performance across multiple videos |
154
+ | `analyzePlaylist` | Playlist-level engagement analytics |
155
+ | `buildVideoDossier` | Complete single-video deep analysis |
156
+
157
+ ### 🎯 Creator Intelligence
158
+ *Insights for content strategy*
159
+
160
+ | Tool | What it does |
161
+ |---|---|
162
+ | `scoreHookPatterns` | Analyze what makes video openings work |
163
+ | `researchTagsAndTitles` | Tag and title optimization insights |
164
+ | `compareShortsVsLong` | Short-form vs long-form performance |
165
+ | `recommendUploadWindows` | Best times to publish for engagement |
166
+
167
+ ### 📈 Discovery & Trends
168
+ *Find what's working in any niche*
169
+
170
+ | Tool | What it does |
171
+ |---|---|
172
+ | `discoverNicheTrends` | Momentum, saturation, content gaps in any topic |
173
+ | `exploreNicheCompetitors` | Channel landscape and top performers |
174
+
175
+ ### 🎬 Media Assets
176
+ *Download and manage video files locally*
177
+
178
+ | Tool | What it does |
179
+ |---|---|
180
+ | `downloadAsset` | Download video, audio, or thumbnails |
181
+ | `listMediaAssets` | Browse stored media files |
182
+ | `removeMediaAsset` | Clean up downloaded assets |
183
+ | `extractKeyframes` | Extract key frames from videos |
184
+ | `mediaStoreHealth` | Storage usage and diagnostics |
185
+
186
+ ### 🖼️ Visual Search
187
+ *Three-layer visual intelligence. Not transcript reuse.*
188
+
189
+ | Tool | What it does |
190
+ |---|---|
191
+ | `indexVisualContent` | Extract frames, run Apple Vision OCR + feature prints, Gemini frame descriptions, and Gemini semantic embeddings |
192
+ | `searchVisualContent` | Search visual frames using semantic embeddings + lexical matching. Returns actual image paths + timestamps as evidence |
193
+ | `findSimilarFrames` | Image-to-image frame similarity using Apple Vision feature prints |
194
+
195
+ **Three layers, all real:**
196
+ 1. **Apple Vision feature prints** — image-to-image similarity (find frames that look alike)
197
+ 2. **Gemini 2.5 Flash frame descriptions** — natural language scene understanding per frame
198
+ 3. **Gemini semantic embeddings** — 768-dim embedding retrieval over OCR + description text for true text→visual search
199
+
200
+ **What you always get back:** frame path on disk, timestamp, source video URL/title, match explanation, OCR text, visual description.
201
+
202
+ **What is NOT happening:** no transcript embeddings are reused for visual search. This is a separate visual index.
203
+
204
+ ### 💭 Comment Knowledge Base
205
+ *Index and semantically search YouTube comments*
206
+
207
+ | Tool | What it does |
208
+ |---|---|
209
+ | `importComments` | Index a video's comments for search |
210
+ | `searchComments` | Natural language search over comment corpus |
211
+ | `listCommentCollections` | Browse comment collections |
212
+ | `setActiveCommentCollection` | Scope comment searches |
213
+ | `clearActiveCommentCollection` | Search all comment collections |
214
+ | `removeCommentCollection` | Delete a comment collection |
215
+
216
+ ### 🏥 Diagnostics
217
+ *Health checks and pre-flight validation*
218
+
219
+ | Tool | What it does |
220
+ |---|---|
221
+ | `checkSystemHealth` | Full system diagnostic report |
222
+ | `checkImportReadiness` | Validate before importing content |
223
+
224
+ ---
225
+
226
+ ## 🔑 API Keys (Optional)
227
+
228
+ VidLens works **without any API keys**. Add them to unlock more capabilities:
229
+
230
+ | Key | What it unlocks | Free? | How to get it |
231
+ |---|---|---|---|
232
+ | `YOUTUBE_API_KEY` | Better metadata, comment API, search via YouTube API | ✅ Free tier (10,000 units/day) | [Google Cloud Console](https://console.cloud.google.com/) → APIs → Enable YouTube Data API v3 → Credentials → Create API Key |
233
+ | `GEMINI_API_KEY` | Higher-quality embeddings for semantic search (768d vs 384d) and Gemini frame descriptions for visual search | ✅ Free tier | [Google AI Studio](https://aistudio.google.com/) → Get API Key |
234
+
235
+ > ⚠️ **These are separate keys from separate Google services.** A Gemini key will NOT work for YouTube API calls and vice versa. Create them independently.
236
+
237
+ ```bash
238
+ # Configure via setup wizard
239
+ npx vidlens-mcp setup --youtube-api-key YOUR_YOUTUBE_KEY --gemini-api-key YOUR_GEMINI_KEY
240
+
241
+ # Or via environment variables
242
+ export YOUTUBE_API_KEY=your_youtube_key
243
+ export GEMINI_API_KEY=your_gemini_key
244
+ ```
245
+
246
+ ---
247
+
248
+ ## 💻 CLI
249
+
250
+ ```bash
251
+ npx vidlens-mcp # Start MCP server (stdio)
252
+ npx vidlens-mcp serve # Start MCP server (explicit)
253
+ npx vidlens-mcp setup # Auto-configure MCP clients
254
+ npx vidlens-mcp doctor # Run diagnostics
255
+ npx vidlens-mcp version # Print version
256
+ npx vidlens-mcp help # Usage guide
257
+ ```
258
+
259
+ ### Doctor - diagnose issues
260
+
261
+ ```bash
262
+ npx vidlens-mcp doctor --no-live
263
+ ```
264
+
265
+ Checks: Node.js version, yt-dlp availability, API key validation, data directory health, MCP client detection (Claude Desktop, ChatGPT Desktop, Cursor, VS Code).
266
+
267
+ ---
268
+
269
+ ## 🏗️ Architecture
270
+
271
+ ### System Overview
272
+
273
+ ```mermaid
274
+ graph TB
275
+ subgraph Client["MCP Client"]
276
+ Claude["Claude Desktop"]
277
+ Cursor["Cursor"]
278
+ VSCode["VS Code"]
279
+ Other["Any MCP Client"]
280
+ end
281
+
282
+ subgraph VidLens["VidLens MCP Server"]
283
+ Router["Tool Router"]
284
+
285
+ subgraph Modules["9 Tool Modules · 40 Tools"]
286
+ Core["📺 Core<br/>7 tools"]
287
+ KB["🔎 Knowledge Base<br/>7 tools"]
288
+ Sentiment["💬 Sentiment<br/>4 tools"]
289
+ Creator["🎯 Creator Intel<br/>4 tools"]
290
+ Trends["📈 Discovery<br/>2 tools"]
291
+ Media["🎬 Media<br/>5 tools"]
292
+ Visual["🖼️ Visual Search<br/>3 tools"]
293
+ Comments["💭 Comments<br/>6 tools"]
294
+ Diag["🏥 Diagnostics<br/>2 tools"]
295
+ end
296
+
297
+ subgraph Storage["Local Storage"]
298
+ SQLite["SQLite + sqlite-vec"]
299
+ Embeddings["Embedding Index<br/>384d / 768d vectors"]
300
+ MediaStore["Media Store<br/>video · audio · frames"]
301
+ end
302
+
303
+ subgraph Fallback["Three-Tier Fallback Chain"]
304
+ T1["① YouTube Data API v3"]
305
+ T2["② yt-dlp"]
306
+ T3["③ Page extraction"]
307
+ end
308
+ end
309
+
310
+ subgraph External["External Services"]
311
+ YT["YouTube"]
312
+ Gemini["Gemini API"]
313
+ AppleVision["Apple Vision"]
314
+ end
315
+
316
+ Client -->|stdio / SSE| Router
317
+ Router --> Modules
318
+ Modules --> Fallback
319
+ Fallback --> YT
320
+ KB --> SQLite
321
+ KB --> Embeddings
322
+ Visual --> Embeddings
323
+ Visual --> MediaStore
324
+ Visual --> AppleVision
325
+ Visual --> Gemini
326
+ Comments --> SQLite
327
+ Media --> MediaStore
328
+
329
+ T1 -.->|"quota exceeded?"| T2
330
+ T2 -.->|"unavailable?"| T3
331
+ ```
332
+
333
+ ### How the Fallback Chain Works
334
+
335
+ Every tool that touches YouTube data uses the same resilience pattern:
336
+
337
+ ```mermaid
338
+ flowchart LR
339
+ Request["Tool call"] --> Check{"API key<br/>configured?"}
340
+ Check -->|Yes| API["YouTube API v3"]
341
+ Check -->|No| YTD["yt-dlp"]
342
+ API -->|"200 ✓"| OK["Return data +<br/>provenance tag"]
343
+ API -->|"403 / quota"| YTD
344
+ YTD -->|"✓"| OK
345
+ YTD -->|"error"| Page["Page Extraction"]
346
+ Page -->|"✓"| OK
347
+ Page -->|"error"| Fail["Graceful error +<br/>fallback report"]
348
+ ```
349
+
350
+ Every response includes a `provenance` field telling you exactly which tier served the data and whether anything was partial. No silent degradation — you always know what happened.
351
+
352
+ ### Visual Search Pipeline
353
+
354
+ Visual search is not transcript reuse. It's a dedicated three-layer index:
355
+
356
+ ```mermaid
357
+ flowchart TB
358
+ Video["Video URL"] --> Extract["Extract Keyframes<br/>ffmpeg scene detection"]
359
+
360
+ Extract --> L1["Layer 1: Apple Vision<br/>Feature prints + OCR"]
361
+ Extract --> L2["Layer 2: Gemini Vision<br/>Frame descriptions"]
362
+
363
+ L1 --> FP["Feature Print Index"]
364
+ L1 --> OCR["OCR Text"]
365
+ L2 --> Desc["Scene Descriptions"]
366
+
367
+ OCR --> L3["Layer 3: Gemini Embeddings<br/>768-dim semantic vectors"]
368
+ Desc --> L3
369
+
370
+ L3 --> VecDB["sqlite-vec Index"]
371
+ FP --> VecDB
372
+
373
+ VecDB --> Search["searchVisualContent / findSimilarFrames"]
374
+ Search --> Results["Frame path · Timestamp · Source video<br/>Match reason · OCR text · Visual description"]
375
+ ```
376
+
377
+ **Three layers, all real:**
378
+ 1. **Apple Vision feature prints** — image-to-image similarity (find frames that *look* alike)
379
+ 2. **Gemini Vision frame descriptions** — natural language scene understanding per frame
380
+ 3. **Gemini semantic embeddings** — 768-dim retrieval over OCR + description text
381
+
382
+ ### Data Storage
383
+
384
+ Everything lives in a single directory. No external databases, no Docker, no infrastructure.
385
+
386
+ ```
387
+ ~/.vidlens/
388
+ ├── vidlens.db # Metadata, embeddings, indexes (sqlite-vec)
389
+ ├── media/
390
+ │ └── {video-id}/
391
+ │ ├── video.mp4
392
+ │ ├── audio.mp3
393
+ │ └── frames/
394
+ │ ├── frame_001_00m30s.jpg
395
+ │ └── frame_002_01m15s.jpg
396
+ └── logs/ # Diagnostics output
397
+ ```
398
+
399
+ One directory. Portable. Back it up by copying. Delete it to start fresh.
400
+
401
+ ---
402
+
403
+ ## 📋 Requirements
404
+
405
+ | Requirement | Status | Notes |
406
+ |---|---|---|
407
+ | **Node.js ≥ 20** | Required | `node --version` to check |
408
+ | **yt-dlp** | Recommended | `brew install yt-dlp` - enables zero-config mode |
409
+ | **ffmpeg** | Optional | Needed for frame extraction and visual indexing |
410
+ | **YouTube API key** | Optional | Unlocks comments, better metadata |
411
+ | **Gemini API key** | Optional | Upgrades transcript embeddings and frame descriptions for visual search |
412
+ | **macOS Apple Vision** | Automatic on macOS | Powers native OCR and image similarity for visual search |
413
+
414
+ ---
415
+
416
+ ## 🔧 Troubleshooting
417
+
418
+ ### "Tool not found" in Claude Desktop
419
+ Fully quit Claude Desktop (⌘Q, not just close window) and reopen. MCP servers only load on startup.
420
+
421
+ ### "YOUTUBE_API_KEY not configured" warning
422
+ This is informational, not an error. VidLens works without it. Add a key only if you need comments/sentiment features.
423
+
424
+ ### "API_KEY_SERVICE_BLOCKED" error
425
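+ Your API key has API restrictions that block the YouTube Data API v3. In Google Cloud Console, either add YouTube Data API v3 to the key's allowed APIs (or remove the API restriction from the existing key), or create a new **unrestricted** key.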
426
+
427
+ ### Gemini key doesn't work for YouTube API
428
+ These are **separate services**. You need a YouTube API key from Google Cloud Console AND a Gemini key from Google AI Studio. They are not interchangeable.
429
+
430
+ ### Build errors
431
+ ```bash
432
+ npx vidlens-mcp doctor # Run diagnostics
433
+ npx vidlens-mcp doctor --no-live # Skip network checks
434
+ ```
435
+
436
+ ---
437
+
438
+ ## 📄 License
439
+
440
+ MIT
441
+
442
+ ---
443
+
444
+ <p align="center">
445
+ <a href="https://github.com/rajanrengasamy/vidlens-mcp">GitHub</a> ·
446
+ <a href="https://www.npmjs.com/package/vidlens-mcp">npm</a> ·
447
+ <a href="https://modelcontextprotocol.io/">Model Context Protocol</a>
448
+ </p>
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+ import { runCli } from "./lib/cli-runtime.js";
3
+ runCli(process.argv.slice(2)).catch((error) => {
4
+ const message = error instanceof Error ? error.stack ?? error.message : String(error);
5
+ process.stderr.write(`vidlens-mcp failed: ${message}\n`);
6
+ process.exitCode = 1;
7
+ });
8
+ //# sourceMappingURL=cli.js.map
package/dist/cli.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;AAE9C,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IAC5C,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACtF,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,uBAAuB,OAAO,IAAI,CAAC,CAAC;IACzD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACvB,CAAC,CAAC,CAAC"}
package/dist/index.d.ts ADDED
@@ -0,0 +1,8 @@
1
+ export { createYouTubeMcpServer, startStdioServer, tools } from "./server/mcp-server.js";
2
+ export { YouTubeService } from "./lib/youtube-service.js";
3
+ export { CommentKnowledgeBase } from "./lib/comment-knowledge-base.js";
4
+ export { MediaStore } from "./lib/media-store.js";
5
+ export { MediaDownloader } from "./lib/media-downloader.js";
6
+ export { ThumbnailExtractor } from "./lib/thumbnail-extractor.js";
7
+ export { VisualIndexStore, VisualSearchEngine } from "./lib/visual-search.js";
8
+ export * from "./lib/types.js";
package/dist/index.js ADDED
@@ -0,0 +1,9 @@
1
+ export { createYouTubeMcpServer, startStdioServer, tools } from "./server/mcp-server.js";
2
+ export { YouTubeService } from "./lib/youtube-service.js";
3
+ export { CommentKnowledgeBase } from "./lib/comment-knowledge-base.js";
4
+ export { MediaStore } from "./lib/media-store.js";
5
+ export { MediaDownloader } from "./lib/media-downloader.js";
6
+ export { ThumbnailExtractor } from "./lib/thumbnail-extractor.js";
7
+ export { VisualIndexStore, VisualSearchEngine } from "./lib/visual-search.js";
8
+ export * from "./lib/types.js";
9
+ //# sourceMappingURL=index.js.map
package/dist/index.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,KAAK,EAAE,MAAM,wBAAwB,CAAC;AACzF,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,OAAO,EAAE,gBAAgB,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC9E,cAAc,gBAAgB,CAAC"}
package/dist/lib/analysis.d.ts ADDED
@@ -0,0 +1,71 @@
1
+ import type { Chapter, CommentRecord, ContentGap, NicheMomentum, NicheSaturation, TranscriptRecord, TranscriptSegment, TrendingVideo, VideoRecord } from "./types.js";
2
+ export interface SentimentSummary {
3
+ positivePct: number;
4
+ neutralPct: number;
5
+ negativePct: number;
6
+ sentimentScore: number;
7
+ }
8
+ export interface ThemeScore {
9
+ theme: string;
10
+ prevalencePct: number;
11
+ sentimentScore: number;
12
+ }
13
+ export interface RiskSignal {
14
+ signal: string;
15
+ severity: "low" | "medium" | "high";
16
+ frequencyPct: number;
17
+ }
18
+ export interface QuoteSample {
19
+ text: string;
20
+ sentiment: "positive" | "neutral" | "negative";
21
+ }
22
+ export interface HookPatternResult {
23
+ hookScore: number;
24
+ hookType: "question" | "promise" | "shock" | "story" | "proof" | "other";
25
+ first30SecSummary: string;
26
+ weakSignals: string[];
27
+ improvements: string[];
28
+ }
29
+ export declare function median(values: number[]): number | undefined;
30
+ export declare function percentile(values: number[], ratio: number): number | undefined;
31
+ export declare function average(values: number[]): number | undefined;
32
+ export declare function inferVideoFormat(durationSec?: number): "short" | "long" | "unknown";
33
+ export declare function computeEngagementRate(video: Pick<VideoRecord, "views" | "likes" | "comments">): number | undefined;
34
+ export declare function computeCommentRate(video: Pick<VideoRecord, "views" | "comments">): number | undefined;
35
+ export declare function computeLikeRate(video: Pick<VideoRecord, "views" | "likes">): number | undefined;
36
+ export declare function computeViewVelocity24h(views: number | undefined, publishedAt?: string): number | undefined;
37
+ export declare function summarizeText(text: string, maxSentences?: number): string;
38
+ export declare function parseDescriptionChapters(description?: string): Chapter[];
39
+ export declare function buildTranscriptSegmentsForWindow(transcript: TranscriptRecord, windowSec: number, maxSegments?: number): TranscriptSegment[];
40
+ export declare function buildChapterTranscriptSegments(transcript: TranscriptRecord): TranscriptSegment[];
41
+ export declare function analyzeComments(comments: CommentRecord[], includeThemes?: boolean, includeQuotes?: boolean): {
42
+ sentiment: SentimentSummary;
43
+ themes?: ThemeScore[];
44
+ riskSignals: RiskSignal[];
45
+ representativeQuotes?: QuoteSample[];
46
+ };
47
+ export declare function scoreHookPattern(videoId: string, transcript: TranscriptRecord, hookWindowSec?: number): HookPatternResult;
48
+ export declare function extractRecurringKeywords(videos: VideoRecord[], limit?: number): string[];
49
+ export declare function titleStructure(title: string): string;
50
+ /**
51
+ * Compute momentum indicator by comparing recent videos vs older videos.
52
+ * Splits the video set by median publish date and compares median views.
53
+ */
54
+ export declare function computeNicheMomentum(videos: TrendingVideo[], lookbackDays: number): NicheMomentum;
55
+ /**
56
+ * Estimate niche saturation from the view distribution of search results.
57
+ */
58
+ export declare function computeNicheSaturation(videos: TrendingVideo[]): NicheSaturation;
59
+ /**
60
+ * Detect content gap angles from title/tag patterns. Heuristic:
61
+ * look for under-represented sub-topics and format gaps.
62
+ */
63
+ export declare function detectContentGaps(videos: TrendingVideo[], niche: string): ContentGap[];
64
+ /**
65
+ * Compute format breakdown percentages.
66
+ */
67
+ export declare function computeFormatBreakdown(videos: TrendingVideo[]): {
68
+ shortsPct: number;
69
+ longFormPct: number;
70
+ unknownPct: number;
71
+ };