mcptube 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. mcptube-0.1.0/.gitignore +41 -0
  2. mcptube-0.1.0/PKG-INFO +461 -0
  3. mcptube-0.1.0/README.md +436 -0
  4. mcptube-0.1.0/lewis_hamilton_ferrari_report.html +465 -0
  5. mcptube-0.1.0/lewis_hamilton_report.html +315 -0
  6. mcptube-0.1.0/multi_report.html +36 -0
  7. mcptube-0.1.0/pyproject.toml +53 -0
  8. mcptube-0.1.0/report.html +65 -0
  9. mcptube-0.1.0/src/mcptube/__init__.py +0 -0
  10. mcptube-0.1.0/src/mcptube/cli.py +286 -0
  11. mcptube-0.1.0/src/mcptube/config.py +54 -0
  12. mcptube-0.1.0/src/mcptube/discovery.py +162 -0
  13. mcptube-0.1.0/src/mcptube/ingestion/__init__.py +0 -0
  14. mcptube-0.1.0/src/mcptube/ingestion/frames.py +111 -0
  15. mcptube-0.1.0/src/mcptube/ingestion/youtube.py +187 -0
  16. mcptube-0.1.0/src/mcptube/llm.py +118 -0
  17. mcptube-0.1.0/src/mcptube/models.py +47 -0
  18. mcptube-0.1.0/src/mcptube/report.py +332 -0
  19. mcptube-0.1.0/src/mcptube/server.py +446 -0
  20. mcptube-0.1.0/src/mcptube/service.py +411 -0
  21. mcptube-0.1.0/src/mcptube/storage/__init__.py +0 -0
  22. mcptube-0.1.0/src/mcptube/storage/repository.py +38 -0
  23. mcptube-0.1.0/src/mcptube/storage/sqlite.py +134 -0
  24. mcptube-0.1.0/src/mcptube/storage/vectorstore.py +187 -0
  25. mcptube-0.1.0/synthesis.html +36 -0
  26. mcptube-0.1.0/tests/conftest.py +120 -0
  27. mcptube-0.1.0/tests/test_cli_integration.py +93 -0
  28. mcptube-0.1.0/tests/test_config.py +38 -0
  29. mcptube-0.1.0/tests/test_discovery.py +76 -0
  30. mcptube-0.1.0/tests/test_frames.py +89 -0
  31. mcptube-0.1.0/tests/test_ingestion.py +0 -0
  32. mcptube-0.1.0/tests/test_llm.py +65 -0
  33. mcptube-0.1.0/tests/test_models.py +69 -0
  34. mcptube-0.1.0/tests/test_report.py +105 -0
  35. mcptube-0.1.0/tests/test_resolve.py +66 -0
  36. mcptube-0.1.0/tests/test_server_integration.py +108 -0
  37. mcptube-0.1.0/tests/test_service.py +154 -0
  38. mcptube-0.1.0/tests/test_sqllite.py +73 -0
  39. mcptube-0.1.0/tests/test_storage.py +0 -0
  40. mcptube-0.1.0/tests/test_vectorstore.py +60 -0
  41. mcptube-0.1.0/tests/test_youtube.py +176 -0
@@ -0,0 +1,41 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ *.whl
10
+
11
+ # Virtual environment
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # mcptube data
23
+ .mcptube/
24
+
25
+ # ChromaDB cache
26
+ .cache/chroma/
27
+
28
+ # OS
29
+ .DS_Store
30
+ Thumbs.db
31
+
32
+ # Testing
33
+ .pytest_cache/
34
+ htmlcov/
35
+ .coverage
36
+
37
+ # Environment
38
+ .env
39
+
40
+ dist/
41
+ *.egg-info/
mcptube-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,461 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcptube
3
+ Version: 0.1.0
4
+ Summary: Convert any YouTube video into an AI-queryable MCP server
5
+ Author-email: 0xchamin <chmk90@gmail.com>
6
+ License-Expression: MIT
7
+ Keywords: ai,fastmcp,mcp,video,youtube
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.12
14
+ Requires-Dist: chromadb>=1.5
15
+ Requires-Dist: fastmcp<4.0,>=3.0
16
+ Requires-Dist: litellm>=1.50
17
+ Requires-Dist: pydantic-settings>=2.0
18
+ Requires-Dist: typer>=0.9
19
+ Requires-Dist: yt-dlp>=2025.0.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
22
+ Requires-Dist: pytest>=8.0; extra == 'dev'
23
+ Requires-Dist: ruff>=0.4; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # 🎬 mcptube
27
+
28
+ **Convert any YouTube video into an AI-queryable MCP server.**
29
+
30
+ YouTube URL in → searchable library → ask your AI anything about any video.
31
+
32
+ mcptube extracts transcripts, metadata, and frames from YouTube videos, indexes them into a local vector database, and exposes everything as [MCP](https://modelcontextprotocol.io/) tools — queryable by Claude, ChatGPT, VS Code Copilot, Cursor, Gemini, and any MCP-compatible client.
33
+
34
+ ---
35
+
36
+ ## ✨ Features
37
+
38
+ - **Semantic search** across video transcripts (single video or entire library)
39
+ - **Frame extraction** at any timestamp or by natural language query
40
+ - **Auto-classification** with LLM-generated tags
41
+ - **Illustrated reports** — single-video or cross-video, markdown or HTML
42
+ - **Video discovery** — search YouTube by topic, filter and cluster results
43
+ - **Cross-video synthesis** — themes, agreements, and contradictions across videos
44
+ - **Dual interface** — full CLI + MCP server
45
+ - **Passthrough LLM** — MCP tools require zero API keys; the client LLM does the reasoning
46
+ - **BYOK** — CLI mode supports 100+ LLM providers via LiteLLM
47
+ - **Smart video resolver** — reference videos by ID, index, or title substring
48
+
49
+ ---
50
+
51
+ ## 📋 Prerequisites
52
+
53
+ - **Python 3.12+**
54
+ - **ffmpeg** — required for frame extraction
55
+ ```bash
56
+ # macOS
57
+ brew install ffmpeg
58
+
59
+ # Ubuntu/Debian
60
+ sudo apt install ffmpeg
61
+
62
+ # Windows
63
+ winget install ffmpeg
64
+ ```
65
+
66
+ ---
67
+
68
+ ## 🚀 Installation
69
+
70
+ ```bash
71
+ pip install mcptube
72
+ ```
73
+
74
+ ### From source (development)
75
+
76
+ ```bash
77
+ git clone https://github.com/0xchamin/mcptube.git
78
+ cd mcptube
79
+ python -m venv .venv
80
+ source .venv/bin/activate
81
+ pip install -e ".[dev]"
82
+ ```
83
+
84
+ ---
85
+
86
+ ## ⚑ Quick Start
87
+
88
+ ### CLI
89
+
90
+ ```bash
91
+ # Add a video
92
+ mcptube add "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
93
+
94
+ # List your library
95
+ mcptube list
96
+
97
+ # Search across all videos
98
+ mcptube search "machine learning basics"
99
+
100
+ # Extract a frame
101
+ mcptube frame 1 120.5
102
+
103
+ # Start the MCP server
104
+ mcptube serve
105
+ ```
106
+
107
+ ### MCP Server
108
+
109
+ ```bash
110
+ # Streamable HTTP (default) — works with Claude Code, ChatGPT
111
+ mcptube serve
112
+
113
+ # stdio — works with VS Code, Claude Desktop, Cursor
114
+ mcptube serve --stdio
115
+ ```
116
+
117
+ ---
118
+
119
+ ## 🔧 CLI Commands
120
+
121
+ | Command | Description |
122
+ |---------|-------------|
123
+ | `mcptube add <url>` | Ingest a YouTube video |
124
+ | `mcptube list` | List all videos in the library |
125
+ | `mcptube info <query>` | Show video details (ID, index, or text) |
126
+ | `mcptube remove <query>` | Remove a video from the library |
127
+ | `mcptube search <query>` | Semantic search across transcripts |
128
+ | `mcptube frame <video> <timestamp>` | Extract a frame at a timestamp |
129
+ | `mcptube frame-query <video> <text>` | Search transcript + extract frame |
130
+ | `mcptube classify <video>` | Auto-classify with LLM tags (BYOK) |
131
+ | `mcptube report <video>` | Generate an illustrated report (BYOK) |
132
+ | `mcptube report-query <query>` | Cross-video report from search (BYOK) |
133
+ | `mcptube discover <topic>` | Search YouTube + cluster results (BYOK) |
134
+ | `mcptube synthesize-cmd <topic> -v <id>` | Cross-video synthesis (BYOK) |
135
+ | `mcptube serve` | Start MCP server (Streamable HTTP) |
136
+ | `mcptube serve --stdio` | Start MCP server (stdio) |
137
+
138
+ ### Smart Video Resolver
139
+
140
+ Commands that take a `<video>` or `<query>` argument accept:
141
+
142
+ | Input | Resolution |
143
+ |-------|-----------|
144
+ | `BpibZSMGtdY` | Exact YouTube video ID |
145
+ | `1` | Index number from `mcptube list` |
146
+ | `"prompting"` | Substring match on title or channel |
147
+
148
+ ### Search Options
149
+
150
+ ```bash
151
+ # Search all videos
152
+ mcptube search "attention mechanism"
153
+
154
+ # Search within a specific video
155
+ mcptube search "attention" --video "prompting"
156
+
157
+ # Limit results
158
+ mcptube search "attention" --limit 5
159
+ ```
160
+
161
+ ### Report Options
162
+
163
+ ```bash
164
+ # Full report for a video
165
+ mcptube report "prompting" --format html --output report.html
166
+
167
+ # Focused report
168
+ mcptube report "prompting" --focus "reasoning strategies" --output focused.html
169
+
170
+ # Cross-video report from search
171
+ mcptube report-query "prompt engineering" --format html --output multi.html
172
+
173
+ # Cross-video synthesis
174
+ mcptube synthesize-cmd "prompting" -v BpibZSMGtdY -v UPGB-hsAoVY --output synthesis.html
175
+ ```
176
+
177
+ ---
178
+
179
+ ## 🤖 MCP Tools (13 tools)
180
+
181
+ All MCP tools use the **passthrough pattern** — no API key required on the server. The connected AI client (Claude, ChatGPT, Copilot) provides the LLM reasoning.
182
+
183
+ | Tool | Description |
184
+ |------|-------------|
185
+ | `add_video(url)` | Ingest a YouTube video |
186
+ | `remove_video(video_id)` | Remove from library |
187
+ | `list_videos()` | List all videos with metadata |
188
+ | `get_info(video_id)` | Full video details with transcript |
189
+ | `search(query, video_id?, limit)` | Semantic search (single or all videos) |
190
+ | `search_library(query, tags?, limit)` | Cross-library search with tag filter |
191
+ | `get_frame(video_id, timestamp)` | Extract frame at timestamp |
192
+ | `get_frame_by_query(video_id, query)` | Search + extract frame |
193
+ | `classify_video(video_id)` | Return metadata for client classification |
194
+ | `generate_report(video_id, query?)` | Return data for client report generation |
195
+ | `generate_report_from_query(query, tags?)` | Cross-video report data |
196
+ | `discover_videos(topic)` | YouTube search results |
197
+ | `synthesize(video_ids, topic)` | Cross-video synthesis data |
198
+
199
+ ---
200
+
201
+ ## 🔌 MCP Client Configuration
202
+
203
+ ### Claude Code
204
+
205
+ ```bash
206
+ # Streamable HTTP (recommended)
207
+ claude mcp add --transport http --scope global mcptube http://127.0.0.1:9093/mcp
208
+ ```
209
+
210
+ > **Note:** Use `--scope global` to make mcptube available in all projects. Without it, the server is scoped to the directory where you ran the command.
211
+
212
+ Then start the server in a separate terminal:
213
+
214
+ ```bash
215
+ mcptube serve
216
+ ```
217
+
218
+ ### VS Code / Copilot Chat
219
+
220
+ Add to `.vscode/mcp.json` in your workspace:
221
+
222
+ ```json
223
+ {
224
+ "servers": {
225
+ "mcptube": {
226
+ "command": "mcptube",
227
+ "args": ["serve", "--stdio"]
228
+ }
229
+ }
230
+ }
231
+ ```
232
+
233
+ > **Note:** If VS Code can't find `mcptube`, use the full path to the executable:
234
+ > ```json
235
+ > "command": "/path/to/your/.venv/bin/mcptube"
236
+ > ```
237
+ > Or if installed globally via pip, the command should work as-is.
238
+
239
+ ### Claude Desktop
240
+
241
+ Add to `claude_desktop_config.json`:
242
+
243
+ ```json
244
+ {
245
+ "mcpServers": {
246
+ "mcptube": {
247
+ "command": "mcptube",
248
+ "args": ["serve", "--stdio"]
249
+ }
250
+ }
251
+ }
252
+ ```
253
+
254
+ ### Cursor
255
+
256
+ Add to `.cursor/mcp.json`:
257
+
258
+ ```json
259
+ {
260
+ "mcpServers": {
261
+ "mcptube": {
262
+ "command": "mcptube",
263
+ "args": ["serve", "--stdio"]
264
+ }
265
+ }
266
+ }
267
+ ```
268
+
269
+ ### ChatGPT
270
+
271
+ ```
272
+ Settings → Connectors → Add → http://localhost:9093/mcp
273
+ ```
274
+
275
+ ### Gemini CLI
276
+
277
+ Add to `settings.json`:
278
+
279
+ ```json
280
+ {
281
+ "mcpServers": {
282
+ "mcptube": {
283
+ "command": "mcptube",
284
+ "args": ["serve", "--stdio"]
285
+ }
286
+ }
287
+ }
288
+ ```
289
+
290
+ ---
291
+
292
+ ## 🔑 BYOK — Bring Your Own Key (CLI Mode)
293
+
294
+ CLI commands that use LLM features (classify, report, discover, synthesize) require an API key via environment variables:
295
+
296
+ ```bash
297
+ # Anthropic (Claude)
298
+ export ANTHROPIC_API_KEY="sk-ant-..."
299
+
300
+ # OpenAI
301
+ export OPENAI_API_KEY="sk-..."
302
+
303
+ # Google (Gemini)
304
+ export GOOGLE_API_KEY="AI..."
305
+ ```
306
+
307
+ mcptube auto-detects which key is available. Set a default model:
308
+
309
+ ```bash
310
+ export MCPTUBE_DEFAULT_MODEL="anthropic/claude-sonnet-4-20250514"
311
+ ```
312
+
313
+ > **Security:** Never pass API keys as CLI flags. Always use environment variables.
314
+
315
+ > **MCP mode does not need any API key** — the connected AI client provides the LLM.
316
+
317
+ ---
318
+
319
+ ## ⚙️ Configuration
320
+
321
+ All settings can be overridden via `MCPTUBE_`-prefixed environment variables:
322
+
323
+ | Variable | Default | Description |
324
+ |----------|---------|-------------|
325
+ | `MCPTUBE_DATA_DIR` | `~/.mcptube` | Root directory for all data |
326
+ | `MCPTUBE_HOST` | `127.0.0.1` | Server bind host |
327
+ | `MCPTUBE_PORT` | `9093` | Server bind port |
328
+ | `MCPTUBE_DEFAULT_MODEL` | `gpt-4o` | Default LLM model for CLI |
329
+
330
+ ### Server Options
331
+
332
+ ```bash
333
+ mcptube serve # Streamable HTTP on 127.0.0.1:9093
334
+ mcptube serve --stdio # stdio transport
335
+ mcptube serve --host 0.0.0.0 --port 8080 # Custom host/port
336
+ mcptube serve --reload # Dev mode with hot-reload
337
+ ```
338
+
339
+ ---
340
+
341
+ ## 🏗️ Architecture
342
+
343
+ ```
344
+ CLI (Typer) ←──────┐
345
+ ├── Service Layer (McpTubeService)
346
+ MCP Server (FastMCP) ←─┘ │
347
+ ┌────┴────┐
348
+ Repository VectorStore
349
+ (SQLite) (ChromaDB)
350
+ │
351
+ Ingestion Layer
352
+ ├── YouTubeExtractor (yt-dlp)
353
+ ├── FrameExtractor (yt-dlp + ffmpeg)
354
+ ├── LLMClient (LiteLLM — CLI only)
355
+ ├── ReportBuilder (CLI only)
356
+ └── VideoDiscovery (CLI only)
357
+ ```
358
+
359
+ ### LLM Strategy
360
+
361
+ | Mode | LLM | Cost |
362
+ |------|-----|------|
363
+ | **CLI** | LiteLLM (BYOK) | User's API key |
364
+ | **MCP** | Client LLM (passthrough) | Free — client provides reasoning |
365
+
366
+ ### Storage
367
+
368
+ | Component | Per video (~40 min) | 100 videos |
369
+ |-----------|-------------------|------------|
370
+ | SQLite (metadata + transcript) | ~200-500 KB | ~50 MB |
371
+ | ChromaDB (384-dim vectors) | ~1.5-2 MB | ~200 MB |
372
+ | **Total** | | **~250 MB** |
373
+
374
+ ChromaDB downloads the `all-MiniLM-L6-v2` embedding model (~80 MB) on first use. This is a one-time download cached at `~/.cache/chroma/`.
375
+
376
+ ---
377
+
378
+ ## 🛠️ Tech Stack
379
+
380
+ - **FastMCP 3.0** — MCP server framework (Streamable HTTP + stdio)
381
+ - **yt-dlp** — YouTube extraction (transcripts, metadata, search)
382
+ - **ffmpeg** — On-demand frame extraction
383
+ - **ChromaDB** — Local vector database with built-in embeddings
384
+ - **LiteLLM** — Unified LLM interface (100+ providers)
385
+ - **Typer** — CLI framework
386
+ - **Pydantic** — Data models and settings
387
+ - **SQLite** — Library metadata storage
388
+
389
+ ---
390
+
391
+ ## 🧪 Development
392
+
393
+ ```bash
394
+ # Install dev dependencies
395
+ pip install -e ".[dev]"
396
+
397
+ # Run tests
398
+ pytest
399
+
400
+ # Run with coverage
401
+ pytest --cov=mcptube --cov-report=html
402
+
403
+ # Lint
404
+ ruff check src/
405
+
406
+ # Format
407
+ ruff format src/
408
+ ```
409
+
410
+ ---
411
+
412
+ ## 📦 Project Structure
413
+
414
+ ```
415
+ mcptube/
416
+ ├── src/mcptube/
417
+ │ ├── __init__.py
418
+ │ ├── cli.py # Typer CLI
419
+ │ ├── server.py # FastMCP MCP server
420
+ │ ├── service.py # Core business logic
421
+ │ ├── models.py # Pydantic domain models
422
+ │ ├── config.py # Settings (pydantic-settings)
423
+ │ ├── llm.py # LiteLLM wrapper (BYOK)
424
+ │ ├── report.py # ReportBuilder
425
+ │ ├── discovery.py # VideoDiscovery
426
+ │ ├── ingestion/
427
+ │ │ ├── __init__.py
428
+ │ │ ├── youtube.py # YouTubeExtractor
429
+ │ │ └── frames.py # FrameExtractor
430
+ │ └── storage/
431
+ │ ├── __init__.py
432
+ │ ├── repository.py # Abstract VideoRepository
433
+ │ ├── sqlite.py # SQLiteVideoRepository
434
+ │ └── vectorstore.py # VectorStore + ChromaVectorStore
435
+ ├── tests/
436
+ ├── pyproject.toml
437
+ └── README.md
438
+ ```
439
+
440
+ ---
441
+
442
+ ## 🗺️ Roadmap
443
+
444
+ - [x] MVP — 13 MCP tools, CLI, semantic search, frames, reports
445
+ - [ ] MCP Apps — Interactive HTML UIs inline in chat
446
+ - [ ] Playlist / channel import
447
+ - [ ] Speaker diarization
448
+ - [ ] OCR on frames
449
+ - [ ] Auto-chaptering
450
+ - [ ] Multi-language transcripts
451
+ - [ ] SaaS tier (OAuth, pgvector, team libraries)
452
+
453
+ ---
454
+
455
+ ## 📄 License
456
+
457
+ MIT
458
+
459
+ ---
460
+
461
+ Built with [FastMCP](https://gofastmcp.com) ⚑