getraw 0.1.3 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release.yml +1 -0
- package/README.md +101 -110
- package/bun.lock +7 -0
- package/package.json +6 -4
- package/scripts/patch-youtubei.js +14 -0
- package/skills/getraw/SKILL.md +163 -0
- package/src/cli/index.ts +2 -1
- package/src/cli/options.ts +2 -2
- package/src/core/orchestrator.ts +1 -1
- package/src/extractors/base.ts +70 -1
- package/src/extractors/youtube/index.ts +105 -196
- package/src/extractors/youtube/innertube.ts +4 -4
package/README.md
CHANGED
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
# getraw
|
|
2
2
|
|
|
3
|
-
Fast media downloader CLI built natively in Bun/TypeScript.
|
|
3
|
+
Fast media downloader CLI built natively in Bun/TypeScript. A yt-dlp replacement with native JS execution.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
[getraw on npm](https://www.npmjs.com/package/getraw)
|
|
6
|
+
[]()
|
|
7
|
+
[]()
|
|
8
|
+
|
|
9
|
+
## Why getraw?
|
|
6
10
|
|
|
7
|
-
|
|
11
|
+
- **Native JS execution** — YouTube's player code runs natively in Bun. No external runtime needed (yt-dlp requires Deno/Node).
|
|
12
|
+
- **50ms cold startup** — Bun-powered, not Python.
|
|
13
|
+
- **30+ sites** — YouTube, Twitter, TikTok, Instagram, Reddit, Twitch, and more.
|
|
14
|
+
- **Zero API keys** — All extractors use public endpoints, guest tokens, and page scraping.
|
|
15
|
+
- **Agent-ready** — Install as an AI agent skill: `npx skills add onkits/getraw`
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
8
18
|
|
|
9
19
|
```sh
|
|
10
20
|
bun install -g getraw
|
|
@@ -13,53 +23,35 @@ bun install -g getraw
|
|
|
13
23
|
### From source
|
|
14
24
|
|
|
15
25
|
```sh
|
|
16
|
-
git clone https://github.com/
|
|
26
|
+
git clone https://github.com/onkits/getraw
|
|
17
27
|
cd getraw
|
|
18
28
|
bun install
|
|
19
29
|
```
|
|
20
30
|
|
|
21
|
-
|
|
31
|
+
### As an AI agent skill
|
|
22
32
|
|
|
23
33
|
```sh
|
|
24
|
-
|
|
34
|
+
npx skills add onkits/getraw
|
|
25
35
|
```
|
|
26
36
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
```sh
|
|
30
|
-
bun run build
|
|
31
|
-
./getraw <URL>
|
|
32
|
-
```
|
|
37
|
+
Works with Claude Code, Cursor, Copilot, Codex, Windsurf, and 50+ other agents.
|
|
33
38
|
|
|
34
39
|
## Quick Start
|
|
35
40
|
|
|
36
|
-
Download a video at best quality:
|
|
37
|
-
|
|
38
41
|
```sh
|
|
42
|
+
# Download a video
|
|
39
43
|
getraw https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
Extract audio as MP3:
|
|
43
44
|
|
|
44
|
-
|
|
45
|
+
# Extract audio as MP3
|
|
45
46
|
getraw -x --audio-format mp3 https://soundcloud.com/artist/track
|
|
46
|
-
```
|
|
47
47
|
|
|
48
|
-
List
|
|
49
|
-
|
|
50
|
-
```sh
|
|
48
|
+
# List available formats
|
|
51
49
|
getraw -F https://vimeo.com/123456789
|
|
52
|
-
```
|
|
53
50
|
|
|
54
|
-
Download
|
|
51
|
+
# Download specific quality with subtitles
|
|
52
|
+
getraw -f "bestvideo[height<=1080]+bestaudio" --write-subs https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
|
55
53
|
|
|
56
|
-
|
|
57
|
-
getraw -f "bestvideo[height<=1080]+bestaudio" --write-subs --sub-langs en https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
Dump extracted metadata as JSON without downloading:
|
|
61
|
-
|
|
62
|
-
```sh
|
|
54
|
+
# Get metadata as JSON (no download)
|
|
63
55
|
getraw -j https://www.reddit.com/r/videos/comments/abc123/some_post/
|
|
64
56
|
```
|
|
65
57
|
|
|
@@ -69,97 +61,96 @@ getraw -j https://www.reddit.com/r/videos/comments/abc123/some_post/
|
|
|
69
61
|
Usage: getraw [OPTIONS] URL [URL...]
|
|
70
62
|
```
|
|
71
63
|
|
|
72
|
-
| Flag | Short |
|
|
73
|
-
|
|
74
|
-
| `--format` | `-f` |
|
|
75
|
-
| `--output` | `-o` |
|
|
76
|
-
| `--extract-audio` | `-x` |
|
|
77
|
-
| `--audio-format` | |
|
|
78
|
-
| `--
|
|
79
|
-
| `--
|
|
80
|
-
| `--
|
|
81
|
-
| `--
|
|
82
|
-
| `--
|
|
83
|
-
| `--
|
|
84
|
-
| `--
|
|
85
|
-
| `--
|
|
86
|
-
| `--
|
|
87
|
-
| `--
|
|
88
|
-
| `--
|
|
89
|
-
| `--
|
|
90
|
-
| `--
|
|
91
|
-
| `--
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
|
96
|
-
|
|
97
|
-
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
|
102
|
-
|
|
103
|
-
|
|
|
104
|
-
|
|
|
105
|
-
|
|
|
106
|
-
|
|
|
107
|
-
|
|
|
108
|
-
|
|
|
109
|
-
|
|
|
110
|
-
|
|
|
111
|
-
|
|
|
112
|
-
|
|
|
113
|
-
|
|
|
114
|
-
|
|
|
115
|
-
|
|
|
116
|
-
|
|
|
117
|
-
|
|
|
118
|
-
|
|
|
119
|
-
|
|
|
120
|
-
|
|
|
121
|
-
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
| Imgur | `imgur` | `imgur.com/<id>`, `imgur.com/a/<id>`, `imgur.com/gallery/<id>`, `i.imgur.com/*` | No |
|
|
129
|
-
| Rumble | `rumble` | `rumble.com/v*.html`, `rumble.com/embed/*` | No |
|
|
130
|
-
| Odysee | `odysee` | `odysee.com/@*:*/<slug>`, `lbry.tv/@*:*/<slug>` | No |
|
|
131
|
-
| TED | `ted` | `ted.com/talks/<slug>` | Yes |
|
|
132
|
-
| PeerTube | `peertube` | Any PeerTube instance: `<host>/videos/watch/*`, `<host>/w/*`, `<host>/videos/embed/*` | Yes |
|
|
133
|
-
| Google Drive | `google-drive` | `drive.google.com/file/d/*`, `docs.google.com/file/d/*` | No |
|
|
134
|
-
| Dropbox | `dropbox` | `dropbox.com/s/*`, `dropbox.com/sh/*`, `dropbox.com/scl/fo/*` | No |
|
|
135
|
-
| Archive.org | `archive.org` | `archive.org/details/*`, `archive.org/download/*` | No |
|
|
136
|
-
| Spotify | `spotify` | `open.spotify.com/episode/<id>` | No |
|
|
137
|
-
| Generic | `generic` | Any `http://` or `https://` URL (fallback) | No |
|
|
138
|
-
|
|
139
|
-
> Spotify: only 30-second preview audio is available without authentication. Full episode audio requires Spotify auth (not currently implemented).
|
|
140
|
-
|
|
141
|
-
See [docs/supported-sites.md](docs/supported-sites.md) for full format and URL pattern details.
|
|
64
|
+
| Flag | Short | Default | Description |
|
|
65
|
+
|------|-------|---------|-------------|
|
|
66
|
+
| `--format` | `-f` | `bv*+ba/b` | Format selection string |
|
|
67
|
+
| `--output` | `-o` | `%(title)s [%(id)s].%(ext)s` | Output filename template |
|
|
68
|
+
| `--extract-audio` | `-x` | | Extract audio only |
|
|
69
|
+
| `--audio-format` | | `mp3` | Audio format (mp3, aac, flac, wav, opus) |
|
|
70
|
+
| `--write-subs` | | | Write subtitles to file |
|
|
71
|
+
| `--sub-langs` | | `en` | Subtitle languages |
|
|
72
|
+
| `--list-formats` | `-F` | | List available formats |
|
|
73
|
+
| `--dump-json` | `-j` | | Dump info JSON to stdout |
|
|
74
|
+
| `--quiet` | `-q` | | Suppress output |
|
|
75
|
+
| `--verbose` | | | Verbose output |
|
|
76
|
+
| `--retries` | `-R` | `3` | Number of retries |
|
|
77
|
+
| `--rate-limit` | `-r` | | Rate limit in bytes/sec |
|
|
78
|
+
| `--proxy` | | | Proxy URL |
|
|
79
|
+
| `--cookies` | | | Cookie file path (Netscape format) |
|
|
80
|
+
| `--embed-thumbnail` | | | Embed thumbnail in output |
|
|
81
|
+
| `--embed-subs` | | | Embed subtitles in output |
|
|
82
|
+
| `--version` | `-v` | | Print version |
|
|
83
|
+
| `--help` | `-h` | | Show help |
|
|
84
|
+
|
|
85
|
+
## Supported Sites (30+)
|
|
86
|
+
|
|
87
|
+
| Site | URL Patterns |
|
|
88
|
+
|------|-------------|
|
|
89
|
+
| **YouTube** | youtube.com, youtu.be, shorts, live, playlists, channels |
|
|
90
|
+
| **Twitter/X** | twitter.com/\*/status/\*, x.com/\*/status/\*, Spaces |
|
|
91
|
+
| **TikTok** | tiktok.com/@\*/video/\*, vm.tiktok.com, user profiles |
|
|
92
|
+
| **Instagram** | instagram.com/p/\*, /reel/\*, /reels/ |
|
|
93
|
+
| **Reddit** | reddit.com/r/\*/comments/\*, v.redd.it, galleries |
|
|
94
|
+
| **Twitch** | VODs, clips, live streams |
|
|
95
|
+
| **Vimeo** | vimeo.com/\*, player embeds |
|
|
96
|
+
| **SoundCloud** | Tracks, playlists, albums |
|
|
97
|
+
| **Bilibili** | Videos, bangumi/anime |
|
|
98
|
+
| **Dailymotion** | Videos |
|
|
99
|
+
| **Bandcamp** | Tracks, albums |
|
|
100
|
+
| **Kick** | VODs, clips, live |
|
|
101
|
+
| **Rumble** | Videos |
|
|
102
|
+
| **TED** | Talks (with multi-language subtitles) |
|
|
103
|
+
| **Niconico** | Videos |
|
|
104
|
+
| **Streamable** | Videos |
|
|
105
|
+
| **Imgur** | Videos, GIFs, albums |
|
|
106
|
+
| **Coub** | Videos (video + audio merge) |
|
|
107
|
+
| **Odysee/LBRY** | Videos |
|
|
108
|
+
| **PeerTube** | Any instance |
|
|
109
|
+
| **Spotify** | Podcast episodes (30s preview) |
|
|
110
|
+
| **Archive.org** | Any public media |
|
|
111
|
+
| **Google Drive** | Public files |
|
|
112
|
+
| **Dropbox** | Public share links |
|
|
113
|
+
| **+ more** | Generic fallback for direct media URLs |
|
|
114
|
+
|
|
115
|
+
See [docs/supported-sites.md](docs/supported-sites.md) for full details.
|
|
116
|
+
|
|
117
|
+
## For AI Agents
|
|
118
|
+
|
|
119
|
+
getraw is designed to be used by AI agents. Key commands for automation:
|
|
142
120
|
|
|
143
|
-
|
|
121
|
+
```sh
|
|
122
|
+
# Get structured metadata
|
|
123
|
+
getraw --dump-json "URL" | jq '.title, .duration, .formats[0].url'
|
|
144
124
|
|
|
145
|
-
|
|
125
|
+
# Download transcript for summarization
|
|
126
|
+
getraw --write-subs --sub-langs en --skip-download "URL"
|
|
127
|
+
|
|
128
|
+
# Extract audio for transcription pipelines
|
|
129
|
+
getraw -x --audio-format wav -o "audio.wav" "URL"
|
|
130
|
+
|
|
131
|
+
# Batch download
|
|
132
|
+
getraw URL1 URL2 URL3
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Install as an agent skill for any compatible AI coding agent:
|
|
146
136
|
|
|
147
137
|
```sh
|
|
148
|
-
|
|
149
|
-
cd getraw
|
|
150
|
-
bun install
|
|
151
|
-
bun run build # produces ./getraw binary
|
|
138
|
+
npx skills add onkits/getraw
|
|
152
139
|
```
|
|
153
140
|
|
|
154
|
-
|
|
141
|
+
## Building from Source
|
|
155
142
|
|
|
156
143
|
```sh
|
|
157
|
-
|
|
144
|
+
git clone https://github.com/onkits/getraw
|
|
145
|
+
cd getraw
|
|
146
|
+
bun install
|
|
147
|
+
bun test # 386 tests
|
|
148
|
+
bun run build # standalone binary
|
|
158
149
|
```
|
|
159
150
|
|
|
160
151
|
## Writing a Custom Extractor
|
|
161
152
|
|
|
162
|
-
See [docs/plugin-guide.md](docs/plugin-guide.md) for the `BaseExtractor` interface and
|
|
153
|
+
See [docs/plugin-guide.md](docs/plugin-guide.md) for the `BaseExtractor` interface and examples.
|
|
163
154
|
|
|
164
155
|
## License
|
|
165
156
|
|
package/bun.lock
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
"dependencies": {
|
|
8
8
|
"hls-parser": "^0.13.6",
|
|
9
9
|
"mpd-parser": "^1.3.0",
|
|
10
|
+
"youtubei.js": "^17.0.1",
|
|
10
11
|
},
|
|
11
12
|
"devDependencies": {
|
|
12
13
|
"@types/bun": "latest",
|
|
@@ -19,6 +20,8 @@
|
|
|
19
20
|
"packages": {
|
|
20
21
|
"@babel/runtime": ["@babel/runtime@7.29.7", "", {}, "sha512-Nq8OhGWiZIZGV6hLHoyAKLLcJihP/xFeBMGJoUrxTX2psI8dCifzLhZISFb+VWS3wFMRDmCGw5R+dOySCqPLhw=="],
|
|
21
22
|
|
|
23
|
+
"@bufbuild/protobuf": ["@bufbuild/protobuf@2.12.0", "", {}, "sha512-B/XlCaFIP8LOwzo+bz5uFzATYokcwCKQcghqnlfwSmM5eX/qTkvDBnDPs+gXtX/RyjxJ4DRikECcPJbyALA8FA=="],
|
|
24
|
+
|
|
22
25
|
"@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="],
|
|
23
26
|
|
|
24
27
|
"@types/node": ["@types/node@25.9.3", "", { "dependencies": { "undici-types": ">=7.24.0 <7.24.7" } }, "sha512-603BddQMv3pUcr4U2dhujk83N2tTDVr/34wII2B6bJy6g+8WD6yUb11jszNs0gdi4PesVWl7ABt8nYMVpnLUcg=="],
|
|
@@ -35,6 +38,8 @@
|
|
|
35
38
|
|
|
36
39
|
"hls-parser": ["hls-parser@0.13.6", "", {}, "sha512-I40sl22E2muqeSTpG8kMN2dAegAhubkXPXtnsUXFwdKwZK47d1Q+XwuX32VMZ++AZU5oeQIZqAnGNHxSG1sWaw=="],
|
|
37
40
|
|
|
41
|
+
"meriyah": ["meriyah@6.1.4", "", {}, "sha512-Sz8FzjzI0kN13GK/6MVEsVzMZEPvOhnmmI1lU5+/1cGOiK3QUahntrNNtdVeihrO7t9JpoH75iMNXg6R6uWflQ=="],
|
|
42
|
+
|
|
38
43
|
"min-document": ["min-document@2.19.2", "", { "dependencies": { "dom-walk": "^0.1.0" } }, "sha512-8S5I8db/uZN8r9HSLFVWPdJCvYOejMcEC82VIzNUc6Zkklf/d1gg2psfE79/vyhWOj4+J8MtwmoOz3TmvaGu5A=="],
|
|
39
44
|
|
|
40
45
|
"mpd-parser": ["mpd-parser@1.3.1", "", { "dependencies": { "@babel/runtime": "^7.12.5", "@videojs/vhs-utils": "^4.0.0", "@xmldom/xmldom": "^0.8.3", "global": "^4.4.0" }, "bin": { "mpd-to-m3u8-json": "bin/parse.js" } }, "sha512-1FuyEWI5k2HcmhS1HkKnUAQV7yFPfXPht2DnRRGtoiiAAW+ESTbtEXIDpRkwdU+XyrQuwrIym7UkoPKsZ0SyFw=="],
|
|
@@ -46,5 +51,7 @@
|
|
|
46
51
|
"undici-types": ["undici-types@7.24.6", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="],
|
|
47
52
|
|
|
48
53
|
"url-toolkit": ["url-toolkit@2.2.5", "", {}, "sha512-mtN6xk+Nac+oyJ/PrI7tzfmomRVNFIWKUbG8jdYFt52hxbiReFAXIjYskvu64/dvuW71IcB7lV8l0HvZMac6Jg=="],
|
|
54
|
+
|
|
55
|
+
"youtubei.js": ["youtubei.js@17.0.1", "", { "dependencies": { "@bufbuild/protobuf": "^2.0.0", "meriyah": "^6.1.4" } }, "sha512-1lO4b8UqMDzE0oh2qEGzbBOd4UYRdxn/4PdpRM7BGTHxM6ddsEsKZTu90jp8V9FHVgC2h1UirQyqoqLiKwl+Zg=="],
|
|
49
56
|
}
|
|
50
57
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "getraw",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Fast media downloader CLI built natively in Bun/TypeScript",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -10,11 +10,13 @@
|
|
|
10
10
|
"dev": "bun run src/cli/index.ts",
|
|
11
11
|
"test": "bun test",
|
|
12
12
|
"build": "bun build src/cli/index.ts --compile --outfile=getraw",
|
|
13
|
-
"dashboard": "bun run tools/dashboard.ts"
|
|
13
|
+
"dashboard": "bun run tools/dashboard.ts",
|
|
14
|
+
"postinstall": "bun run scripts/patch-youtubei.js"
|
|
14
15
|
},
|
|
15
16
|
"dependencies": {
|
|
16
17
|
"hls-parser": "^0.13.6",
|
|
17
|
-
"mpd-parser": "^1.3.0"
|
|
18
|
+
"mpd-parser": "^1.3.0",
|
|
19
|
+
"youtubei.js": "^17.0.1"
|
|
18
20
|
},
|
|
19
21
|
"devDependencies": {
|
|
20
22
|
"@types/bun": "latest"
|
|
@@ -25,6 +27,6 @@
|
|
|
25
27
|
"license": "MIT",
|
|
26
28
|
"repository": {
|
|
27
29
|
"type": "git",
|
|
28
|
-
"url": "https://github.com/
|
|
30
|
+
"url": "https://github.com/onkits/getraw"
|
|
29
31
|
}
|
|
30
32
|
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Patches youtubei.js to use Bun-native JS evaluation instead of the default stub
|
|
3
|
+
import { writeFileSync } from "fs";
|
|
4
|
+
import { resolve } from "path";
|
|
5
|
+
|
|
6
|
+
const evalPath = resolve("node_modules/youtubei.js/dist/src/platform/jsruntime/default.js");
|
|
7
|
+
const evalCode = `export default async function evaluate(data) {
|
|
8
|
+
const fn = new Function(data.output);
|
|
9
|
+
return fn();
|
|
10
|
+
}
|
|
11
|
+
`;
|
|
12
|
+
|
|
13
|
+
writeFileSync(evalPath, evalCode);
|
|
14
|
+
console.log("Patched youtubei.js jsruntime for Bun-native evaluation");
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: getraw
|
|
3
|
+
description: Download videos, audio, and metadata from 30+ sites (YouTube, Twitter, TikTok, Instagram, Reddit, Twitch, Vimeo, SoundCloud, and more). Use when the user asks to download media, extract video info, get transcripts/subtitles, rip audio, or fetch metadata from a URL. Wraps the getraw CLI — a yt-dlp replacement built in Bun/TypeScript.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# getraw
|
|
7
|
+
|
|
8
|
+
Download and extract media from 30+ sites. Built in Bun/TypeScript as a yt-dlp replacement.
|
|
9
|
+
|
|
10
|
+
## Prerequisites
|
|
11
|
+
|
|
12
|
+
Requires `bun` and `getraw` installed:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
bun install -g getraw
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Optional: `ffmpeg` for audio extraction, format merging, and subtitle embedding.
|
|
19
|
+
|
|
20
|
+
## Commands
|
|
21
|
+
|
|
22
|
+
### Download a video
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
getraw "URL"
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Downloads the best available format to the current directory.
|
|
29
|
+
|
|
30
|
+
### Get metadata as JSON (no download)
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
getraw --dump-json "URL"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Returns full metadata: title, description, uploader, duration, formats, subtitles, thumbnails. Use this when you need info about a video without downloading it. Parse the JSON output for structured data.
|
|
37
|
+
|
|
38
|
+
### List available formats
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
getraw --list-formats "URL"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Shows all available quality/format options (resolution, codec, bitrate, filesize).
|
|
45
|
+
|
|
46
|
+
### Download specific format
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
getraw -f "best[height<=720]" "URL"
|
|
50
|
+
getraw -f "bestvideo+bestaudio" "URL"
|
|
51
|
+
getraw -f "bestaudio" "URL"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Format selection strings:
|
|
55
|
+
- `best` — best single file
|
|
56
|
+
- `bestvideo+bestaudio` — best video + best audio, merged by ffmpeg
|
|
57
|
+
- `bestaudio` — audio only (best quality)
|
|
58
|
+
- `best[height<=720]` — best format at 720p or below
|
|
59
|
+
- Format ID from `--list-formats` (e.g. `137+140`)
|
|
60
|
+
|
|
61
|
+
### Extract audio only
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
getraw -x "URL"
|
|
65
|
+
getraw -x --audio-format mp3 "URL"
|
|
66
|
+
getraw -x --audio-format flac "URL"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Supported audio formats: `mp3`, `aac`, `flac`, `wav`, `opus`, `vorbis`, `m4a`.
|
|
70
|
+
|
|
71
|
+
### Download subtitles
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
getraw --write-subs "URL"
|
|
75
|
+
getraw --write-subs --sub-langs "en,es" "URL"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Downloads subtitle files alongside the video. Use `--sub-langs` to specify languages.
|
|
79
|
+
|
|
80
|
+
### Custom output filename
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
getraw -o "%(title)s.%(ext)s" "URL"
|
|
84
|
+
getraw -o "%(uploader)s - %(title)s [%(id)s].%(ext)s" "URL"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Template variables: `%(title)s`, `%(id)s`, `%(ext)s`, `%(uploader)s`, `%(upload_date)s`, `%(duration)s`, `%(view_count)s`.
|
|
88
|
+
|
|
89
|
+
### Embed metadata
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
getraw --embed-thumbnail --embed-subs "URL"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Embeds thumbnail art and subtitles into the downloaded file (requires ffmpeg).
|
|
96
|
+
|
|
97
|
+
## Supported Sites
|
|
98
|
+
|
|
99
|
+
| Site | URL Pattern |
|
|
100
|
+
|------|------------|
|
|
101
|
+
| YouTube | youtube.com, youtu.be, youtube.com/shorts |
|
|
102
|
+
| Twitter/X | twitter.com/*/status/*, x.com/*/status/* |
|
|
103
|
+
| TikTok | tiktok.com/@*/video/*, vm.tiktok.com/* |
|
|
104
|
+
| Instagram | instagram.com/p/*, instagram.com/reel/* |
|
|
105
|
+
| Reddit | reddit.com/r/*/comments/*, v.redd.it/* |
|
|
106
|
+
| Twitch | twitch.tv/videos/*, twitch.tv/*/clip/* |
|
|
107
|
+
| Vimeo | vimeo.com/* |
|
|
108
|
+
| SoundCloud | soundcloud.com/*/* |
|
|
109
|
+
| Bilibili | bilibili.com/video/* |
|
|
110
|
+
| Dailymotion | dailymotion.com/video/* |
|
|
111
|
+
| Bandcamp | *.bandcamp.com/track/*, *.bandcamp.com/album/* |
|
|
112
|
+
| Rumble | rumble.com/* |
|
|
113
|
+
| TED | ted.com/talks/* |
|
|
114
|
+
| Kick | kick.com/video/*, kick.com/*/clips/* |
|
|
115
|
+
| Streamable | streamable.com/* |
|
|
116
|
+
| PeerTube | Any PeerTube instance |
|
|
117
|
+
| Archive.org | archive.org/details/* |
|
|
118
|
+
| + 13 more | Imgur, Coub, Odysee, Spotify podcasts, NHK, BBC, etc. |
|
|
119
|
+
|
|
120
|
+
## When to Use
|
|
121
|
+
|
|
122
|
+
- User says "download this video" or shares a video URL
|
|
123
|
+
- User wants video/audio metadata (`--dump-json`)
|
|
124
|
+
- User wants to extract audio from a video (`-x`)
|
|
125
|
+
- User wants subtitles or transcripts (`--write-subs`)
|
|
126
|
+
- User wants to check available qualities (`--list-formats`)
|
|
127
|
+
- User wants to save media for offline use or processing
|
|
128
|
+
|
|
129
|
+
## Common Patterns
|
|
130
|
+
|
|
131
|
+
### Get video transcript for summarization
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
getraw --write-subs --sub-langs en --skip-download "URL"
|
|
135
|
+
# Then read the .vtt or .srt file
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Download audio for a speech-to-text/transcription pipeline
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
getraw -x --audio-format wav -o "audio.wav" "URL"
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Batch download from a list
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
getraw URL1 URL2 URL3
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Get metadata for multiple videos
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
for url in URL1 URL2 URL3; do
|
|
154
|
+
getraw --dump-json "$url"
|
|
155
|
+
done
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Error Handling
|
|
159
|
+
|
|
160
|
+
- If a site is unsupported, getraw returns a clear error with the URL
|
|
161
|
+
- If a format is unavailable, it falls back to the best available
|
|
162
|
+
- Failed network requests are retried 3 times with exponential backoff
|
|
163
|
+
- Use `--verbose` for debug output, `--quiet` to suppress all output
|
package/src/cli/index.ts
CHANGED
|
@@ -3,7 +3,8 @@ import { parseArgs, printHelp } from "./options";
|
|
|
3
3
|
import { Orchestrator } from "../core/orchestrator";
|
|
4
4
|
import { logger } from "../core/logger";
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
import pkg from "../../package.json";
|
|
7
|
+
const VERSION = pkg.version;
|
|
7
8
|
|
|
8
9
|
async function main(): Promise<void> {
|
|
9
10
|
const args = process.argv.slice(2);
|
package/src/cli/options.ts
CHANGED
|
@@ -20,7 +20,7 @@ export const FLAG_DEFS: FlagDef[] = [
|
|
|
20
20
|
{ long: "--list-formats", short: "-F", description: "List available formats", type: "boolean", key: "listFormats" },
|
|
21
21
|
{ long: "--dump-json", short: "-j", description: "Dump info JSON to stdout", type: "boolean", key: "dumpJson" },
|
|
22
22
|
{ long: "--quiet", short: "-q", description: "Suppress output", type: "boolean", key: "quiet" },
|
|
23
|
-
{ long: "--verbose",
|
|
23
|
+
{ long: "--verbose", description: "Verbose output", type: "boolean", key: "verbose" },
|
|
24
24
|
{ long: "--no-progress", description: "Disable progress bar", type: "boolean", key: "noProgress" },
|
|
25
25
|
{ long: "--retries", short: "-R", description: "Number of retries", type: "number", key: "retries" },
|
|
26
26
|
{ long: "--rate-limit", short: "-r", description: "Rate limit in bytes/sec", type: "number", key: "rateLimit" },
|
|
@@ -32,7 +32,7 @@ export const FLAG_DEFS: FlagDef[] = [
|
|
|
32
32
|
{ long: "--embed-subs", description: "Embed subtitles in output", type: "boolean", key: "embedSubs" },
|
|
33
33
|
{ long: "--merge-output-format", description: "Output container for merging", type: "string", key: "mergeOutputFormat" },
|
|
34
34
|
{ long: "--ffmpeg-location", description: "Path to ffmpeg binary", type: "string", key: "ffmpegLocation" },
|
|
35
|
-
{ long: "--version", short: "-
|
|
35
|
+
{ long: "--version", short: "-v", description: "Print version", type: "boolean", key: "version" },
|
|
36
36
|
{ long: "--help", short: "-h", description: "Show help", type: "boolean", key: "help" },
|
|
37
37
|
];
|
|
38
38
|
|
package/src/core/orchestrator.ts
CHANGED
|
@@ -102,7 +102,7 @@ export class Orchestrator {
|
|
|
102
102
|
? `${filepath}.f${format.format_id}.${format.ext}`
|
|
103
103
|
: filepath;
|
|
104
104
|
|
|
105
|
-
await downloader.download(
|
|
105
|
+
await downloader.download(format.url, targetPath, {
|
|
106
106
|
headers: { ...info.http_headers, ...format.http_headers },
|
|
107
107
|
rateLimit: options.rateLimit,
|
|
108
108
|
retries: options.retries,
|
package/src/extractors/base.ts
CHANGED
|
@@ -1,9 +1,78 @@
|
|
|
1
1
|
import { BaseExtractor } from "../core/types";
|
|
2
2
|
import { GenericExtractor } from "./generic";
|
|
3
|
+
import { YouTubeExtractor } from "./youtube/index";
|
|
4
|
+
import { TwitterExtractor } from "./twitter/index";
|
|
5
|
+
import { TwitterSpacesExtractor } from "./twitter/spaces";
|
|
6
|
+
import { TikTokExtractor } from "./tiktok/index";
|
|
7
|
+
import { TikTokUserExtractor } from "./tiktok/user";
|
|
8
|
+
import { InstagramExtractor } from "./instagram/index";
|
|
9
|
+
import { InstagramReelsExtractor } from "./instagram/reels";
|
|
10
|
+
import { RedditExtractor } from "./reddit/index";
|
|
11
|
+
import { RedditGalleryExtractor } from "./reddit/gallery";
|
|
12
|
+
import { TwitchVODExtractor } from "./twitch/index";
|
|
13
|
+
import { TwitchClipExtractor } from "./twitch/clips";
|
|
14
|
+
import { TwitchLiveExtractor } from "./twitch/live";
|
|
15
|
+
import { VimeoExtractor } from "./vimeo/index";
|
|
16
|
+
import { SoundCloudExtractor } from "./soundcloud/index";
|
|
17
|
+
import { SoundCloudPlaylistExtractor } from "./soundcloud/playlist";
|
|
18
|
+
import { BilibiliExtractor } from "./bilibili/index";
|
|
19
|
+
import { BilibiliBangumiExtractor } from "./bilibili/bangumi";
|
|
20
|
+
import { KickExtractor } from "./kick/index";
|
|
21
|
+
import { KickClipsExtractor } from "./kick/clips";
|
|
22
|
+
import { KickLiveExtractor } from "./kick/live";
|
|
23
|
+
import { NiconicoExtractor } from "./niconico/index";
|
|
24
|
+
import { DailymotionExtractor } from "./dailymotion";
|
|
25
|
+
import { RumbleExtractor } from "./rumble";
|
|
26
|
+
import { BandcampExtractor } from "./bandcamp";
|
|
27
|
+
import { SpotifyExtractor } from "./spotify";
|
|
28
|
+
import { PeerTubeExtractor } from "./peertube";
|
|
29
|
+
import { OdyseeExtractor } from "./odysee";
|
|
30
|
+
import { StreamableExtractor } from "./streamable";
|
|
31
|
+
import { ImgurExtractor } from "./imgur";
|
|
32
|
+
import { CoubExtractor } from "./coub";
|
|
33
|
+
import { TEDExtractor } from "./ted";
|
|
34
|
+
import { ArchiveOrgExtractor } from "./archive-org";
|
|
35
|
+
import { DropboxExtractor } from "./dropbox";
|
|
36
|
+
import { GoogleDriveExtractor } from "./google-drive";
|
|
3
37
|
|
|
4
38
|
export { BaseExtractor };
|
|
5
39
|
|
|
6
|
-
const extractors: BaseExtractor[] = [
|
|
40
|
+
const extractors: BaseExtractor[] = [
|
|
41
|
+
new YouTubeExtractor(),
|
|
42
|
+
new TwitterExtractor(),
|
|
43
|
+
new TwitterSpacesExtractor(),
|
|
44
|
+
new TikTokExtractor(),
|
|
45
|
+
new TikTokUserExtractor(),
|
|
46
|
+
new InstagramExtractor(),
|
|
47
|
+
new InstagramReelsExtractor(),
|
|
48
|
+
new RedditExtractor(),
|
|
49
|
+
new RedditGalleryExtractor(),
|
|
50
|
+
new TwitchVODExtractor(),
|
|
51
|
+
new TwitchClipExtractor(),
|
|
52
|
+
new TwitchLiveExtractor(),
|
|
53
|
+
new VimeoExtractor(),
|
|
54
|
+
new SoundCloudExtractor(),
|
|
55
|
+
new SoundCloudPlaylistExtractor(),
|
|
56
|
+
new BilibiliExtractor(),
|
|
57
|
+
new BilibiliBangumiExtractor(),
|
|
58
|
+
new KickExtractor(),
|
|
59
|
+
new KickClipsExtractor(),
|
|
60
|
+
new KickLiveExtractor(),
|
|
61
|
+
new NiconicoExtractor(),
|
|
62
|
+
new DailymotionExtractor(),
|
|
63
|
+
new RumbleExtractor(),
|
|
64
|
+
new BandcampExtractor(),
|
|
65
|
+
new SpotifyExtractor(),
|
|
66
|
+
new PeerTubeExtractor(),
|
|
67
|
+
new OdyseeExtractor(),
|
|
68
|
+
new StreamableExtractor(),
|
|
69
|
+
new ImgurExtractor(),
|
|
70
|
+
new CoubExtractor(),
|
|
71
|
+
new TEDExtractor(),
|
|
72
|
+
new ArchiveOrgExtractor(),
|
|
73
|
+
new DropboxExtractor(),
|
|
74
|
+
new GoogleDriveExtractor(),
|
|
75
|
+
];
|
|
7
76
|
const genericExtractor = new GenericExtractor();
|
|
8
77
|
|
|
9
78
|
export function registerExtractor(extractor: BaseExtractor): void {
|
|
@@ -1,17 +1,29 @@
|
|
|
1
1
|
import { BaseExtractor, ExtractorError } from "../../core/types";
|
|
2
2
|
import type { InfoDict, Format, Thumbnail } from "../../core/types";
|
|
3
|
-
import { InnerTubeClient } from "./innertube";
|
|
4
|
-
import type { PlayerResponse, VideoDetails, StreamingData } from "./innertube";
|
|
5
|
-
import { fetchPlayerJs, decipherSignatureUrl, clearCache as clearSigCache } from "./signature";
|
|
6
|
-
import { transformNsig, clearNsigCache } from "./nsig";
|
|
7
3
|
import { parseCaptionTracks } from "./captions";
|
|
8
|
-
import { PlaylistExtractor } from "./playlist";
|
|
9
4
|
|
|
10
5
|
const VALID_URL = /^https?:\/\/(?:(?:www|m|music)\.)?(?:youtube\.com\/(?:watch\?.*v=|shorts\/|live\/|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
|
|
11
6
|
const PLAYLIST_URL = /^https?:\/\/(?:(?:www|m|music)\.)?youtube\.com\/playlist\?.*list=([a-zA-Z0-9_-]+)/;
|
|
12
7
|
const CHANNEL_URL = /^https?:\/\/(?:(?:www|m|music)\.)?youtube\.com\/(?:channel\/|@)([a-zA-Z0-9_-]+)/;
|
|
13
8
|
|
|
14
|
-
|
|
9
|
+
function generateCpn(): string {
|
|
10
|
+
const chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_";
|
|
11
|
+
return Array.from({ length: 16 }, () => chars[Math.floor(Math.random() * 64)]).join("");
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
let _innertube: Awaited<ReturnType<typeof createInnertube>> | null = null;
|
|
15
|
+
|
|
16
|
+
async function createInnertube() {
|
|
17
|
+
const { Innertube } = await import("youtubei.js");
|
|
18
|
+
return Innertube.create({ generate_session_locally: true });
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
async function getInnertube() {
|
|
22
|
+
if (!_innertube) {
|
|
23
|
+
_innertube = await createInnertube();
|
|
24
|
+
}
|
|
25
|
+
return _innertube;
|
|
26
|
+
}
|
|
15
27
|
|
|
16
28
|
export class YouTubeExtractor extends BaseExtractor {
|
|
17
29
|
readonly _VALID_URL = new RegExp(
|
|
@@ -19,234 +31,131 @@ export class YouTubeExtractor extends BaseExtractor {
|
|
|
19
31
|
);
|
|
20
32
|
readonly _NAME = "youtube";
|
|
21
33
|
|
|
22
|
-
private playlistExtractor = new PlaylistExtractor();
|
|
23
|
-
|
|
24
34
|
protected async _real_extract(url: string): Promise<InfoDict> {
|
|
25
|
-
const playlistMatch = url.match(PLAYLIST_URL);
|
|
26
|
-
if (playlistMatch) {
|
|
27
|
-
return this.playlistExtractor.extractPlaylist(playlistMatch[1]);
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
const channelMatch = url.match(CHANNEL_URL);
|
|
31
|
-
if (channelMatch && !url.match(VALID_URL)) {
|
|
32
|
-
return this.playlistExtractor.extractChannelVideos(channelMatch[1]);
|
|
33
|
-
}
|
|
34
|
-
|
|
35
35
|
const videoMatch = url.match(VALID_URL);
|
|
36
36
|
if (!videoMatch) {
|
|
37
|
-
throw new ExtractorError(`
|
|
37
|
+
throw new ExtractorError(`Unsupported YouTube URL: ${url}`);
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
return this.extractVideo(videoMatch[1]);
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
private async extractVideo(videoId: string): Promise<InfoDict> {
|
|
44
|
-
const
|
|
45
|
-
|
|
44
|
+
const yt = await getInnertube();
|
|
45
|
+
const info = await yt.getInfo(videoId);
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
playerResponse = await this.tryAgeGateBypass(videoId, playerResponse);
|
|
47
|
+
if (!info.basic_info.title) {
|
|
48
|
+
throw new ExtractorError("Could not extract video info");
|
|
50
49
|
}
|
|
51
50
|
|
|
52
|
-
|
|
53
|
-
throw new ExtractorError(
|
|
54
|
-
playerResponse.playabilityStatus.reason ?? "Video unavailable"
|
|
55
|
-
);
|
|
56
|
-
}
|
|
51
|
+
const formats = await this.extractFormats(info, yt);
|
|
57
52
|
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
53
|
+
const thumbnails: Thumbnail[] = (info.basic_info.thumbnail ?? []).map((t: { url: string; width: number; height: number }) => ({
|
|
54
|
+
url: t.url,
|
|
55
|
+
width: t.width,
|
|
56
|
+
height: t.height,
|
|
57
|
+
}));
|
|
62
58
|
|
|
63
|
-
|
|
59
|
+
const result: InfoDict = {
|
|
60
|
+
id: videoId,
|
|
61
|
+
title: info.basic_info.title,
|
|
62
|
+
formats,
|
|
63
|
+
thumbnails,
|
|
64
|
+
description: info.basic_info.short_description,
|
|
65
|
+
channel: info.basic_info.author,
|
|
66
|
+
channel_id: info.basic_info.channel_id,
|
|
67
|
+
duration: info.basic_info.duration,
|
|
68
|
+
view_count: info.basic_info.view_count,
|
|
69
|
+
webpage_url: `https://www.youtube.com/watch?v=${videoId}`,
|
|
70
|
+
live_status: info.basic_info.is_live ? "is_live" : "not_live",
|
|
71
|
+
};
|
|
64
72
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
73
|
+
// Extract captions from page response
|
|
74
|
+
const pageResponse = await this.fetchPagePlayerResponse(videoId);
|
|
75
|
+
if (pageResponse) {
|
|
76
|
+
const captionTracks = pageResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
|
|
77
|
+
if (captionTracks?.length) {
|
|
78
|
+
const { subtitles, automatic_captions } = parseCaptionTracks(captionTracks);
|
|
79
|
+
result.subtitles = subtitles;
|
|
80
|
+
result.automatic_captions = automatic_captions;
|
|
70
81
|
}
|
|
71
82
|
}
|
|
72
83
|
|
|
73
|
-
|
|
74
|
-
return info;
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
private async tryAgeGateBypass(
|
|
78
|
-
videoId: string,
|
|
79
|
-
originalResponse: PlayerResponse,
|
|
80
|
-
): Promise<PlayerResponse> {
|
|
81
|
-
const tvClient = InnerTubeClient.withClient("TVHTML5_EMBED");
|
|
82
|
-
const embedUrl = `https://www.youtube.com/embed/${videoId}`;
|
|
83
|
-
const tvResponse = await tvClient.getPlayerResponse(videoId, embedUrl);
|
|
84
|
-
|
|
85
|
-
if (tvResponse.playabilityStatus?.status === "OK" && tvResponse.streamingData) {
|
|
86
|
-
return {
|
|
87
|
-
...tvResponse,
|
|
88
|
-
videoDetails: originalResponse.videoDetails ?? tvResponse.videoDetails,
|
|
89
|
-
captions: originalResponse.captions ?? tvResponse.captions,
|
|
90
|
-
microformat: originalResponse.microformat ?? tvResponse.microformat,
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
return originalResponse;
|
|
84
|
+
return result;
|
|
95
85
|
}
|
|
96
86
|
|
|
97
|
-
private async extractFormats(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
): Promise<Format[]> {
|
|
102
|
-
const streamingData = playerResponse.streamingData;
|
|
103
|
-
if (!streamingData) return [];
|
|
104
|
-
|
|
105
|
-
let formats = client.parseFormats(streamingData);
|
|
106
|
-
|
|
107
|
-
const needsDecipher = this.formatsNeedDecipher(streamingData);
|
|
108
|
-
if (needsDecipher) {
|
|
109
|
-
formats = await this.decipherFormats(formats, streamingData, videoId);
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
return formats;
|
|
113
|
-
}
|
|
87
|
+
private async extractFormats(info: { streaming_data?: { formats?: unknown[]; adaptive_formats?: unknown[] }; chooseFormat: (opts: { type: string; quality: string }) => unknown }, yt: { session: { player: unknown } }): Promise<Format[]> {
|
|
88
|
+
const formats: Format[] = [];
|
|
89
|
+
const player = yt.session.player;
|
|
90
|
+
const cpn = generateCpn();
|
|
114
91
|
|
|
115
|
-
private formatsNeedDecipher(streamingData: StreamingData): boolean {
|
|
116
92
|
const allFormats = [
|
|
117
|
-
...(
|
|
118
|
-
...(
|
|
93
|
+
...(info.streaming_data?.formats ?? []),
|
|
94
|
+
...(info.streaming_data?.adaptive_formats ?? []),
|
|
119
95
|
];
|
|
120
|
-
return allFormats.some((f) => f.signatureCipher && !f.url);
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
private async decipherFormats(
|
|
124
|
-
formats: Format[],
|
|
125
|
-
streamingData: StreamingData,
|
|
126
|
-
videoId: string,
|
|
127
|
-
): Promise<Format[]> {
|
|
128
|
-
const playerJsUrl = await this.getPlayerJsUrl(videoId);
|
|
129
|
-
if (!playerJsUrl) return formats;
|
|
130
|
-
|
|
131
|
-
const playerJs = await fetchPlayerJs(playerJsUrl);
|
|
132
|
-
|
|
133
|
-
const allRaw = [
|
|
134
|
-
...(streamingData.formats ?? []),
|
|
135
|
-
...(streamingData.adaptiveFormats ?? []),
|
|
136
|
-
];
|
|
137
|
-
|
|
138
|
-
for (let i = 0; i < formats.length; i++) {
|
|
139
|
-
const raw = allRaw[i];
|
|
140
|
-
if (!raw) continue;
|
|
141
96
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
97
|
+
for (const raw of allFormats) {
|
|
98
|
+
const f = raw as Record<string, unknown>;
|
|
99
|
+
try {
|
|
100
|
+
let url: string | undefined;
|
|
101
|
+
|
|
102
|
+
if (typeof (f as { decipher?: unknown }).decipher === "function") {
|
|
103
|
+
const deciphered = await (f as { decipher: (p: unknown) => Promise<unknown> }).decipher(player);
|
|
104
|
+
if (typeof deciphered === "string") {
|
|
105
|
+
const parsed = new URL(deciphered);
|
|
106
|
+
parsed.searchParams.set("cpn", cpn);
|
|
107
|
+
url = parsed.toString();
|
|
108
|
+
}
|
|
147
109
|
}
|
|
148
|
-
}
|
|
149
110
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
111
|
+
if (!url) continue;
|
|
112
|
+
|
|
113
|
+
const mime = String(f.mime_type ?? "");
|
|
114
|
+
const mimeMatch = mime.match(/^(video|audio)\/(\w+);\s*codecs="([^"]+)"/);
|
|
115
|
+
const ext = mimeMatch?.[2] ?? "mp4";
|
|
116
|
+
const codecs = mimeMatch?.[3] ?? "";
|
|
117
|
+
const isVideo = mime.startsWith("video");
|
|
118
|
+
const isAudio = mime.startsWith("audio");
|
|
119
|
+
|
|
120
|
+
formats.push({
|
|
121
|
+
format_id: String(f.itag ?? ""),
|
|
122
|
+
url,
|
|
123
|
+
ext,
|
|
124
|
+
vcodec: isVideo ? codecs.split(",")[0]?.trim() : "none",
|
|
125
|
+
acodec: isAudio ? codecs : (isVideo && codecs.includes(",") ? codecs.split(",")[1]?.trim() : undefined),
|
|
126
|
+
width: (f.width as number) ?? undefined,
|
|
127
|
+
height: (f.height as number) ?? undefined,
|
|
128
|
+
fps: (f.fps as number) ?? undefined,
|
|
129
|
+
tbr: f.bitrate ? Math.round((f.bitrate as number) / 1000) : undefined,
|
|
130
|
+
filesize: f.content_length ? parseInt(String(f.content_length), 10) : undefined,
|
|
131
|
+
format_note: String(f.quality_label ?? f.quality ?? ""),
|
|
132
|
+
audio_channels: (f.audio_channels as number) ?? undefined,
|
|
133
|
+
http_headers: {
|
|
134
|
+
"Origin": "https://www.youtube.com",
|
|
135
|
+
"Referer": "https://www.youtube.com/",
|
|
136
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
137
|
+
},
|
|
138
|
+
});
|
|
139
|
+
} catch {
|
|
140
|
+
continue;
|
|
156
141
|
}
|
|
157
142
|
}
|
|
158
143
|
|
|
159
144
|
return formats;
|
|
160
145
|
}
|
|
161
146
|
|
|
162
|
-
private async
|
|
163
|
-
const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
|
|
147
|
+
private async fetchPagePlayerResponse(videoId: string): Promise<Record<string, unknown> | null> {
|
|
164
148
|
try {
|
|
165
|
-
const
|
|
149
|
+
const resp = await fetch(`https://www.youtube.com/watch?v=${videoId}`, {
|
|
166
150
|
headers: {
|
|
167
|
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
151
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
168
152
|
},
|
|
169
153
|
});
|
|
170
|
-
const html = await
|
|
171
|
-
const match = html.match(
|
|
172
|
-
return match ?
|
|
154
|
+
const html = await resp.text();
|
|
155
|
+
const match = html.match(/var\s+ytInitialPlayerResponse\s*=\s*(\{.+?\});/s);
|
|
156
|
+
return match ? JSON.parse(match[1]) : null;
|
|
173
157
|
} catch {
|
|
174
158
|
return null;
|
|
175
159
|
}
|
|
176
160
|
}
|
|
177
|
-
|
|
178
|
-
private buildInfoDict(
|
|
179
|
-
videoId: string,
|
|
180
|
-
details: VideoDetails,
|
|
181
|
-
response: PlayerResponse,
|
|
182
|
-
formats: Format[],
|
|
183
|
-
): InfoDict {
|
|
184
|
-
const microformat = response.microformat?.playerMicroformatRenderer;
|
|
185
|
-
|
|
186
|
-
const thumbnails: Thumbnail[] = (details.thumbnail?.thumbnails ?? []).map((t) => ({
|
|
187
|
-
url: t.url,
|
|
188
|
-
width: t.width,
|
|
189
|
-
height: t.height,
|
|
190
|
-
}));
|
|
191
|
-
|
|
192
|
-
const liveStatus = this.getLiveStatus(details, response);
|
|
193
|
-
|
|
194
|
-
const info: InfoDict = {
|
|
195
|
-
id: videoId,
|
|
196
|
-
title: details.title,
|
|
197
|
-
formats,
|
|
198
|
-
thumbnails,
|
|
199
|
-
description: details.shortDescription ?? microformat?.description?.simpleText,
|
|
200
|
-
channel: details.author,
|
|
201
|
-
channel_id: details.channelId,
|
|
202
|
-
channel_url: `https://www.youtube.com/channel/${details.channelId}`,
|
|
203
|
-
uploader: details.author,
|
|
204
|
-
uploader_id: details.channelId,
|
|
205
|
-
uploader_url: microformat?.ownerProfileUrl,
|
|
206
|
-
duration: parseInt(details.lengthSeconds, 10) || undefined,
|
|
207
|
-
view_count: parseInt(details.viewCount, 10) || undefined,
|
|
208
|
-
upload_date: microformat?.uploadDate?.replace(/-/g, ""),
|
|
209
|
-
live_status: liveStatus,
|
|
210
|
-
webpage_url: `https://www.youtube.com/watch?v=${videoId}`,
|
|
211
|
-
age_limit: 0,
|
|
212
|
-
categories: microformat?.category ? [microformat.category] : undefined,
|
|
213
|
-
};
|
|
214
|
-
|
|
215
|
-
if (microformat?.liveBroadcastDetails?.startTimestamp) {
|
|
216
|
-
info.release_timestamp = Math.floor(
|
|
217
|
-
new Date(microformat.liveBroadcastDetails.startTimestamp).getTime() / 1000
|
|
218
|
-
);
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
const captionTracks = response.captions?.playerCaptionsTracklistRenderer?.captionTracks;
|
|
222
|
-
if (captionTracks?.length) {
|
|
223
|
-
const { subtitles, automatic_captions } = parseCaptionTracks(captionTracks);
|
|
224
|
-
info.subtitles = subtitles;
|
|
225
|
-
info.automatic_captions = automatic_captions;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
return info;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
private getLiveStatus(
|
|
232
|
-
details: VideoDetails,
|
|
233
|
-
response: PlayerResponse,
|
|
234
|
-
): InfoDict["live_status"] {
|
|
235
|
-
if (details.isLive) return "is_live";
|
|
236
|
-
if (details.isUpcoming) return "is_upcoming";
|
|
237
|
-
if (details.isLiveContent) return "was_live";
|
|
238
|
-
if (response.playabilityStatus?.liveStreamability) return "is_live";
|
|
239
|
-
return "not_live";
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
static clearCaches(): void {
|
|
243
|
-
clearSigCache();
|
|
244
|
-
clearNsigCache();
|
|
245
|
-
}
|
|
246
161
|
}
|
|
247
|
-
|
|
248
|
-
export { InnerTubeClient } from "./innertube";
|
|
249
|
-
export { PlaylistExtractor } from "./playlist";
|
|
250
|
-
export { parseCaptionTracks, convertToSrt, convertToVtt } from "./captions";
|
|
251
|
-
export { decipherSignatureUrl, fetchPlayerJs } from "./signature";
|
|
252
|
-
export { transformNsig } from "./nsig";
|
|
@@ -104,14 +104,14 @@ export interface BrowseResponse {
|
|
|
104
104
|
const CLIENTS: Record<string, ClientContext> = {
|
|
105
105
|
WEB: {
|
|
106
106
|
clientName: "WEB",
|
|
107
|
-
clientVersion: "2.
|
|
108
|
-
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
107
|
+
clientVersion: "2.20250615.01.00",
|
|
108
|
+
userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
|
109
109
|
apiKey: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
|
|
110
110
|
},
|
|
111
111
|
ANDROID: {
|
|
112
112
|
clientName: "ANDROID",
|
|
113
|
-
clientVersion: "19.
|
|
114
|
-
userAgent: "com.google.android.youtube/19.
|
|
113
|
+
clientVersion: "19.44.38",
|
|
114
|
+
userAgent: "com.google.android.youtube/19.44.38 (Linux; U; Android 14) gzip",
|
|
115
115
|
apiKey: "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w",
|
|
116
116
|
clientId: 3,
|
|
117
117
|
},
|