getraw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +4 -0
- package/CLAUDE.md +57 -0
- package/README.md +166 -0
- package/RESEARCH.md +109 -0
- package/STATUS.md +23 -0
- package/bun.lock +50 -0
- package/bunfig.toml +3 -0
- package/docs/plugin-guide.md +166 -0
- package/docs/supported-sites.md +41 -0
- package/package.json +30 -0
- package/src/cli/index.ts +52 -0
- package/src/cli/options.ts +97 -0
- package/src/core/format-sorter.ts +208 -0
- package/src/core/logger.ts +101 -0
- package/src/core/orchestrator.ts +140 -0
- package/src/core/output-template.ts +58 -0
- package/src/core/types.ts +237 -0
- package/src/downloaders/base.ts +25 -0
- package/src/downloaders/dash.ts +287 -0
- package/src/downloaders/fragment.ts +226 -0
- package/src/downloaders/hls.ts +170 -0
- package/src/downloaders/http.ts +260 -0
- package/src/extractors/archive-org.ts +126 -0
- package/src/extractors/bandcamp.ts +130 -0
- package/src/extractors/base.ts +29 -0
- package/src/extractors/bilibili/bangumi.ts +205 -0
- package/src/extractors/bilibili/index.ts +233 -0
- package/src/extractors/bilibili/wbi.ts +60 -0
- package/src/extractors/coub.ts +137 -0
- package/src/extractors/dailymotion.ts +99 -0
- package/src/extractors/dropbox.ts +52 -0
- package/src/extractors/generic.ts +118 -0
- package/src/extractors/google-drive.ts +106 -0
- package/src/extractors/imgur.ts +156 -0
- package/src/extractors/instagram/index.ts +263 -0
- package/src/extractors/instagram/reels.ts +166 -0
- package/src/extractors/kick/clips.ts +91 -0
- package/src/extractors/kick/index.ts +118 -0
- package/src/extractors/kick/live.ts +89 -0
- package/src/extractors/niconico/index.ts +209 -0
- package/src/extractors/odysee.ts +126 -0
- package/src/extractors/peertube.ts +143 -0
- package/src/extractors/reddit/gallery.ts +124 -0
- package/src/extractors/reddit/index.ts +203 -0
- package/src/extractors/rumble.ts +127 -0
- package/src/extractors/soundcloud/index.ts +161 -0
- package/src/extractors/soundcloud/playlist.ts +129 -0
- package/src/extractors/spotify.ts +97 -0
- package/src/extractors/streamable.ts +121 -0
- package/src/extractors/ted.ts +151 -0
- package/src/extractors/tiktok/index.ts +207 -0
- package/src/extractors/tiktok/user.ts +176 -0
- package/src/extractors/twitch/clips.ts +125 -0
- package/src/extractors/twitch/index.ts +136 -0
- package/src/extractors/twitch/live.ts +132 -0
- package/src/extractors/twitter/index.ts +140 -0
- package/src/extractors/twitter/spaces.ts +200 -0
- package/src/extractors/vimeo/index.ts +187 -0
- package/src/extractors/youtube/captions.ts +111 -0
- package/src/extractors/youtube/index.ts +252 -0
- package/src/extractors/youtube/innertube.ts +364 -0
- package/src/extractors/youtube/nsig.ts +105 -0
- package/src/extractors/youtube/playlist.ts +227 -0
- package/src/extractors/youtube/signature.ts +163 -0
- package/src/networking/client.ts +311 -0
- package/src/networking/cookies.ts +138 -0
- package/src/networking/proxy.ts +132 -0
- package/src/networking/tls.ts +67 -0
- package/src/networking/user-agents.ts +88 -0
- package/src/postprocessors/base.ts +44 -0
- package/src/postprocessors/extract-audio.ts +98 -0
- package/src/postprocessors/ffmpeg.ts +146 -0
- package/src/postprocessors/merge.ts +102 -0
- package/src/postprocessors/metadata.ts +73 -0
- package/src/postprocessors/sponsorblock.ts +162 -0
- package/src/postprocessors/subtitles.ts +285 -0
- package/src/postprocessors/thumbnails.ts +194 -0
- package/src/utils/sanitize.ts +36 -0
- package/src/utils/traverse.ts +68 -0
- package/tests/core/format-sorter.test.ts +96 -0
- package/tests/core/output-template.test.ts +56 -0
- package/tests/core/types.test.ts +79 -0
- package/tests/unit/downloaders/dash.test.ts +57 -0
- package/tests/unit/downloaders/hls.test.ts +120 -0
- package/tests/unit/downloaders/http.test.ts +114 -0
- package/tests/unit/extractors/bilibili.test.ts +83 -0
- package/tests/unit/extractors/instagram.test.ts +273 -0
- package/tests/unit/extractors/kick.test.ts +85 -0
- package/tests/unit/extractors/misc.test.ts +942 -0
- package/tests/unit/extractors/niconico.test.ts +61 -0
- package/tests/unit/extractors/reddit.test.ts +222 -0
- package/tests/unit/extractors/soundcloud.test.ts +299 -0
- package/tests/unit/extractors/tiktok.test.ts +260 -0
- package/tests/unit/extractors/twitch.test.ts +250 -0
- package/tests/unit/extractors/twitter.test.ts +181 -0
- package/tests/unit/extractors/vimeo.test.ts +253 -0
- package/tests/unit/extractors/youtube.test.ts +259 -0
- package/tests/unit/networking/client.test.ts +272 -0
- package/tests/unit/networking/cookies.test.ts +256 -0
- package/tests/unit/networking/proxy.test.ts +137 -0
- package/tests/unit/postprocessors/extract-audio.test.ts +63 -0
- package/tests/unit/postprocessors/merge.test.ts +61 -0
- package/tests/unit/postprocessors/subtitles.test.ts +89 -0
- package/tools/dashboard.ts +112 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../../core/types";
|
|
2
|
+
import type { InfoDict, Format, Thumbnail } from "../../core/types";
|
|
3
|
+
|
|
4
|
+
// Matches bilibili.com/video/<id> links. Capture group 1 is the full id token (`BV…` or `av<digits>`);
// capture group 2 is only set for the `av` form and holds the digits.
const VALID_URL = /https?:\/\/(?:www\.)?bilibili\.com\/video\/(BV[\w]+|av(\d+))/;
|
|
5
|
+
|
|
6
|
+
/**
 * One row per known quality id: `[id, display label, preference rank]`.
 * A higher rank is written to `Format.quality` by the extractor.
 */
const QUALITY_LEVELS: Array<[number, string, number]> = [
  [127, "8K", 10],
  [126, "Dolby Vision", 9],
  [125, "HDR", 8],
  [120, "4K", 7],
  [116, "1080p60", 6],
  [112, "1080p+", 5],
  [80, "1080p", 4],
  [64, "720p", 3],
  [32, "480p", 2],
  [16, "360p", 1],
];

/** Quality id -> human-readable label used as `format_note`. */
const QUALITY_MAP: Record<number, string> = Object.fromEntries(
  QUALITY_LEVELS.map(([id, label]): [number, string] => [id, label]),
);

/** Quality id -> relative preference (larger wins) used as `quality`. */
const QUALITY_PREFERENCE: Record<number, number> = Object.fromEntries(
  QUALITY_LEVELS.map(([id, , rank]): [number, number] => [id, rank]),
);
|
|
31
|
+
|
|
32
|
+
/** Uploader reference inside the view payload. */
interface VideoOwner {
  name: string;
  mid: number;
}

/** Counters inside the view payload. */
interface VideoStat {
  view: number;
  like: number;
  coin: number;
  reply: number;
}

/** One entry of the optional `pages` list. */
interface VideoPage {
  cid: number;
  page: number;
  part: string;
  duration: number;
}

/** The `data` member of the view response. */
interface VideoViewPayload {
  bvid: string;
  aid: number;
  cid: number;
  title: string;
  desc: string;
  owner: VideoOwner;
  stat: VideoStat;
  pic: string;
  duration: number;
  /** The extractor multiplies this by 1000 before building a `Date`. */
  pubdate: number;
  pages?: VideoPage[];
}

/**
 * JSON body of the `x/web-interface/view` request.
 * The extractor treats `code === 0` as success and reports `message` otherwise.
 */
interface VideoViewData {
  code: number;
  message: string;
  data: VideoViewPayload;
}

/**
 * A single DASH representation from the playurl response.
 * Several fields are declared in both camelCase and snake_case; the extractor
 * reads whichever of the pair is set.
 */
interface DashStream {
  id: number;
  baseUrl: string;
  base_url: string;
  backupUrl?: string[];
  backup_url?: string[];
  bandwidth: number;
  mimeType: string;
  mime_type: string;
  codecs: string;
  width?: number;
  height?: number;
  frameRate?: string;
  frame_rate?: string;
  sar?: string;
}

/** Video and audio representation lists of a DASH playurl answer. */
interface DashManifest {
  video: DashStream[];
  audio: DashStream[];
}

/** One entry of the `durl` list. */
interface DurlSegment {
  url: string;
  size: number;
  order: number;
}

/** The `data` member of the playurl response. */
interface PlayUrlPayload {
  quality: number;
  accept_quality: number[];
  accept_description: string[];
  dash?: DashManifest;
  durl?: DurlSegment[];
}

/**
 * JSON body of the `x/player/playurl` request.
 * The extractor treats `code === 0` as success and reports `message` otherwise.
 */
interface PlayUrlData {
  code: number;
  message: string;
  data: PlayUrlPayload;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Request headers sent with both API calls and attached to every emitted
 * format as `http_headers`.
 */
const BILIBILI_HEADERS = {
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  "Referer": "https://www.bilibili.com",
  "Origin": "https://www.bilibili.com",
};
|
|
87
|
+
|
|
88
|
+
/**
 * Decodes a `BV…` id into its numeric `av` id.
 *
 * Six characters of the id (at positions 11, 10, 3, 8, 4, 6, least significant
 * first) are read as base-58 digits over a fixed alphabet; a constant is then
 * subtracted and the result is XOR-ed with a second constant.
 *
 * NOTE(review): this decodes a fixed six-digit layout — confirm it still
 * matches every id format the site currently issues.
 *
 * @param bvid Full id including the `BV` prefix; must be at least 12 characters.
 * @returns The decoded numeric id.
 * @throws TypeError when a required position is missing or holds a character
 *   outside the alphabet (previously this surfaced as an opaque
 *   "Cannot convert undefined to a BigInt").
 */
function bvToAv(bvid: string): number {
  const TABLE = "fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF";
  const POSITIONS = [11, 10, 3, 8, 4, 6];
  const XOR = 177451812n;
  const ADD = 8728348608n;

  let acc = 0n;
  let weight = 1n;
  for (const pos of POSITIONS) {
    const ch = bvid.charAt(pos);
    // charAt returns "" past the end, and "".indexOf("") is 0, so test it explicitly.
    const digit = ch === "" ? -1 : TABLE.indexOf(ch);
    if (digit < 0) {
      throw new TypeError(`bilibili: malformed bvid: ${bvid}`);
    }
    acc += BigInt(digit) * weight;
    weight *= 58n;
  }
  return Number((acc - ADD) ^ XOR);
}
|
|
99
|
+
|
|
100
|
+
/**
 * Extractor for `bilibili.com/video/<BV…|av…>` pages.
 *
 * Flow: look the video up through the view API, request stream URLs through
 * the playurl API, then translate the DASH (or, failing that, `durl`) answer
 * into `Format` entries.
 */
export class BilibiliExtractor extends BaseExtractor {
  readonly _VALID_URL = VALID_URL;
  readonly _NAME = "bilibili";

  /**
   * @param url Page URL; must match `VALID_URL`.
   * @returns Metadata plus one format per DASH video stream, the
   *   highest-bandwidth DASH audio stream, or one format per `durl` entry.
   * @throws ExtractorError when the URL does not match, an HTTP request fails,
   *   or an API answers with a non-zero `code`.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const match = VALID_URL.exec(url);
    if (!match) throw new ExtractorError(`bilibili: invalid URL: ${url}`);

    // The view API accepts either id form directly, so the BV id is forwarded
    // as-is. No local BV -> av conversion is done: its result was never used
    // and it could throw on ids it does not understand.
    const idPart = match[1];
    const viewQuery = idPart.startsWith("BV") ? `bvid=${idPart}` : `aid=${parseInt(match[2], 10)}`;

    const viewResp = await fetch(`https://api.bilibili.com/x/web-interface/view?${viewQuery}`, {
      headers: BILIBILI_HEADERS,
    });
    if (!viewResp.ok) throw new ExtractorError(`bilibili: view API failed: ${viewResp.status}`);

    const viewData = (await viewResp.json()) as VideoViewData;
    if (viewData.code !== 0) throw new ExtractorError(`bilibili: ${viewData.message}`);

    const video = viewData.data;

    const playParams = new URLSearchParams({
      bvid: video.bvid,
      cid: String(video.cid),
      qn: "127",
      fnval: "4048",
      fnver: "0",
      fourk: "1",
    });

    const playResp = await fetch(`https://api.bilibili.com/x/player/playurl?${playParams}`, {
      headers: BILIBILI_HEADERS,
    });
    if (!playResp.ok) throw new ExtractorError(`bilibili: playurl API failed: ${playResp.status}`);

    const playData = (await playResp.json()) as PlayUrlData;
    if (playData.code !== 0) throw new ExtractorError(`bilibili: playurl: ${playData.message}`);

    const formats: Format[] = [];
    const { dash, durl, quality } = playData.data;

    if (dash) {
      // Either list may be absent in the JSON despite the declared type.
      const videoStreams = dash.video ?? [];
      const audioStreams = dash.audio ?? [];

      for (const vs of videoStreams) {
        const streamUrl = vs.baseUrl || vs.base_url;
        if (!streamUrl) continue; // nothing downloadable for this entry

        const mime = vs.mimeType || vs.mime_type || "";
        const rawFps = vs.frameRate || vs.frame_rate;
        const parsedFps = rawFps ? parseFloat(rawFps) : NaN;

        formats.push({
          format_id: `dash-video-${vs.id}`,
          url: streamUrl,
          ext: mime.includes("mp4") ? "mp4" : "webm",
          vcodec: vs.codecs,
          acodec: "none",
          width: vs.width,
          height: vs.height,
          fps: Number.isFinite(parsedFps) ? parsedFps : undefined,
          tbr: Math.round(vs.bandwidth / 1000),
          format_note: QUALITY_MAP[vs.id] ?? `qn${vs.id}`,
          quality: QUALITY_PREFERENCE[vs.id] ?? 0,
          protocol: "https",
          http_headers: BILIBILI_HEADERS,
        });
      }

      // Only the highest-bandwidth audio stream is exposed.
      const bestAudio = audioStreams.reduce<DashStream | null>(
        (best, candidate) => (!best || candidate.bandwidth > best.bandwidth ? candidate : best),
        null,
      );
      const audioUrl = bestAudio ? bestAudio.baseUrl || bestAudio.base_url : undefined;

      if (bestAudio && audioUrl) {
        const mime = bestAudio.mimeType || bestAudio.mime_type || "";
        formats.push({
          format_id: "dash-audio-best",
          url: audioUrl,
          ext: mime.includes("mp4") ? "m4a" : "ogg",
          vcodec: "none",
          acodec: bestAudio.codecs,
          abr: Math.round(bestAudio.bandwidth / 1000),
          format_note: "audio",
          quality: 0,
          protocol: "https",
          http_headers: BILIBILI_HEADERS,
        });
      }
    } else if (durl) {
      for (const [i, seg] of durl.entries()) {
        formats.push({
          format_id: `flv-${i}`,
          url: seg.url,
          ext: "flv",
          filesize: seg.size,
          quality: QUALITY_PREFERENCE[quality] ?? 0,
          format_note: QUALITY_MAP[quality] ?? `qn${quality}`,
          http_headers: BILIBILI_HEADERS,
        });
      }
    }

    // toISOString() throws on an invalid date, so validate before formatting.
    const published = new Date(video.pubdate * 1000);
    const uploadDate = Number.isNaN(published.getTime())
      ? undefined
      : published.toISOString().slice(0, 10).replace(/-/g, "");

    const thumbnails: Thumbnail[] = video.pic ? [{ url: video.pic }] : [];

    return {
      id: video.bvid,
      title: video.title,
      description: video.desc,
      uploader: video.owner.name,
      uploader_id: String(video.owner.mid),
      uploader_url: `https://space.bilibili.com/${video.owner.mid}`,
      duration: video.duration,
      view_count: video.stat.view,
      like_count: video.stat.like,
      upload_date: uploadDate,
      timestamp: video.pubdate,
      thumbnails,
      formats,
      webpage_url: url,
      _type: "video",
    };
  }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
|
|
3
|
+
/**
 * Index permutation applied to the concatenated img/sub keys when building
 * the mixin key: 64 entries, each of 0..63 appearing once.
 */
const MIXIN_KEY_ENC_TAB = [
  46, 47, 18, 2, 53, 8, 23, 32,
  15, 50, 10, 31, 58, 3, 45, 35,
  27, 43, 5, 49, 33, 9, 42, 19,
  29, 28, 14, 39, 12, 38, 41, 13,
  37, 48, 7, 16, 24, 55, 40, 61,
  26, 17, 0, 1, 60, 51, 30, 4,
  22, 25, 54, 21, 56, 59, 6, 63,
  57, 62, 11, 36, 20, 34, 44, 52,
];

/** The two image URLs whose file names carry the key material. */
interface WbiImageUrls {
  img_url: string;
  sub_url: string;
}

/** Portion of the `x/web-interface/nav` response that this module reads. */
interface NavData {
  data: {
    wbi_img: WbiImageUrls;
  };
}
|
|
17
|
+
|
|
18
|
+
/**
 * Returns the last `/`-separated segment of `url` with one trailing
 * `.<word characters>` suffix removed.
 *
 * @param url Any string; segments are split on `/` only.
 * @returns The bare file name, or `""` when the last segment is empty.
 */
function extractKey(url: string): string {
  const tail = url.slice(url.lastIndexOf("/") + 1);
  const suffix = /\.\w+$/.exec(tail);
  return suffix ? tail.slice(0, suffix.index) : tail;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Builds the mixin key: concatenates the two keys, picks characters in the
 * order given by `MIXIN_KEY_ENC_TAB`, and keeps the first 32 of them.
 * Positions beyond the end of the concatenated string contribute nothing.
 *
 * @param imgKey Key taken from the `img_url` file name.
 * @param subKey Key taken from the `sub_url` file name.
 * @returns At most 32 characters.
 */
function getMixinKey(imgKey: string, subKey: string): string {
  const combined = `${imgKey}${subKey}`;
  let shuffled = "";
  for (const position of MIXIN_KEY_ENC_TAB) {
    const ch = combined[position];
    if (ch !== undefined) shuffled += ch;
  }
  return shuffled.slice(0, 32);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Removes characters that must not appear in a signed parameter value:
 * `!`, `'`, `"`, `(`, `)`, `*` and any whitespace.
 *
 * The previous pattern used `\\s` inside the character class, which in a
 * regex literal means "a backslash or the letter s" — so every `s` was
 * silently dropped from values. `*` is included because the reference
 * signing algorithm filters it and `encodeURIComponent` leaves it unescaped.
 *
 * @param str Raw parameter value.
 * @returns `str` without the filtered characters.
 */
function filterNonPrintable(str: string): string {
  return str.replace(/[!'"()*\s]/g, "");
}
|
|
30
|
+
|
|
31
|
+
/**
 * Fetches the nav endpoint and derives the mixin key from the two image URLs
 * it returns.
 *
 * @returns The mixin key to pass to `signWbi`.
 * @throws Error when the HTTP request fails, or when the response does not
 *   contain both `data.wbi_img.img_url` and `data.wbi_img.sub_url` as strings
 *   (previously this surfaced as an anonymous property-access TypeError).
 */
export async function fetchMixinKey(): Promise<string> {
  const resp = await fetch("https://api.bilibili.com/x/web-interface/nav", {
    headers: {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      Referer: "https://www.bilibili.com",
    },
  });
  if (!resp.ok) throw new Error(`wbi: nav request failed: ${resp.status}`);

  // The cast only describes the expected shape; verify it before use.
  const payload = (await resp.json()) as Partial<NavData> | null;
  const wbiImg = payload?.data?.wbi_img;
  if (!wbiImg || typeof wbiImg.img_url !== "string" || typeof wbiImg.sub_url !== "string") {
    throw new Error("wbi: nav response is missing wbi_img keys");
  }

  return getMixinKey(extractKey(wbiImg.img_url), extractKey(wbiImg.sub_url));
}
|
|
44
|
+
|
|
45
|
+
/**
 * Signs a parameter set: filters every value, adds the current Unix time as
 * `wts`, and appends `w_rid`, the MD5 hex digest of the sorted query string
 * followed by the mixin key.
 *
 * The digest is computed over the percent-encoded `key=value` pairs so that
 * values containing non-ASCII or reserved characters hash the same way they
 * are transmitted. The returned values are left unencoded; the caller's query
 * builder is expected to encode them.
 *
 * @param params Parameters to sign; a `wts` entry, if present, is overwritten.
 * @param mixinKey Key obtained from `fetchMixinKey`.
 * @returns The filtered parameters plus `wts` and `w_rid`, all as strings.
 */
export function signWbi(params: Record<string, string | number>, mixinKey: string): Record<string, string> {
  const signed: Record<string, string> = {};
  for (const [key, value] of Object.entries(params)) {
    signed[key] = filterNonPrintable(String(value));
  }
  signed["wts"] = String(Math.floor(Date.now() / 1000));

  const query = Object.keys(signed)
    .sort()
    .map((key) => `${encodeURIComponent(key)}=${encodeURIComponent(signed[key] ?? "")}`)
    .join("&");

  const wRid = createHash("md5").update(query + mixinKey).digest("hex");
  return { ...signed, w_rid: wRid };
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../core/types";
|
|
2
|
+
import type { InfoDict, Format, Thumbnail } from "../core/types";
|
|
3
|
+
|
|
4
|
+
/** A single downloadable rendition: its location and, when reported, byte size. */
interface CoubMediaSource {
  url?: string;
  size?: number;
}

/** Renditions keyed by quality name (the extractor knows `higher`, `high`, `med`, `low`). */
type CoubSourcesByQuality = Record<string, CoubMediaSource>;

/** `file_versions` member of the API answer; only `html5` is read by the extractor. */
interface CoubFileVersions {
  html5?: {
    video?: CoubSourcesByQuality;
    audio?: CoubSourcesByQuality;
  };
  mobile?: {
    video?: string[];
    audio?: string;
  };
}

/** Thumbnail description: a URL template containing `%{version}` plus the version names. */
interface CoubImageVersions {
  template?: string;
  versions?: string[];
}

/** Channel that published the coub. */
interface CoubChannel {
  permalink?: string;
  title?: string;
}

/** Fields of the `api/v2/coubs/<id>` response that the extractor reads. */
interface CoubData {
  id?: number;
  title?: string;
  description?: string;
  views_count?: number;
  likes_count?: number;
  /** Its first ten characters are used as the upload date. */
  created_at?: string;
  duration?: number;
  file_versions?: CoubFileVersions;
  image_versions?: CoubImageVersions;
  channel?: CoubChannel;
  tags?: Array<{ title?: string }>;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Extractor for `coub.com/view/<id>` and `coub.com/embed/<id>` links.
 * Video and audio are exposed as separate formats, as the API delivers them.
 */
export class CoubExtractor extends BaseExtractor {
  readonly _VALID_URL = /https?:\/\/(?:www\.)?coub\.com\/(?:view|embed)\/([a-zA-Z0-9_-]+)/;
  readonly _NAME = "coub";

  /**
   * @param url Page URL; must match `_VALID_URL`.
   * @returns Metadata, one video-only format per html5 video rendition and one
   *   audio-only format per html5 audio rendition that has a URL.
   * @throws ExtractorError when the URL does not match or the API request fails.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const urlMatch = this._VALID_URL.exec(url);
    if (urlMatch === null) {
      throw new ExtractorError(`Invalid Coub URL: ${url}`);
    }
    const coubId = urlMatch[1];

    const response = await fetch(`https://coub.com/api/v2/coubs/${coubId}`, {
      headers: { "User-Agent": "Mozilla/5.0" },
    });
    if (!response.ok) {
      throw new ExtractorError(`Coub API error: ${response.status}`);
    }
    const coub = (await response.json()) as CoubData;

    // Quality name -> sort rank / nominal frame height.
    const rankByQuality: Record<string, number> = { higher: 4, high: 3, med: 2, low: 1 };
    const heightByQuality: Record<string, number> = { higher: 1080, high: 720, med: 360, low: 240 };

    const html5 = coub.file_versions?.html5;
    const formats: Format[] = [];

    for (const [quality, source] of Object.entries(html5?.video ?? {})) {
      if (!source.url) continue;
      formats.push({
        format_id: `video-${quality}`,
        url: source.url,
        ext: "mp4",
        vcodec: "h264",
        acodec: "none",
        height: heightByQuality[quality],
        filesize: source.size,
        quality: rankByQuality[quality] ?? 0,
        format_note: `video-only (${quality}); audio is separate and requires merge`,
      });
    }

    for (const [quality, source] of Object.entries(html5?.audio ?? {})) {
      if (!source.url) continue;
      formats.push({
        format_id: `audio-${quality}`,
        url: source.url,
        ext: "mp4",
        vcodec: "none",
        acodec: "aac",
        filesize: source.size,
        quality: rankByQuality[quality] ?? 0,
        format_note: `audio-only (${quality})`,
      });
    }

    // Thumbnails exist only when a template is present; one per listed version.
    const template = coub.image_versions?.template;
    const thumbnails: Thumbnail[] = template
      ? (coub.image_versions?.versions ?? []).map((version) => ({
          url: template.replace("%{version}", version),
        }))
      : [];

    const uploadDate = coub.created_at?.slice(0, 10).replace(/-/g, "");
    const tags = coub.tags?.map((tag) => tag.title ?? "").filter(Boolean);

    return {
      id: String(coub.id ?? coubId),
      title: coub.title ?? coubId,
      description: coub.description,
      duration: coub.duration,
      view_count: coub.views_count,
      like_count: coub.likes_count,
      uploader: coub.channel?.title,
      uploader_id: coub.channel?.permalink,
      upload_date: uploadDate,
      thumbnails,
      formats,
      tags,
      webpage_url: url,
      extractor: this._NAME,
    };
  }
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../core/types";
|
|
2
|
+
import type { InfoDict, Format, Thumbnail } from "../core/types";
|
|
3
|
+
|
|
4
|
+
/** One stream entry under a quality key: its MIME type and location. */
interface DailymotionQuality {
  type: string;
  url: string;
}

/** Uploader reference inside the metadata answer. */
interface DailymotionOwner {
  screenname?: string;
  id?: string;
}

/** Fields of the player metadata response that the extractor reads. */
interface DailymotionMetadata {
  id: string;
  title: string;
  description?: string;
  duration?: number;
  owner?: DailymotionOwner;
  /** Passed through unchanged as the `timestamp` of the result. */
  created_time?: number;
  views_total?: number;
  likes_total?: number;
  thumbnail_url?: string;
  /** Stream lists keyed by quality name. */
  qualities?: Record<string, DailymotionQuality[]>;
}
|
|
21
|
+
|
|
22
|
+
/** Extractor for `dailymotion.com/video/<id>` links, backed by the player metadata endpoint. */
export class DailymotionExtractor extends BaseExtractor {
  readonly _VALID_URL = /https?:\/\/(?:www\.)?dailymotion\.com\/video\/([a-zA-Z0-9]+)/;
  readonly _NAME = "dailymotion";

  /**
   * @param url Page URL; must match `_VALID_URL`.
   * @returns Metadata plus one format per MP4 or HLS stream that has a URL.
   * @throws ExtractorError when the URL does not match or the request fails.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const urlMatch = this._VALID_URL.exec(url);
    if (urlMatch === null) {
      throw new ExtractorError(`Invalid Dailymotion URL: ${url}`);
    }
    const videoId = urlMatch[1];

    const response = await fetch(
      `https://www.dailymotion.com/player/metadata/video/${videoId}?app=com.dailymotion.neon`,
      { headers: { "User-Agent": "Mozilla/5.0" } },
    );
    if (!response.ok) {
      throw new ExtractorError(`Dailymotion API error: ${response.status}`);
    }
    const meta = (await response.json()) as DailymotionMetadata;

    // Quality key -> frame height, for the keys that name a height.
    const heightByKey: Record<string, number> = {
      "2160": 2160,
      "1440": 1440,
      "1080": 1080,
      "720": 720,
      "480": 480,
      "380": 380,
      "240": 240,
      "144": 144,
    };

    const formats: Format[] = [];
    for (const [qualityKey, streams] of Object.entries(meta.qualities ?? {})) {
      for (const stream of streams) {
        const isMp4 = stream.type === "video/mp4";
        const isHls =
          stream.type === "application/x-mpegURL" || stream.type === "application/vnd.apple.mpegurl";
        if (!stream.url || !(isMp4 || isHls)) continue;

        if (isMp4) {
          const height = heightByKey[qualityKey];
          formats.push({
            format_id: `mp4-${qualityKey}`,
            url: stream.url,
            ext: "mp4",
            height,
            resolution: height ? `${height}p` : qualityKey,
            quality: height ?? 0,
          });
        } else {
          formats.push({
            format_id: `hls-${qualityKey}`,
            url: stream.url,
            ext: "mp4",
            protocol: "m3u8",
          });
        }
      }
    }

    const thumbnails: Thumbnail[] = [];
    if (meta.thumbnail_url) {
      thumbnails.push({ url: meta.thumbnail_url });
    }

    return {
      id: videoId,
      title: meta.title,
      description: meta.description,
      duration: meta.duration,
      uploader: meta.owner?.screenname,
      uploader_id: meta.owner?.id,
      timestamp: meta.created_time,
      view_count: meta.views_total,
      like_count: meta.likes_total,
      thumbnails,
      formats,
      webpage_url: url,
      extractor: this._NAME,
    };
  }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../core/types";
|
|
2
|
+
import type { InfoDict } from "../core/types";
|
|
3
|
+
|
|
4
|
+
/**
 * Extractor for Dropbox shared links (`/s/`, `/sh/`, `/scl/fi/`, `/scl/fo/`).
 * Rewrites the link to its direct-download form and probes it with a HEAD
 * request to learn the final URL and size.
 */
export class DropboxExtractor extends BaseExtractor {
  // `scl/fi` is accepted alongside `scl/fo`.
  // NOTE(review): `scl/fi` is believed to be the single-file link form — confirm.
  readonly _VALID_URL = /https?:\/\/(?:www\.)?dropbox\.com\/(?:s|sh|scl\/(?:fi|fo))\/[^?#]+/;
  readonly _NAME = "dropbox";

  /**
   * @param url Shared-link URL.
   * @returns A single `direct` format pointing at the resolved download URL.
   * @throws ExtractorError when the HEAD probe reports a failure status.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const urlObj = new URL(url);

    // Force the download variant of the link. Every other query parameter is
    // kept: `rlkey` in particular appears to be the access key of `scl/…`
    // links, and stripping it made those links unreachable.
    // NOTE(review): confirm against a live `scl` link.
    urlObj.searchParams.set("dl", "1");
    const directUrl = urlObj.toString();

    const headResponse = await fetch(directUrl, {
      method: "HEAD",
      redirect: "follow",
      headers: { "User-Agent": "Mozilla/5.0" },
    });

    if (!headResponse.ok && headResponse.status !== 302 && headResponse.status !== 301) {
      throw new ExtractorError(`Dropbox: could not access file (status ${headResponse.status})`);
    }

    const finalUrl = headResponse.url || directUrl;
    const pathname = urlObj.pathname;
    const segments = pathname.split("/");

    // The last path segment is percent-encoded; decode it for display and fall
    // back to the raw text when it is not valid encoding. A trailing slash
    // leaves an empty segment, which gets the placeholder name.
    const rawName = segments[segments.length - 1] || "file";
    let filename = rawName;
    try {
      filename = decodeURIComponent(rawName);
    } catch {
      // keep rawName
    }

    const dot = filename.lastIndexOf(".");
    const hasExt = dot >= 0 && dot < filename.length - 1;
    const ext = hasExt ? filename.slice(dot + 1).toLowerCase() : "mp4";
    const title = filename.replace(/\.[^.]+$/, "").replace(/[_-]/g, " ");

    const contentLength = headResponse.headers.get("content-length");
    const parsedLength = contentLength ? Number.parseInt(contentLength, 10) : NaN;
    const filesize = Number.isNaN(parsedLength) ? undefined : parsedLength;

    return {
      id: urlObj.searchParams.get("id") ?? segments.slice(-2, -1)[0] ?? "dropbox",
      title,
      url: finalUrl,
      ext,
      formats: [
        {
          format_id: "direct",
          url: finalUrl,
          ext,
          filesize,
        },
      ],
      webpage_url: url,
      extractor: this._NAME,
    };
  }
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { BaseExtractor } from "../core/types";
|
|
2
|
+
import type { InfoDict, Format } from "../core/types";
|
|
3
|
+
|
|
4
|
+
// Case-insensitive test for a URL whose media file extension is followed by `?` or the end of the string.
// Capture group 1 is the extension without the dot.
const MEDIA_EXTENSIONS = /\.(mp4|webm|mkv|flv|avi|mov|wmv|mp3|aac|flac|opus|ogg|wav|m4a)(\?|$)/i;
|
|
5
|
+
// Case-insensitive test for a URL whose `.m3u8` or `.mpd` extension is followed by `?` or the end of the string.
const MANIFEST_EXTENSIONS = /\.(m3u8|mpd)(\?|$)/i;
|
|
6
|
+
|
|
7
|
+
/**
 * Fallback extractor for any http(s) URL.
 *
 * Dispatch order: direct media file, then streaming manifest, then an HTML
 * page that is scanned for media links.
 */
export class GenericExtractor extends BaseExtractor {
  readonly _VALID_URL = /^https?:\/\/.+/;
  readonly _NAME = "generic";

  /**
   * @param url Any http(s) URL.
   * @returns Result of whichever strategy the URL's extension selects.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    if (MEDIA_EXTENSIONS.test(url)) {
      return this.extractDirectMedia(url);
    }

    if (MANIFEST_EXTENSIONS.test(url)) {
      return this.extractManifest(url);
    }

    return this.extractFromPage(url);
  }

  /**
   * Describes a URL that points straight at a media file.
   *
   * The extension is taken from the last path segment when it contains a dot;
   * otherwise from the part of the URL that matched `MEDIA_EXTENSIONS` (for
   * example `/get?file=a.mp4`), so a dot-less segment is never mistaken for an
   * extension. It is always lower-cased.
   */
  private extractDirectMedia(url: string): InfoDict {
    const urlObj = new URL(url);
    // A trailing slash leaves an empty last segment; use the placeholder then.
    const filename = urlObj.pathname.split("/").pop() || "media";

    const dot = filename.lastIndexOf(".");
    const pathExt = dot >= 0 ? filename.slice(dot + 1) : "";
    const ext = (pathExt || MEDIA_EXTENSIONS.exec(url)?.[1] || "mp4").toLowerCase();
    const title = filename.replace(/\.[^.]+$/, "").replace(/[_-]/g, " ");

    return {
      id: this.generateId(url),
      title,
      url,
      ext,
      formats: [
        {
          format_id: "direct",
          url,
          ext,
        },
      ],
    };
  }

  /** Describes a URL that points at an HLS (`.m3u8`) or DASH (`.mpd`) manifest. */
  private extractManifest(url: string): InfoDict {
    const isHLS = /\.m3u8/i.test(url);
    const ext = isHLS ? "mp4" : "webm";
    const protocol = isHLS ? "m3u8" : "dash";

    return {
      id: this.generateId(url),
      title: "Media",
      url,
      ext,
      formats: [
        {
          format_id: protocol,
          url,
          ext,
          protocol,
        },
      ],
    };
  }

  /**
   * Downloads the page and collects media links from `src`/`href` attributes
   * and from the `og:video` meta tag.
   *
   * Links are resolved against the final (post-redirect) page URL, `&amp;` in
   * attribute values is decoded, links that cannot be parsed are skipped, and
   * each resolved URL is reported once.
   */
  private async extractFromPage(url: string): Promise<InfoDict> {
    const response = await fetch(url);
    const html = await response.text();
    // fetch follows redirects; relative links belong to the page actually served.
    const baseUrl = response.url || url;

    const formats: Format[] = [];
    const seen = new Set<string>();

    // Returns the absolute URL, or undefined when `raw` is not parseable.
    const resolve = (raw: string): string | undefined => {
      try {
        return new URL(raw.replace(/&amp;/gi, "&"), baseUrl).href;
      } catch {
        return undefined;
      }
    };

    const mediaMatches = html.matchAll(
      /(?:src|href)=["']([^"']*?\.(mp4|webm|m3u8|mpd)(?:\?[^"']*?)?)["']/gi,
    );
    for (const match of mediaMatches) {
      const mediaUrl = resolve(match[1]);
      if (!mediaUrl || seen.has(mediaUrl)) continue;
      seen.add(mediaUrl);

      const ext = match[2].toLowerCase();
      formats.push({
        format_id: `generic-${formats.length}`,
        url: mediaUrl,
        ext: ext === "m3u8" ? "mp4" : ext === "mpd" ? "webm" : ext,
        protocol: ext === "m3u8" ? "m3u8" : ext === "mpd" ? "dash" : undefined,
      });
    }

    const ogVideo = html.match(
      /<meta[^>]+property=["']og:video(?::url)?["'][^>]+content=["']([^"']+)["']/i,
    );
    if (ogVideo) {
      const videoUrl = resolve(ogVideo[1]);
      if (videoUrl && !seen.has(videoUrl)) {
        seen.add(videoUrl);
        formats.push({
          format_id: "og-video",
          url: videoUrl,
          ext: "mp4",
        });
      }
    }

    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    const ogTitle = html.match(
      /<meta[^>]+property=["']og:title["'][^>]+content=["']([^"']+)["']/i,
    );
    // Titles are often wrapped in whitespace/newlines; a blank one counts as missing.
    const title = (ogTitle?.[1] ?? titleMatch?.[1])?.trim() || "Unknown";

    return {
      id: this.generateId(url),
      title,
      webpage_url: url,
      formats,
    };
  }

  /** Deterministic short id: a 32-bit rolling hash of the URL, rendered in base 36. */
  private generateId(url: string): string {
    let hash = 0;
    for (let i = 0; i < url.length; i++) {
      const char = url.charCodeAt(i);
      hash = ((hash << 5) - hash + char) | 0;
    }
    return Math.abs(hash).toString(36);
  }
}
|