getraw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +4 -0
- package/CLAUDE.md +57 -0
- package/README.md +166 -0
- package/RESEARCH.md +109 -0
- package/STATUS.md +23 -0
- package/bun.lock +50 -0
- package/bunfig.toml +3 -0
- package/docs/plugin-guide.md +166 -0
- package/docs/supported-sites.md +41 -0
- package/package.json +30 -0
- package/src/cli/index.ts +52 -0
- package/src/cli/options.ts +97 -0
- package/src/core/format-sorter.ts +208 -0
- package/src/core/logger.ts +101 -0
- package/src/core/orchestrator.ts +140 -0
- package/src/core/output-template.ts +58 -0
- package/src/core/types.ts +237 -0
- package/src/downloaders/base.ts +25 -0
- package/src/downloaders/dash.ts +287 -0
- package/src/downloaders/fragment.ts +226 -0
- package/src/downloaders/hls.ts +170 -0
- package/src/downloaders/http.ts +260 -0
- package/src/extractors/archive-org.ts +126 -0
- package/src/extractors/bandcamp.ts +130 -0
- package/src/extractors/base.ts +29 -0
- package/src/extractors/bilibili/bangumi.ts +205 -0
- package/src/extractors/bilibili/index.ts +233 -0
- package/src/extractors/bilibili/wbi.ts +60 -0
- package/src/extractors/coub.ts +137 -0
- package/src/extractors/dailymotion.ts +99 -0
- package/src/extractors/dropbox.ts +52 -0
- package/src/extractors/generic.ts +118 -0
- package/src/extractors/google-drive.ts +106 -0
- package/src/extractors/imgur.ts +156 -0
- package/src/extractors/instagram/index.ts +263 -0
- package/src/extractors/instagram/reels.ts +166 -0
- package/src/extractors/kick/clips.ts +91 -0
- package/src/extractors/kick/index.ts +118 -0
- package/src/extractors/kick/live.ts +89 -0
- package/src/extractors/niconico/index.ts +209 -0
- package/src/extractors/odysee.ts +126 -0
- package/src/extractors/peertube.ts +143 -0
- package/src/extractors/reddit/gallery.ts +124 -0
- package/src/extractors/reddit/index.ts +203 -0
- package/src/extractors/rumble.ts +127 -0
- package/src/extractors/soundcloud/index.ts +161 -0
- package/src/extractors/soundcloud/playlist.ts +129 -0
- package/src/extractors/spotify.ts +97 -0
- package/src/extractors/streamable.ts +121 -0
- package/src/extractors/ted.ts +151 -0
- package/src/extractors/tiktok/index.ts +207 -0
- package/src/extractors/tiktok/user.ts +176 -0
- package/src/extractors/twitch/clips.ts +125 -0
- package/src/extractors/twitch/index.ts +136 -0
- package/src/extractors/twitch/live.ts +132 -0
- package/src/extractors/twitter/index.ts +140 -0
- package/src/extractors/twitter/spaces.ts +200 -0
- package/src/extractors/vimeo/index.ts +187 -0
- package/src/extractors/youtube/captions.ts +111 -0
- package/src/extractors/youtube/index.ts +252 -0
- package/src/extractors/youtube/innertube.ts +364 -0
- package/src/extractors/youtube/nsig.ts +105 -0
- package/src/extractors/youtube/playlist.ts +227 -0
- package/src/extractors/youtube/signature.ts +163 -0
- package/src/networking/client.ts +311 -0
- package/src/networking/cookies.ts +138 -0
- package/src/networking/proxy.ts +132 -0
- package/src/networking/tls.ts +67 -0
- package/src/networking/user-agents.ts +88 -0
- package/src/postprocessors/base.ts +44 -0
- package/src/postprocessors/extract-audio.ts +98 -0
- package/src/postprocessors/ffmpeg.ts +146 -0
- package/src/postprocessors/merge.ts +102 -0
- package/src/postprocessors/metadata.ts +73 -0
- package/src/postprocessors/sponsorblock.ts +162 -0
- package/src/postprocessors/subtitles.ts +285 -0
- package/src/postprocessors/thumbnails.ts +194 -0
- package/src/utils/sanitize.ts +36 -0
- package/src/utils/traverse.ts +68 -0
- package/tests/core/format-sorter.test.ts +96 -0
- package/tests/core/output-template.test.ts +56 -0
- package/tests/core/types.test.ts +79 -0
- package/tests/unit/downloaders/dash.test.ts +57 -0
- package/tests/unit/downloaders/hls.test.ts +120 -0
- package/tests/unit/downloaders/http.test.ts +114 -0
- package/tests/unit/extractors/bilibili.test.ts +83 -0
- package/tests/unit/extractors/instagram.test.ts +273 -0
- package/tests/unit/extractors/kick.test.ts +85 -0
- package/tests/unit/extractors/misc.test.ts +942 -0
- package/tests/unit/extractors/niconico.test.ts +61 -0
- package/tests/unit/extractors/reddit.test.ts +222 -0
- package/tests/unit/extractors/soundcloud.test.ts +299 -0
- package/tests/unit/extractors/tiktok.test.ts +260 -0
- package/tests/unit/extractors/twitch.test.ts +250 -0
- package/tests/unit/extractors/twitter.test.ts +181 -0
- package/tests/unit/extractors/vimeo.test.ts +253 -0
- package/tests/unit/extractors/youtube.test.ts +259 -0
- package/tests/unit/networking/client.test.ts +272 -0
- package/tests/unit/networking/cookies.test.ts +256 -0
- package/tests/unit/networking/proxy.test.ts +137 -0
- package/tests/unit/postprocessors/extract-audio.test.ts +63 -0
- package/tests/unit/postprocessors/merge.test.ts +61 -0
- package/tests/unit/postprocessors/subtitles.test.ts +89 -0
- package/tools/dashboard.ts +112 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../core/types";
|
|
2
|
+
import type { InfoDict, Format, Thumbnail, Subtitle } from "../core/types";
|
|
3
|
+
|
|
4
|
+
/**
 * One entry of the `files` array in a PeerTube video API response.
 * Every member is optional.
 */
interface PeerTubeFile {
  /** Media URL; the extractor prefers this over `fileDownloadUrl`. */
  fileUrl?: string;
  /** Used as the media URL when `fileUrl` is absent. */
  fileDownloadUrl?: string;
  torrentUrl?: string;
  /** `id` is used as the format height/quality, `label` as its display name. */
  resolution?: {
    id?: number;
    label?: string;
  };
  /** Copied to the format's `filesize`. */
  size?: number;
  fps?: number;
  width?: number;
  height?: number;
}
|
|
14
|
+
|
|
15
|
+
/** One entry of `streamingPlaylists` in a PeerTube video API response. */
interface PeerTubeStreamingPlaylist {
  /** Playlist URL; the extractor emits it as an m3u8 format. */
  playlistUrl?: string;
  type?: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Subset of the PeerTube video API response that the extractor reads.
 * Every member is optional.
 */
interface PeerTubeVideo {
  /** Preferred result id; the id taken from the URL is used when missing. */
  uuid?: string;
  /** Video title. */
  name?: string;
  description?: string;
  duration?: number;
  views?: number;
  likes?: number;
  dislikes?: number;
  /** Date string; its first ten characters become `upload_date` with dashes removed. */
  publishedAt?: string;
  /** Absolute URL or instance-relative path. */
  thumbnailUrl?: string;
  /** Absolute URL or instance-relative path; emitted with preference 1. */
  previewUrl?: string;
  isLive?: boolean;
  account?: {
    displayName?: string;
    name?: string;
    url?: string;
  };
  channel?: {
    displayName?: string;
    name?: string;
    url?: string;
  };
  files?: Array<PeerTubeFile>;
  streamingPlaylists?: Array<PeerTubeStreamingPlaylist>;
  captions?: {
    language?: { id?: string; label?: string };
    captionPath?: string;
    fileUrl?: string;
  }[];
}
|
|
38
|
+
|
|
39
|
+
/**
 * Watch-page URLs (`/videos/watch/<id>` or the short `/w/<id>` form).
 * Group 1 is the instance host, group 2 the video id.
 */
const PEERTUBE_INSTANCE_REGEX = /https?:\/\/([^/]+)\/(?:videos\/watch|w)\/([a-zA-Z0-9-]+)/;

/**
 * Embed URLs (`/videos/embed/<id>`).
 * Group 1 is the instance host, group 2 the video id.
 */
const PEERTUBE_EMBED_REGEX = /https?:\/\/([^/]+)\/videos\/embed\/([a-zA-Z0-9-]+)/;
|
|
41
|
+
|
|
42
|
+
/**
 * Extractor for videos on PeerTube instances.
 *
 * The instance host and video id are read from the page URL, the instance's
 * `/api/v1/videos/<id>` endpoint is queried over HTTPS, and the JSON answer is
 * mapped to an `InfoDict`.
 */
export class PeerTubeExtractor extends BaseExtractor {
  readonly _VALID_URL = /https?:\/\/[^/]+\/(?:videos\/(?:watch|embed)|w)\/[a-zA-Z0-9-]+/;
  readonly _NAME = "peertube";

  /**
   * Fetch the video metadata and build the info dictionary.
   *
   * @param url Watch (`/videos/watch/<id>`, `/w/<id>`) or embed URL.
   * @returns Metadata, direct file formats, HLS playlists, thumbnails and subtitles.
   * @throws ExtractorError when the URL does not match or the API answers with a non-2xx status.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const match = url.match(PEERTUBE_INSTANCE_REGEX) ?? url.match(PEERTUBE_EMBED_REGEX);
    if (!match) throw new ExtractorError(`Invalid PeerTube URL: ${url}`);

    const instance = match[1];
    const videoId = match[2];
    const origin = `https://${instance}`;
    const apiUrl = `${origin}/api/v1/videos/${videoId}`;

    // Absolute http(s) URLs pass through untouched. Anything else is resolved
    // against the instance origin, so paths without a leading slash and
    // protocol-relative URLs come out well-formed.
    const toAbsolute = (pathOrUrl: string): string => {
      if (/^https?:\/\//i.test(pathOrUrl)) return pathOrUrl;
      try {
        return new URL(pathOrUrl, `${origin}/`).href;
      } catch {
        return `${origin}${pathOrUrl}`;
      }
    };

    const response = await fetch(apiUrl, {
      headers: { "User-Agent": "Mozilla/5.0" },
    });

    if (!response.ok) {
      throw new ExtractorError(`PeerTube API error: ${response.status} for ${apiUrl}`);
    }

    const data = (await response.json()) as PeerTubeVideo;

    const formats: Format[] = [];

    for (const file of data.files ?? []) {
      const fileUrl = file.fileUrl ?? file.fileDownloadUrl;
      if (!fileUrl) continue;

      // The resolution id doubles as the pixel height and the quality rank.
      const height = file.resolution?.id;
      formats.push({
        format_id: `mp4-${file.resolution?.label ?? height ?? "unknown"}`,
        url: fileUrl,
        ext: "mp4",
        height,
        fps: file.fps,
        filesize: file.size,
        resolution: file.resolution?.label,
        quality: height ?? 0,
      });
    }

    let hlsCount = 0;
    for (const playlist of data.streamingPlaylists ?? []) {
      if (!playlist.playlistUrl) continue;
      hlsCount += 1;
      formats.push({
        // The first playlist keeps the plain "hls" id; later ones get a
        // numeric suffix so format ids stay unique.
        format_id: hlsCount === 1 ? "hls" : `hls-${hlsCount}`,
        url: playlist.playlistUrl,
        ext: "mp4",
        protocol: "m3u8",
        quality: -1,
      });
    }

    const thumbnails: Thumbnail[] = [];
    if (data.thumbnailUrl) {
      thumbnails.push({ url: toAbsolute(data.thumbnailUrl) });
    }
    if (data.previewUrl) {
      thumbnails.push({ url: toAbsolute(data.previewUrl), preference: 1 });
    }

    const subtitles: Record<string, Subtitle[]> = {};
    for (const caption of data.captions ?? []) {
      const lang = caption.language?.id ?? "und";
      const captionUrl =
        caption.fileUrl ?? (caption.captionPath ? toAbsolute(caption.captionPath) : undefined);
      if (!captionUrl) continue;

      // Append instead of overwrite: several captions can share one language
      // code, notably the "und" fallback.
      const tracks = subtitles[lang] ?? [];
      tracks.push({ url: captionUrl, ext: "vtt", name: caption.language?.label });
      subtitles[lang] = tracks;
    }

    return {
      id: data.uuid ?? videoId,
      title: data.name ?? videoId,
      description: data.description,
      duration: data.duration,
      view_count: data.views,
      like_count: data.likes,
      uploader: data.account?.displayName ?? data.account?.name,
      uploader_url: data.account?.url,
      channel: data.channel?.displayName ?? data.channel?.name,
      channel_url: data.channel?.url,
      // "YYYY-MM-DD…" -> "YYYYMMDD"
      upload_date: data.publishedAt?.slice(0, 10).replace(/-/g, ""),
      thumbnails,
      formats,
      subtitles,
      live_status: data.isLive ? "is_live" : "not_live",
      webpage_url: url,
      extractor: this._NAME,
    };
  }
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../../core/types";
|
|
2
|
+
import type { InfoDict, Format } from "../../core/types";
|
|
3
|
+
|
|
4
|
+
/** One entry of `gallery_data.items` in a Reddit gallery post. */
interface GalleryItem {
  /** Key into the post's `media_metadata`; also used as the entry id. */
  media_id: string;
  id: number;
  /** Used as the entry title when present. */
  caption?: string;
}
|
|
9
|
+
|
|
10
|
+
/** `media_metadata` of a Reddit gallery post, keyed by media id. */
interface GalleryMedia {
  [mediaId: string]: {
    /** Media kind; the extractor checks for "AnimatedImage" and "Image". */
    e: string;
    /** Slash-separated type string; the part after "/" becomes the file extension. */
    m?: string;
    /** Preview renditions: `u` is the URL, `x`/`y` the width/height. */
    p?: { u: string; x: number; y: number }[];
    /** Source rendition: `u`, `mp4` and `gif` are URLs, `x`/`y` the width/height. */
    s?: {
      u?: string;
      mp4?: string;
      gif?: string;
      x?: number;
      y?: number;
    };
    id: string;
  };
}
|
|
19
|
+
|
|
20
|
+
/** Fields of a Reddit post that the gallery extractor reads. */
interface RedditGalleryPost {
  id: string;
  title: string;
  author: string;
  url: string;
  /** Reported as `like_count`. */
  score?: number;
  /** Reported as `timestamp`. */
  created_utc?: number;
  /** Ordered gallery entries; required for extraction to proceed. */
  gallery_data?: {
    items?: Array<GalleryItem>;
  };
  /** Per-media details looked up by `GalleryItem.media_id`. */
  media_metadata?: GalleryMedia;
}
|
|
30
|
+
|
|
31
|
+
/** One listing of the `.json` response; the post is read from `data.children[0].data`. */
interface RedditApiResponse {
  data: {
    children: { data: RedditGalleryPost }[];
  };
}
|
|
36
|
+
|
|
37
|
+
/**
 * Extractor for Reddit gallery posts.
 *
 * Fetches the post's `.json` representation and returns a playlist with one
 * entry per gallery item, in the order given by `gallery_data.items`.
 */
export class RedditGalleryExtractor extends BaseExtractor {
  readonly _VALID_URL =
    /^https?:\/\/(?:www\.|old\.)?reddit\.com\/(?:r\/[^/]+\/comments\/[^/]+|gallery\/[^/]+)/;
  readonly _NAME = "reddit:gallery";

  /**
   * Build the gallery playlist for a post URL.
   *
   * @param url Reddit comments or `/gallery/` URL.
   * @returns Playlist `InfoDict` whose entries carry the source media plus preview renditions.
   * @throws ExtractorError on a non-2xx response, an unparseable listing, or a post without gallery data.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const normalized = url.replace(/(?:www\.|old\.)?reddit\.com/, "www.reddit.com");
    // Drop the query string AND any fragment before appending ".json";
    // otherwise the suffix ends up inside the fragment and is never sent.
    const jsonUrl = normalized.replace(/[?#].*$/, "").replace(/\/$/, "") + ".json";

    const response = await fetch(jsonUrl, {
      headers: { "User-Agent": "dlpx/1.0" },
    });
    if (!response.ok) {
      throw new ExtractorError(`Reddit API returned ${response.status}`);
    }

    const data = (await response.json()) as RedditApiResponse[];
    const post = data?.[0]?.data?.children?.[0]?.data;
    if (!post) {
      throw new ExtractorError("Could not parse Reddit gallery response");
    }

    if (!post.gallery_data || !post.media_metadata) {
      throw new ExtractorError("No gallery data found in this post");
    }

    const items = post.gallery_data.items ?? [];
    const metadata = post.media_metadata;

    // Media URLs in the JSON payload carry HTML-escaped ampersands; the raw
    // "&amp;" form breaks the signed query string.
    const unescapeUrl = (raw: string): string => raw.replace(/&amp;/g, "&");

    const entries: InfoDict[] = items.map((item, idx) => {
      const media = metadata[item.media_id];
      const formats: Format[] = [];

      if (media) {
        const source = media.s;

        if (media.e === "AnimatedImage" && source?.mp4) {
          formats.push({
            format_id: "mp4",
            url: unescapeUrl(source.mp4),
            ext: "mp4",
            width: source.x,
            height: source.y,
          });
        } else if (media.e === "AnimatedImage" && source?.gif) {
          // Animated item that only ships a GIF rendition.
          formats.push({
            format_id: "gif",
            url: unescapeUrl(source.gif),
            ext: "gif",
            width: source.x,
            height: source.y,
          });
        } else if (media.e === "Image" && source?.u) {
          formats.push({
            format_id: "image",
            url: unescapeUrl(source.u),
            // "image/png" -> "png"
            ext: media.m?.split("/")?.[1] ?? "jpg",
            width: source.x,
            height: source.y,
          });
        }

        for (const preview of media.p ?? []) {
          formats.push({
            format_id: `preview-${preview.x}x${preview.y}`,
            url: unescapeUrl(preview.u),
            ext: "jpg",
            width: preview.x,
            height: preview.y,
          });
        }
      }

      return {
        id: item.media_id,
        title: item.caption ?? `${post.title} [${idx + 1}/${items.length}]`,
        webpage_url: url,
        uploader: post.author,
        playlist_index: idx + 1,
        formats,
      };
    });

    return {
      id: post.id,
      title: post.title,
      webpage_url: url,
      uploader: post.author,
      timestamp: post.created_utc,
      like_count: post.score,
      _type: "playlist",
      entries,
      playlist_count: entries.length,
    };
  }
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../../core/types";
|
|
2
|
+
import type { InfoDict, Format, Thumbnail } from "../../core/types";
|
|
3
|
+
|
|
4
|
+
/** The `reddit_video` object found under a post's `secure_media` / `media`. */
interface RedditVideoData {
  /** DASH manifest URL. */
  dash_url?: string;
  /** Direct MP4 URL; the extractor reports it as a video-only format. */
  fallback_url?: string;
  width?: number;
  height?: number;
  duration?: number;
  is_gif?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/** Fields of a Reddit post that the video extractor reads. */
interface RedditPostData {
  id: string;
  title: string;
  author: string;
  url: string;
  /** Thumbnail URL, or a placeholder such as "default" / "self". */
  thumbnail?: string;
  subreddit?: string;
  /** Reported as `like_count`. */
  score?: number;
  /** Reported as `timestamp`. */
  created_utc?: number;
  is_video?: boolean;
  /** Checked first for the hosted video. */
  secure_media?: { reddit_video?: RedditVideoData };
  /** Checked when `secure_media` has no hosted video. */
  media?: { reddit_video?: RedditVideoData };
  /** Only the first image's `source` is used, as a thumbnail with preference 1. */
  preview?: {
    images?: {
      source?: {
        url: string;
        width?: number;
        height?: number;
      };
    }[];
  };
}
|
|
35
|
+
|
|
36
|
+
/** One listing of the `.json` response; the post is read from `data.children[0].data`. */
interface RedditApiResponse {
  data: {
    children: { data: RedditPostData }[];
  };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Rewrite a Reddit URL so that its host is `www.reddit.com`.
 *
 * `v.redd.it` URLs are returned exactly as given; every other URL is
 * re-serialised with only the hostname replaced.
 *
 * @param url Absolute URL.
 * @returns The input for `v.redd.it`, otherwise the URL on `www.reddit.com`.
 * @throws TypeError when `url` cannot be parsed as an absolute URL.
 */
function normalizeRedditUrl(url: string): string {
  const parsed = new URL(url);
  if (parsed.hostname !== "v.redd.it") {
    parsed.hostname = "www.reddit.com";
    return parsed.toString();
  }
  return url;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Download a DASH manifest and work out the URL of its audio track.
 *
 * Lookup order:
 *  1. The first `<AdaptationSet>` marked as audio, either through
 *     `contentType="audio"` or a `mimeType="audio/…"` on the set or on one of
 *     its representations. Inside it, the `<BaseURL>` of the
 *     highest-`bandwidth` `<Representation>` wins; a `<BaseURL>` placed
 *     directly in the set is the fallback.
 *  2. An `initialization="…audio…"` attribute anywhere in the manifest.
 *
 * The result is resolved against the manifest URL. This is best-effort: any
 * network or parsing failure yields `null` rather than an exception.
 *
 * @param dashUrl Absolute URL of the `.mpd` manifest.
 * @returns Absolute audio URL, or `null` when none is found or the fetch fails.
 */
async function fetchAudioUrlFromDash(dashUrl: string): Promise<string | null> {
  try {
    const response = await fetch(dashUrl);
    if (!response.ok) return null;
    const manifest = await response.text();

    // Element text may be padded with whitespace and is XML-escaped.
    const clean = (raw: string): string => raw.trim().replace(/&amp;/g, "&");

    // Inspect each AdaptationSet on its own so a match can never run from one
    // set's opening tag into the next set's <BaseURL>.
    const adaptationSets = manifest.match(/<AdaptationSet\b[^>]*>[\s\S]*?<\/AdaptationSet>/gi) ?? [];
    for (const set of adaptationSets) {
      if (!/\b(?:mimeType|contentType)="audio/i.test(set)) continue;

      let best: string | undefined;
      let bestBandwidth = -1;
      const representations = set.match(/<Representation\b[^>]*>[\s\S]*?<\/Representation>/gi) ?? [];
      for (const representation of representations) {
        const baseUrl = representation.match(/<BaseURL[^>]*>([^<]+)<\/BaseURL>/i)?.[1];
        if (!baseUrl) continue;
        const bandwidth = Number(representation.match(/\bbandwidth="(\d+)"/i)?.[1] ?? 0);
        if (bandwidth > bestBandwidth) {
          bestBandwidth = bandwidth;
          best = baseUrl;
        }
      }

      if (best === undefined) {
        best = set.match(/<BaseURL[^>]*>([^<]+)<\/BaseURL>/i)?.[1];
      }
      if (best) {
        // Resolving against the manifest URL drops its file name and query.
        return new URL(clean(best), dashUrl).href;
      }
    }

    const audioInit = manifest.match(/initialization="([^"]*audio[^"]*)"/i)?.[1];
    if (audioInit) {
      return new URL(clean(audioInit), dashUrl).href;
    }
  } catch {
    // Deliberately swallowed: callers treat a missing audio URL as "no audio".
    return null;
  }
  return null;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Extractor for Reddit-hosted videos, addressed either by a post URL
 * (`reddit.com/r/<sub>/comments/<id>`) or by a `v.redd.it/<id>` link.
 *
 * Both paths fetch a `.json` listing, take the first post in it and turn its
 * `reddit_video` object into formats.
 */
export class RedditExtractor extends BaseExtractor {
  readonly _VALID_URL =
    /^https?:\/\/(?:www\.|old\.)?reddit\.com\/r\/[^/]+\/comments\/[^/]+|^https?:\/\/v\.redd\.it\/[^/]+/;
  readonly _NAME = "reddit";

  /**
   * Dispatch to the `v.redd.it` or the post-URL code path.
   *
   * @param url URL accepted by `_VALID_URL`.
   * @throws ExtractorError on HTTP errors, an unparseable listing, or a post without a Reddit video.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    if (/^https?:\/\/v\.redd\.it\//.test(url)) {
      return this._extractVReddIt(url);
    }
    return this._extractRedditPost(url);
  }

  /** GET a Reddit `.json` URL with the extractor's User-Agent and return the parsed body. */
  private async _fetchListing(jsonUrl: string): Promise<unknown> {
    const response = await fetch(jsonUrl, {
      headers: { "User-Agent": "dlpx/1.0" },
    });
    if (!response.ok) {
      throw new ExtractorError(`Reddit API returned ${response.status}`);
    }
    return response.json();
  }

  /** Resolve a `v.redd.it/<id>` link through its `/.json` representation. */
  private async _extractVReddIt(url: string): Promise<InfoDict> {
    // Strip query/fragment first so "/.json" is appended to the path itself.
    const bare = url.replace(/[?#].*$/, "");
    const jsonUrl = bare.endsWith("/") ? bare + ".json" : bare + "/.json";

    const data = (await this._fetchListing(jsonUrl)) as RedditApiResponse | RedditApiResponse[];
    // The answer may be a single listing or an array of listings.
    const apiData = Array.isArray(data) ? data[0] : data;
    const post = apiData?.data?.children?.[0]?.data;
    if (!post) {
      throw new ExtractorError("Could not parse Reddit API response");
    }
    return this._buildInfoFromPost(url, post);
  }

  /** Resolve a comments-page URL through `<post-url>.json` on www.reddit.com. */
  private async _extractRedditPost(url: string): Promise<InfoDict> {
    const normalized = normalizeRedditUrl(url);
    // Remove query string and fragment; a surviving "#…" would swallow ".json".
    const jsonUrl = normalized.replace(/[?#].*$/, "").replace(/\/$/, "") + ".json";

    const data = (await this._fetchListing(jsonUrl)) as RedditApiResponse[];
    const post = data?.[0]?.data?.children?.[0]?.data;
    if (!post) {
      throw new ExtractorError("Could not parse Reddit API response");
    }
    return this._buildInfoFromPost(url, post);
  }

  /**
   * Turn a post's `reddit_video` data into an `InfoDict`.
   *
   * Formats are emitted in this order: the video-only fallback MP4, the DASH
   * manifest, the legacy "mp4-with-audio" entry, and an audio-only track.
   *
   * @param url Original page URL, reported as `webpage_url`.
   * @param post Post object taken from the listing.
   * @throws ExtractorError when the post carries no `reddit_video`.
   */
  private async _buildInfoFromPost(url: string, post: RedditPostData): Promise<InfoDict> {
    const videoData = post.secure_media?.reddit_video ?? post.media?.reddit_video;

    if (!videoData) {
      throw new ExtractorError("No Reddit video found in this post");
    }

    const formats: Format[] = [];
    const fallbackUrl = videoData.fallback_url;

    if (fallbackUrl) {
      formats.push({
        format_id: "mp4-video-only",
        url: fallbackUrl,
        ext: "mp4",
        vcodec: "h264",
        acodec: "none",
        width: videoData.width,
        height: videoData.height,
        format_note: "video only",
      });
    }

    if (videoData.dash_url) {
      let audioUrl: string | null = null;

      // Clips flagged `is_gif` have no audio track, so none is looked up.
      if (!videoData.is_gif) {
        if (fallbackUrl) {
          const guessed = fallbackUrl.replace(/DASH_\d+\.mp4/, "DASH_audio.mp4");
          // `replace` returns the input untouched when the pattern is absent;
          // accepting that would present the video URL as the audio URL and
          // skip the manifest lookup below.
          if (guessed !== fallbackUrl) audioUrl = guessed;
        }
        if (!audioUrl) {
          audioUrl = await fetchAudioUrlFromDash(videoData.dash_url);
        }
      }

      formats.push({
        format_id: "dash",
        url: videoData.dash_url,
        ext: "mp4",
        protocol: "dash",
        width: videoData.width,
        height: videoData.height,
        format_note: "DASH manifest",
      });

      if (audioUrl && fallbackUrl) {
        // NOTE(review): kept for backward compatibility. This entry still
        // points at the video-only fallback URL; the audio is carried by the
        // separate "audio" format below — confirm how downstream merging
        // expects to receive the two tracks.
        formats.push({
          format_id: "mp4-with-audio",
          url: fallbackUrl,
          ext: "mp4",
          vcodec: "h264",
          acodec: "aac",
          width: videoData.width,
          height: videoData.height,
          format_note: "video+audio (merged)",
          http_headers: { "User-Agent": "dlpx/1.0" },
        });
      }

      if (audioUrl) {
        // Expose the resolved audio track so it can actually be downloaded.
        formats.push({
          format_id: "audio",
          url: audioUrl,
          ext: "m4a",
          vcodec: "none",
          acodec: "aac",
          format_note: "audio only",
          http_headers: { "User-Agent": "dlpx/1.0" },
        });
      }
    }

    const thumbnails: Thumbnail[] = [];
    // `thumbnail` may hold placeholders ("default", "self", "nsfw", …) instead
    // of a URL; only real http(s) URLs are emitted.
    if (post.thumbnail && /^https?:\/\//i.test(post.thumbnail)) {
      thumbnails.push({ url: post.thumbnail });
    }
    const previewSource = post.preview?.images?.[0]?.source;
    if (previewSource?.url) {
      thumbnails.push({
        // Preview URLs arrive with HTML-escaped ampersands.
        url: previewSource.url.replace(/&amp;/g, "&"),
        width: previewSource.width,
        height: previewSource.height,
        preference: 1,
      });
    }

    return {
      id: post.id,
      title: post.title,
      webpage_url: url,
      uploader: post.author,
      duration: videoData.duration,
      timestamp: post.created_utc,
      like_count: post.score,
      formats,
      thumbnails,
      extractor: this._NAME,
    };
  }
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import { BaseExtractor, ExtractorError } from "../core/types";
|
|
2
|
+
import type { InfoDict, Format, Thumbnail } from "../core/types";
|
|
3
|
+
|
|
4
|
+
/** One playable source inside the `ua` map of the Rumble player configuration. */
interface RumbleVideoSource {
  /** Media URL; a URL containing ".m3u8" is treated as HLS. */
  url: string;
  /** Width, copied to the format's `width`. */
  w?: number;
  /** Height; also used for the format's resolution label and quality. */
  h?: number;
  fps?: number;
  /** Copied to the format's `tbr`. */
  bitrate?: number;
}
|
|
11
|
+
|
|
12
|
+
/** Player configuration object parsed out of a Rumble page. Every member is optional. */
interface RumbleVideoInfo {
  /** Preferred title; page metadata is used when missing. */
  title?: string;
  description?: string;
  /** Reported as `uploader`. */
  author?: string;
  pubDate?: string;
  duration?: number;
  mainAncestorId?: string;
  /** Sources grouped by format key; the inner key is not used by the extractor. */
  ua?: Record<string, Record<string, RumbleVideoSource>>;
  /** Thumbnails; only entries with a `u` URL are emitted. */
  t?: { i?: string; u?: string }[];
}
|
|
22
|
+
|
|
23
|
+
/**
 * Extractor for Rumble video pages and embeds.
 *
 * The page is downloaded, its embed page is followed when the URL is not
 * already an embed, and the player configuration JSON is pulled out of the
 * HTML to build the format list.
 */
export class RumbleExtractor extends BaseExtractor {
  readonly _VALID_URL = /https?:\/\/(?:www\.)?rumble\.com\/(?:v[a-zA-Z0-9]+-[^/?]+\.html|embed\/([a-zA-Z0-9]+))/;
  readonly _NAME = "rumble";

  /**
   * Return the brace-balanced `{…}` object literal that starts at `start`.
   *
   * Braces inside double-quoted strings are ignored, so nested objects and
   * string values containing `{`/`}` are handled.
   *
   * @returns The object text including both outer braces, or `null` when
   *          `text[start]` is not `{` or the object is never closed.
   */
  private _sliceBalancedObject(text: string, start: number): string | null {
    if (text[start] !== "{") return null;

    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth += 1;
      } else if (ch === "}") {
        depth -= 1;
        if (depth === 0) return text.slice(start, i + 1);
      }
    }
    return null;
  }

  /**
   * Find `anchor` in `html` and JSON-parse the object literal that follows it.
   *
   * @param anchor Pattern whose match ends immediately before the opening `{`.
   * @returns The parsed object, or `null` when the anchor is absent or the
   *          text is not valid JSON (best-effort; never throws).
   */
  private _parseObjectAfter(html: string, anchor: RegExp): RumbleVideoInfo | null {
    const hit = anchor.exec(html);
    if (!hit) return null;

    const literal = this._sliceBalancedObject(html, hit.index + hit[0].length);
    if (literal === null) return null;

    try {
      return JSON.parse(literal) as RumbleVideoInfo;
    } catch {
      // Not strict JSON (e.g. a JavaScript object literal); let the caller
      // try the next known location.
      return null;
    }
  }

  /**
   * Download the page and build the info dictionary.
   *
   * @param url Rumble video page (`/v…-slug.html`) or embed (`/embed/<id>`) URL.
   * @returns Metadata with one format per source found in the player configuration;
   *          `formats` is empty when no configuration could be parsed.
   * @throws ExtractorError when the initial page request answers with a non-2xx status.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const response = await fetch(url, {
      headers: { "User-Agent": "Mozilla/5.0" },
    });

    if (!response.ok) {
      throw new ExtractorError(`Rumble fetch error: ${response.status}`);
    }

    const html = await response.text();

    const embedIdMatch = html.match(/rumble\.com\/embed\/([a-zA-Z0-9]+)/);
    let embedHtml = html;

    if (embedIdMatch && !url.includes("/embed/")) {
      const embedUrl = `https://rumble.com/embed/${embedIdMatch[1]}/`;
      const embedResponse = await fetch(embedUrl, {
        headers: { "User-Agent": "Mozilla/5.0" },
      });
      // Use the embed page only when it loaded; otherwise keep searching the
      // watch page instead of an error body.
      if (embedResponse.ok) {
        embedHtml = await embedResponse.text();
      }
    }

    // Known locations of the player configuration, tried in order until one
    // yields an object carrying a `ua` source map. Each pattern stops right
    // before the opening brace; the object is then cut out by brace balancing
    // because the configuration contains nested objects.
    const anchors: RegExp[] = [
      /RumblePlayer\.render\([^,]+,\s*(?=\{)/,
      /"video"\s*:\s*(?=\{)/,
      /<script[^>]*>\s*var\s+videoConfig\s*=\s*(?=\{)/,
    ];

    let videoData: RumbleVideoInfo = {};
    for (const anchor of anchors) {
      if (videoData.ua) break;
      const parsed = this._parseObjectAfter(embedHtml, anchor);
      if (parsed) videoData = parsed;
    }

    const ua = videoData.ua ?? {};
    const formats: Format[] = [];

    for (const [formatKey, sourceMap] of Object.entries(ua)) {
      // The payload is untyped JSON; skip anything that is not an object.
      if (typeof sourceMap !== "object" || sourceMap === null) continue;

      for (const source of Object.values(sourceMap)) {
        if (!source || !source.url) continue;

        const isHLS = source.url.includes(".m3u8");
        formats.push({
          format_id: formatKey,
          url: source.url,
          ext: "mp4",
          protocol: isHLS ? "m3u8" : undefined,
          width: source.w,
          height: source.h,
          fps: source.fps,
          tbr: source.bitrate,
          resolution: source.h ? `${source.h}p` : undefined,
          quality: source.h ?? 0,
        });
      }
    }

    const thumbnails: Thumbnail[] = [];
    for (const thumb of videoData.t ?? []) {
      if (thumb.u) thumbnails.push({ url: thumb.u });
    }

    // Title preference: player configuration, then og:title, then <title>.
    const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
    const ogTitle = html.match(/<meta[^>]+property=["']og:title["'][^>]+content=["']([^"']+)["']/i);
    const title = videoData.title ?? ogTitle?.[1] ?? titleMatch?.[1] ?? "Rumble Video";

    const idMatch = url.match(/\/(v[a-zA-Z0-9]+)-/) ?? url.match(/embed\/([a-zA-Z0-9]+)/);
    const id = idMatch?.[1] ?? "unknown";

    return {
      id,
      title,
      description: videoData.description,
      uploader: videoData.author,
      duration: videoData.duration,
      thumbnails,
      formats,
      webpage_url: url,
      extractor: this._NAME,
    };
  }
}
|