getraw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.gitattributes +4 -0
  2. package/CLAUDE.md +57 -0
  3. package/README.md +166 -0
  4. package/RESEARCH.md +109 -0
  5. package/STATUS.md +23 -0
  6. package/bun.lock +50 -0
  7. package/bunfig.toml +3 -0
  8. package/docs/plugin-guide.md +166 -0
  9. package/docs/supported-sites.md +41 -0
  10. package/package.json +30 -0
  11. package/src/cli/index.ts +52 -0
  12. package/src/cli/options.ts +97 -0
  13. package/src/core/format-sorter.ts +208 -0
  14. package/src/core/logger.ts +101 -0
  15. package/src/core/orchestrator.ts +140 -0
  16. package/src/core/output-template.ts +58 -0
  17. package/src/core/types.ts +237 -0
  18. package/src/downloaders/base.ts +25 -0
  19. package/src/downloaders/dash.ts +287 -0
  20. package/src/downloaders/fragment.ts +226 -0
  21. package/src/downloaders/hls.ts +170 -0
  22. package/src/downloaders/http.ts +260 -0
  23. package/src/extractors/archive-org.ts +126 -0
  24. package/src/extractors/bandcamp.ts +130 -0
  25. package/src/extractors/base.ts +29 -0
  26. package/src/extractors/bilibili/bangumi.ts +205 -0
  27. package/src/extractors/bilibili/index.ts +233 -0
  28. package/src/extractors/bilibili/wbi.ts +60 -0
  29. package/src/extractors/coub.ts +137 -0
  30. package/src/extractors/dailymotion.ts +99 -0
  31. package/src/extractors/dropbox.ts +52 -0
  32. package/src/extractors/generic.ts +118 -0
  33. package/src/extractors/google-drive.ts +106 -0
  34. package/src/extractors/imgur.ts +156 -0
  35. package/src/extractors/instagram/index.ts +263 -0
  36. package/src/extractors/instagram/reels.ts +166 -0
  37. package/src/extractors/kick/clips.ts +91 -0
  38. package/src/extractors/kick/index.ts +118 -0
  39. package/src/extractors/kick/live.ts +89 -0
  40. package/src/extractors/niconico/index.ts +209 -0
  41. package/src/extractors/odysee.ts +126 -0
  42. package/src/extractors/peertube.ts +143 -0
  43. package/src/extractors/reddit/gallery.ts +124 -0
  44. package/src/extractors/reddit/index.ts +203 -0
  45. package/src/extractors/rumble.ts +127 -0
  46. package/src/extractors/soundcloud/index.ts +161 -0
  47. package/src/extractors/soundcloud/playlist.ts +129 -0
  48. package/src/extractors/spotify.ts +97 -0
  49. package/src/extractors/streamable.ts +121 -0
  50. package/src/extractors/ted.ts +151 -0
  51. package/src/extractors/tiktok/index.ts +207 -0
  52. package/src/extractors/tiktok/user.ts +176 -0
  53. package/src/extractors/twitch/clips.ts +125 -0
  54. package/src/extractors/twitch/index.ts +136 -0
  55. package/src/extractors/twitch/live.ts +132 -0
  56. package/src/extractors/twitter/index.ts +140 -0
  57. package/src/extractors/twitter/spaces.ts +200 -0
  58. package/src/extractors/vimeo/index.ts +187 -0
  59. package/src/extractors/youtube/captions.ts +111 -0
  60. package/src/extractors/youtube/index.ts +252 -0
  61. package/src/extractors/youtube/innertube.ts +364 -0
  62. package/src/extractors/youtube/nsig.ts +105 -0
  63. package/src/extractors/youtube/playlist.ts +227 -0
  64. package/src/extractors/youtube/signature.ts +163 -0
  65. package/src/networking/client.ts +311 -0
  66. package/src/networking/cookies.ts +138 -0
  67. package/src/networking/proxy.ts +132 -0
  68. package/src/networking/tls.ts +67 -0
  69. package/src/networking/user-agents.ts +88 -0
  70. package/src/postprocessors/base.ts +44 -0
  71. package/src/postprocessors/extract-audio.ts +98 -0
  72. package/src/postprocessors/ffmpeg.ts +146 -0
  73. package/src/postprocessors/merge.ts +102 -0
  74. package/src/postprocessors/metadata.ts +73 -0
  75. package/src/postprocessors/sponsorblock.ts +162 -0
  76. package/src/postprocessors/subtitles.ts +285 -0
  77. package/src/postprocessors/thumbnails.ts +194 -0
  78. package/src/utils/sanitize.ts +36 -0
  79. package/src/utils/traverse.ts +68 -0
  80. package/tests/core/format-sorter.test.ts +96 -0
  81. package/tests/core/output-template.test.ts +56 -0
  82. package/tests/core/types.test.ts +79 -0
  83. package/tests/unit/downloaders/dash.test.ts +57 -0
  84. package/tests/unit/downloaders/hls.test.ts +120 -0
  85. package/tests/unit/downloaders/http.test.ts +114 -0
  86. package/tests/unit/extractors/bilibili.test.ts +83 -0
  87. package/tests/unit/extractors/instagram.test.ts +273 -0
  88. package/tests/unit/extractors/kick.test.ts +85 -0
  89. package/tests/unit/extractors/misc.test.ts +942 -0
  90. package/tests/unit/extractors/niconico.test.ts +61 -0
  91. package/tests/unit/extractors/reddit.test.ts +222 -0
  92. package/tests/unit/extractors/soundcloud.test.ts +299 -0
  93. package/tests/unit/extractors/tiktok.test.ts +260 -0
  94. package/tests/unit/extractors/twitch.test.ts +250 -0
  95. package/tests/unit/extractors/twitter.test.ts +181 -0
  96. package/tests/unit/extractors/vimeo.test.ts +253 -0
  97. package/tests/unit/extractors/youtube.test.ts +259 -0
  98. package/tests/unit/networking/client.test.ts +272 -0
  99. package/tests/unit/networking/cookies.test.ts +256 -0
  100. package/tests/unit/networking/proxy.test.ts +137 -0
  101. package/tests/unit/postprocessors/extract-audio.test.ts +63 -0
  102. package/tests/unit/postprocessors/merge.test.ts +61 -0
  103. package/tests/unit/postprocessors/subtitles.test.ts +89 -0
  104. package/tools/dashboard.ts +112 -0
  105. package/tsconfig.json +17 -0
@@ -0,0 +1,132 @@
1
+ import { BaseExtractor, ExtractorError } from "../../core/types";
2
+ import type { InfoDict, Format, Thumbnail } from "../../core/types";
3
+
4
+ const TWITCH_CLIENT_ID = "kimne78kx3ncx6brgo4mv6wki5h1ko"; // Sent as the "Client-ID" header by gqlRequest() and attached to the HLS format's http_headers.
5
+ const GQL_ENDPOINT = "https://gql.twitch.tv/gql"; // URL that gqlRequest() POSTs every operation to.
6
+
7
+ interface StreamAccessToken { // Shape of `streamPlaybackAccessToken` in the PlaybackAccessToken response.
8
+ value: string; // Becomes the `token` query parameter of the HLS playlist URL.
9
+ signature: string; // Becomes the `sig` query parameter of the HLS playlist URL.
10
+ }
11
+
12
+ interface StreamNode { // Shape of `user.stream` in the StreamMetadata response; every field is optional.
13
+ id?: string; // Not read by the extractor in this file.
14
+ title?: string; // Used as the InfoDict title when present.
15
+ viewersCount?: number; // Copied to InfoDict.view_count.
16
+ previewImageURL?: string; // Becomes the single thumbnail entry when present.
17
+ broadcaster?: { displayName?: string; login?: string; id?: string }; // Declared but not read by the extractor in this file.
18
+ game?: { name?: string }; // `name` becomes the only entry of InfoDict.categories.
19
+ }
20
+
21
+ interface GQLResponse<T> { // Envelope that gqlRequest() casts the parsed JSON body to (no runtime validation).
22
+ data: T; // Returned to the caller unchanged.
23
+ errors?: Array<{ message: string }>; // When non-empty, gqlRequest() throws using the first message.
24
+ }
25
+
26
/**
 * POSTs one operation to the Twitch GQL endpoint and unwraps its `data` payload.
 *
 * @param query - Request body; serialized as-is with `JSON.stringify`.
 * @returns The `data` member of the parsed JSON response, typed as `T`
 *   without any runtime validation.
 * @throws ExtractorError when the HTTP status is not OK, or when the response
 *   carries a non-empty `errors` array (only the first message is reported).
 */
async function gqlRequest<T>(query: object): Promise<T> {
  const requestInit = {
    method: "POST",
    headers: {
      "Client-ID": TWITCH_CLIENT_ID,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(query),
  };

  const response = await fetch(GQL_ENDPOINT, requestInit);
  if (!response.ok) {
    throw new ExtractorError(`Twitch GQL request failed: ${response.status}`);
  }

  const payload = (await response.json()) as GQLResponse<T>;
  const reported = payload.errors;
  if (reported && reported.length > 0) {
    throw new ExtractorError(`Twitch GQL error: ${reported[0].message}`);
  }
  return payload.data;
}
44
+
45
/**
 * Resolves a Twitch channel URL (`twitch.tv/<login>`) to a single live HLS format.
 *
 * Two persisted GQL operations are issued in parallel: `PlaybackAccessToken`
 * supplies the signed token embedded in the playlist URL, and `StreamMetadata`
 * supplies the optional title, viewer count, game and preview image.
 */
export class TwitchLiveExtractor extends BaseExtractor {
  readonly _VALID_URL =
    /^https?:\/\/(?:www\.)?twitch\.tv\/([a-zA-Z0-9_]+)(?:\/)?(?:\?.*)?$/;
  readonly _NAME = "twitch:live";

  /**
   * @param url - Channel page URL; must satisfy `_VALID_URL`.
   * @returns Info for the channel's stream. Metadata fields fall back to the
   *   login or `undefined` when `StreamMetadata` returns no stream node.
   * @throws ExtractorError if the URL does not match, a GQL request fails, or
   *   the response carries no playback access token.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const urlMatch = this._VALID_URL.exec(url);
    if (urlMatch === null) {
      throw new ExtractorError("Invalid Twitch channel URL");
    }
    // Logins are normalized to lower case before being sent to the API.
    const login = urlMatch[1].toLowerCase();

    const accessTokenQuery = {
      operationName: "PlaybackAccessToken",
      variables: {
        isLive: true,
        login,
        isVod: false,
        vodID: "",
        playerType: "site",
      },
      extensions: {
        persistedQuery: {
          version: 1,
          sha256Hash: "0828119ded1c13477966434e15800ff57ddacf13ba1911c129dc2200705b0712",
        },
      },
    };
    const metadataQuery = {
      operationName: "StreamMetadata",
      variables: { channelLogin: login },
      extensions: {
        persistedQuery: {
          version: 1,
          sha256Hash: "1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e",
        },
      },
    };

    // The two operations are independent, so they run concurrently.
    const [tokenData, streamData] = await Promise.all([
      gqlRequest<{ streamPlaybackAccessToken: StreamAccessToken }>(accessTokenQuery),
      gqlRequest<{ user?: { stream?: StreamNode } }>(metadataQuery),
    ]);

    const accessToken = tokenData.streamPlaybackAccessToken;
    if (!accessToken) {
      throw new ExtractorError("Could not get stream access token — channel may be offline");
    }

    // `p` and `play_session_id` are fresh random values on every extraction.
    const playlistParams = new URLSearchParams({
      sig: accessToken.signature,
      token: accessToken.value,
      allow_source: "true",
      allow_audio_only: "true",
      allow_spectre: "true",
      p: String(Math.floor(Math.random() * 999999)),
      platform: "web",
      play_session_id: crypto.randomUUID().replace(/-/g, ""),
      supported_codecs: "avc1",
    });
    const hlsUrl = `https://usher.twitchapps.com/api/channel/hls/${login}.m3u8?${playlistParams.toString()}`;

    const liveFormat: Format = {
      format_id: "hls-live",
      url: hlsUrl,
      ext: "mp4",
      protocol: "m3u8",
      http_headers: { "Client-ID": TWITCH_CLIENT_ID },
      format_note: "live stream",
    };

    const stream = streamData?.user?.stream;
    const previewUrl = stream?.previewImageURL;
    const gameName = stream?.game?.name;
    const thumbnails: Thumbnail[] = previewUrl ? [{ url: previewUrl }] : [];

    return {
      id: login,
      title: stream?.title ?? `${login} live stream`,
      webpage_url: url,
      uploader: login,
      uploader_id: login,
      view_count: stream?.viewersCount,
      live_status: "is_live",
      categories: gameName ? [gameName] : undefined,
      formats: [liveFormat],
      thumbnails,
    };
  }
}
@@ -0,0 +1,140 @@
1
+ import { BaseExtractor, ExtractorError } from "../../core/types";
2
+ import type { InfoDict, Format, Thumbnail } from "../../core/types";
3
+
4
+ const VALID_URL = /https?:\/\/(?:www\.)?(?:twitter|x)\.com\/\w+\/status\/(\d+)/; // Matches twitter.com / x.com status URLs and captures the numeric tweet id; unanchored, so it matches anywhere in the string.
5
+
6
+ interface VideoVariant { // One rendition listed under a media item's video_info.
7
+ content_type: string; // Compared against "video/mp4" and "application/x-mpegURL" by the extractor.
8
+ url: string; // Used directly as the format URL.
9
+ bitrate?: number; // Divided by 1000 to produce `tbr`; variants are sorted by it, highest first.
10
+ }
11
+
12
+ interface VideoInfo { // Video payload attached to a media item.
13
+ variants: VideoVariant[]; // Source of both the MP4 formats and the HLS format.
14
+ duration_millis?: number; // Divided by 1000 to produce InfoDict.duration.
15
+ }
16
+
17
+ interface MediaDetail { // One entry of TweetResult.mediaDetails.
18
+ type: string; // Only "video" and "animated_gif" entries contribute formats.
19
+ video_info?: VideoInfo; // Read only for video-like entries.
20
+ media_url_https?: string; // Pushed as a thumbnail URL when present.
21
+ original_info?: { width: number; height: number }; // Supplies the thumbnail's width and height.
22
+ }
23
+
24
+ interface TweetResult { // Shape the tweet-result JSON body is cast to (no runtime validation).
25
+ id_str: string; // Its absence is treated as "tweet not found or protected".
26
+ full_text: string; // Becomes the description; its first 100 characters become the title.
27
+ user?: { name: string; screen_name: string; id_str: string }; // name/screen_name feed uploader, uploader_id and uploader_url.
28
+ created_at?: string; // Parsed with `new Date(...)` to derive upload_date (YYYYMMDD).
29
+ favorite_count?: number; // Copied to InfoDict.like_count.
30
+ retweet_count?: number; // Declared but not read by the extractor in this file.
31
+ views?: { count?: string }; // `count` is a string; parsed with parseInt(…, 10) for view_count.
32
+ mediaDetails?: MediaDetail[]; // Iterated to collect thumbnails and formats.
33
+ }
34
+
35
/**
 * Extracts video / animated-GIF formats from a single tweet via the
 * `cdn.syndication.twimg.com/tweet-result` endpoint.
 */
export class TwitterExtractor extends BaseExtractor {
  readonly _VALID_URL = VALID_URL;
  readonly _NAME = "twitter";

  /**
   * @param url - Tweet URL matching `VALID_URL`.
   * @returns Tweet metadata plus one format per MP4 variant (highest bitrate
   *   first) and, when offered, one HLS format per video-like media item.
   *   `formats` is empty for tweets without video media.
   * @throws ExtractorError if the URL does not match, the HTTP request fails,
   *   or the response lacks `id_str`.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const match = VALID_URL.exec(url);
    if (!match) throw new ExtractorError(`twitter: invalid URL: ${url}`);
    const tweetId = match[1];

    const apiUrl = `https://cdn.syndication.twimg.com/tweet-result?id=${tweetId}&lang=en&features=tfw_timeline_list%3A%3Btfw_follower_count_sunset%3Atrue&token=0`;
    const resp = await fetch(apiUrl, {
      headers: {
        "User-Agent": "Mozilla/5.0 (compatible; dlpx/1.0)",
        Accept: "application/json",
        Referer: "https://platform.twitter.com/",
        Origin: "https://platform.twitter.com",
      },
    });

    if (!resp.ok) {
      throw new ExtractorError(`twitter: API request failed: ${resp.status} ${resp.statusText}`);
    }

    const data = (await resp.json()) as TweetResult;

    if (!data || !data.id_str) {
      throw new ExtractorError(`twitter: tweet not found or protected: ${tweetId}`);
    }

    const formats: Format[] = [];
    const thumbnails: Thumbnail[] = [];
    const mediaDetails = data.mediaDetails ?? [];

    for (const media of mediaDetails) {
      if (media.media_url_https) {
        thumbnails.push({
          url: media.media_url_https,
          width: media.original_info?.width,
          height: media.original_info?.height,
        });
      }

      // Photos carry no playable variants; only video-like media add formats.
      const isVideoLike = media.type === "video" || media.type === "animated_gif";
      const variants = isVideoLike ? media.video_info?.variants : undefined;
      if (!variants) continue;

      // filter() returns a new array, so sorting does not mutate the response.
      const mp4Variants = variants
        .filter((v) => v.content_type === "video/mp4")
        .sort((a, b) => (b.bitrate ?? 0) - (a.bitrate ?? 0));

      for (const variant of mp4Variants) {
        formats.push({
          format_id: `mp4-${variant.bitrate ?? 0}`,
          url: variant.url,
          ext: "mp4",
          tbr: variant.bitrate ? variant.bitrate / 1000 : undefined,
          vcodec: "h264",
          acodec: "aac",
        });
      }

      const hlsVariant = variants.find((v) => v.content_type === "application/x-mpegURL");
      if (hlsVariant) {
        formats.push({
          format_id: "hls",
          url: hlsVariant.url,
          ext: "mp4",
          protocol: "m3u8",
        });
      }
    }

    // An unparseable `created_at` yields an Invalid Date, and toISOString()
    // throws RangeError on it; drop the field rather than abort extraction.
    let uploadDate: string | undefined;
    if (data.created_at) {
      const created = new Date(data.created_at);
      if (!Number.isNaN(created.getTime())) {
        uploadDate = created.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }

    // parseInt returns NaN for non-numeric text; report "unknown" instead.
    const parsedViews = data.views?.count ? parseInt(data.views.count, 10) : NaN;
    const viewCount = Number.isNaN(parsedViews) ? undefined : parsedViews;

    // Use the first media item that reports a duration, so a leading photo in
    // a mixed-media tweet does not hide the video's length.
    const durationMillis = mediaDetails
      .map((media) => media.video_info?.duration_millis)
      .find((millis) => Boolean(millis));
    const duration = durationMillis ? durationMillis / 1000 : undefined;

    const screenName = data.user?.screen_name;

    return {
      id: tweetId,
      title: data.full_text ? data.full_text.slice(0, 100) : `Tweet ${tweetId}`,
      description: data.full_text,
      uploader: data.user?.name,
      uploader_id: screenName,
      uploader_url: screenName ? `https://twitter.com/${screenName}` : undefined,
      upload_date: uploadDate,
      view_count: viewCount,
      like_count: data.favorite_count,
      duration,
      formats,
      thumbnails,
      webpage_url: url,
      _type: "video",
    };
  }
}
@@ -0,0 +1,200 @@
1
+ import { BaseExtractor, ExtractorError } from "../../core/types";
2
+ import type { InfoDict, Format } from "../../core/types";
3
+
4
+ const VALID_URL = /https?:\/\/(?:www\.)?(?:twitter|x)\.com\/i\/spaces\/([A-Za-z0-9]+)/; // Matches twitter.com / x.com `/i/spaces/<id>` URLs and captures the alphanumeric space id; unanchored.
5
+
6
+ const BEARER_TOKEN = // Sent as `Authorization: Bearer …` on every request this extractor makes.
7
+ "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=";
8
+
9
+ interface GuestTokenResponse { // Shape the guest/activate.json response body is cast to.
10
+ guest_token: string; // Forwarded as the `x-guest-token` header on later requests.
11
+ }
12
+
13
+ interface AudioSpaceMetadata { // Shape of `data.audioSpace` in the AudioSpaceById response.
14
+ rest_id?: string; // Passed to getStreamUrl(); no stream lookup happens when it is absent.
15
+ metadata?: {
16
+ title?: string; // Used as the InfoDict title when present.
17
+ creator_results?: {
18
+ result?: {
19
+ legacy?: { name?: string; screen_name?: string }; // Feed uploader, uploader_id and uploader_url.
20
+ rest_id?: string; // Copied to InfoDict.channel_id.
21
+ };
22
+ };
23
+ started_at?: number; // Copied to `timestamp` and passed to `new Date(...)` for upload_date.
24
+ state?: string; // "Running" maps to is_live, "Ended" to was_live, anything else to not_live.
25
+ };
26
+ sharings?: unknown; // Declared but not read by the extractor in this file.
27
+ }
28
+
29
+ interface AudioSpaceResponse { // Envelope of the AudioSpaceById GraphQL response.
30
+ data?: {
31
+ audioSpace?: AudioSpaceMetadata; // Its absence is reported as "space not found or unavailable".
32
+ };
33
+ }
34
+
35
+ interface LiveVideoStreamStatus { // Shape the live_video_stream/status response body is cast to.
36
+ source?: { location?: string; noRedirectPlaybackUrl?: string }; // noRedirectPlaybackUrl is preferred; location is the fallback.
37
+ sessionId?: string; // Declared but not read by the extractor in this file.
38
+ }
39
+
40
/**
 * Extracts metadata and, when available, the HLS audio stream for a
 * Twitter/X Space (`/i/spaces/<id>`), using a guest token.
 */
export class TwitterSpacesExtractor extends BaseExtractor {
  readonly _VALID_URL = VALID_URL;
  readonly _NAME = "twitter:spaces";

  /**
   * Activates a guest session.
   *
   * @returns The `guest_token` from the activation response.
   * @throws ExtractorError on a non-OK status or a response without a token.
   */
  private async getGuestToken(): Promise<string> {
    const resp = await fetch("https://api.x.com/1.1/guest/activate.json", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${BEARER_TOKEN}`,
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": "TwitterAndroid/9.99.0-release.0",
      },
    });

    if (!resp.ok) {
      throw new ExtractorError(
        `twitter:spaces: failed to get guest token: ${resp.status}`,
      );
    }

    const data = (await resp.json()) as GuestTokenResponse;
    if (!data.guest_token) {
      throw new ExtractorError("twitter:spaces: no guest token in response");
    }
    return data.guest_token;
  }

  /**
   * Fetches the `AudioSpaceById` GraphQL document for a space.
   *
   * @param spaceId - Id captured from the space URL.
   * @param guestToken - Token obtained from `getGuestToken`.
   * @returns The `data.audioSpace` node of the response.
   * @throws ExtractorError on a non-OK status or when the node is missing.
   */
  private async getSpaceMetadata(
    spaceId: string,
    guestToken: string,
  ): Promise<AudioSpaceMetadata> {
    const variables = JSON.stringify({
      id: spaceId,
      isMetatagsQuery: false,
      withReplays: true,
      withListeners: true,
    });

    const features = JSON.stringify({
      spaces_2022_h2_spaces_communities: true,
      spaces_2022_h2_clipping: true,
      creator_subscriptions_tweet_preview_api_enabled: true,
    });

    const url =
      `https://api.x.com/graphql/kY7JFQmAeBaVp4UBdrK-wA/AudioSpaceById?` +
      `variables=${encodeURIComponent(variables)}&features=${encodeURIComponent(features)}`;

    const resp = await fetch(url, {
      headers: {
        Authorization: `Bearer ${BEARER_TOKEN}`,
        "x-guest-token": guestToken,
        "User-Agent": "TwitterAndroid/9.99.0-release.0",
        "Content-Type": "application/json",
      },
    });

    if (!resp.ok) {
      throw new ExtractorError(
        `twitter:spaces: GraphQL request failed: ${resp.status}`,
      );
    }

    const data = (await resp.json()) as AudioSpaceResponse;
    const space = data?.data?.audioSpace;
    if (!space) {
      throw new ExtractorError(
        `twitter:spaces: space not found or unavailable: ${spaceId}`,
      );
    }
    return space;
  }

  /**
   * Looks up the playback URL for a space's stream.
   *
   * @param mediaKey - Identifier appended to the `live_video_stream/status/` path.
   * @param guestToken - Token obtained from `getGuestToken`.
   * @returns `source.noRedirectPlaybackUrl` when present, else `source.location`.
   * @throws ExtractorError on a non-OK status or when neither URL is present.
   */
  private async getStreamUrl(
    mediaKey: string,
    guestToken: string,
  ): Promise<string> {
    const resp = await fetch(
      `https://twitter.com/i/api/1.1/live_video_stream/status/${mediaKey}`,
      {
        headers: {
          Authorization: `Bearer ${BEARER_TOKEN}`,
          "x-guest-token": guestToken,
          "User-Agent": "TwitterAndroid/9.99.0-release.0",
        },
      },
    );

    if (!resp.ok) {
      throw new ExtractorError(
        `twitter:spaces: stream status request failed: ${resp.status}`,
      );
    }

    const data = (await resp.json()) as LiveVideoStreamStatus;
    const location =
      data?.source?.noRedirectPlaybackUrl ?? data?.source?.location;
    if (!location) {
      throw new ExtractorError(
        "twitter:spaces: no stream location in response",
      );
    }
    return location;
  }

  /**
   * @param url - Space URL matching `VALID_URL`.
   * @returns Space metadata; `formats` holds one HLS audio entry when the
   *   stream lookup succeeds and is empty otherwise.
   * @throws ExtractorError if the URL does not match, or if the guest token
   *   or space metadata cannot be obtained.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const match = VALID_URL.exec(url);
    if (!match) throw new ExtractorError(`twitter:spaces: invalid URL: ${url}`);
    const spaceId = match[1];

    const guestToken = await this.getGuestToken();
    const space = await this.getSpaceMetadata(spaceId, guestToken);

    const metadata = space.metadata;
    const creatorLegacy = metadata?.creator_results?.result?.legacy;
    const creatorId = metadata?.creator_results?.result?.rest_id;

    const formats: Format[] = [];

    // NOTE(review): getStreamUrl's parameter is named `mediaKey`, but the
    // space's `rest_id` is what gets passed — confirm this endpoint accepts it.
    if (space.rest_id) {
      try {
        const streamUrl = await this.getStreamUrl(space.rest_id, guestToken);
        formats.push({
          format_id: "hls-audio",
          url: streamUrl,
          ext: "m4a",
          protocol: "m3u8",
          acodec: "aac",
          vcodec: "none",
        });
      } catch {
        // Deliberate best-effort: a failed stream lookup must not discard the
        // metadata already fetched, so the space is returned without formats.
      }
    }

    const startedAt = metadata?.started_at;

    // An out-of-range `started_at` yields an Invalid Date, and toISOString()
    // throws RangeError on it; drop upload_date rather than abort extraction.
    let uploadDate: string | undefined;
    if (startedAt) {
      const started = new Date(startedAt);
      if (!Number.isNaN(started.getTime())) {
        uploadDate = started.toISOString().slice(0, 10).replace(/-/g, "");
      }
    }

    let liveStatus: "is_live" | "was_live" | "not_live" = "not_live";
    if (metadata?.state === "Running") {
      liveStatus = "is_live";
    } else if (metadata?.state === "Ended") {
      liveStatus = "was_live";
    }

    return {
      id: spaceId,
      title: metadata?.title ?? `Twitter Space ${spaceId}`,
      uploader: creatorLegacy?.name,
      uploader_id: creatorLegacy?.screen_name,
      uploader_url: creatorLegacy?.screen_name
        ? `https://twitter.com/${creatorLegacy.screen_name}`
        : undefined,
      channel_id: creatorId,
      // NOTE(review): `started_at` is fed to `new Date()` above as if it were
      // milliseconds, yet is passed through here unscaled — confirm the unit
      // InfoDict.timestamp expects.
      timestamp: startedAt,
      upload_date: uploadDate,
      live_status: liveStatus,
      formats,
      webpage_url: url,
      _type: "video",
    };
  }
}
@@ -0,0 +1,187 @@
1
+ import { BaseExtractor, ExtractorError } from "../../core/types";
2
+ import type { InfoDict, Format, Thumbnail } from "../../core/types";
3
+
4
+ interface VimeoHLSCDN { // One entry of `request.files.hls.cdns`, keyed by CDN name.
5
+ url: string; // Fallback playlist URL.
6
+ avc_url?: string; // Preferred over `url` when present.
7
+ }
8
+
9
+ interface VimeoDashCDN { // One entry of `request.files.dash.cdns`, keyed by CDN name.
10
+ url: string; // Used directly as the DASH format URL.
11
+ }
12
+
13
+ interface VimeoProgressiveFile { // One directly downloadable rendition from `request.files.progressive`.
14
+ quality: string; // Used for both format_id (`http-<quality>`) and format_note.
15
+ mime: string; // The part after "/" becomes the file extension.
16
+ width?: number;
17
+ height?: number; // Also used as the format's `quality` rank (0 when absent).
18
+ fps?: number;
19
+ url: string;
20
+ size?: number; // Copied to the format's `filesize`.
21
+ }
22
+
23
+ interface VimeoConfig { // Shape the player `/config` JSON body is cast to (no runtime validation).
24
+ video: {
25
+ id: number; // Not read by the extractor; the id parsed from the URL is used instead.
26
+ title: string;
27
+ description?: string;
28
+ duration?: number; // Copied to InfoDict.duration unchanged.
29
+ owner?: {
30
+ name?: string; // Copied to InfoDict.uploader.
31
+ url?: string; // Copied to InfoDict.uploader_url.
32
+ account_type?: string; // Declared but not read by the extractor in this file.
33
+ };
34
+ thumbs?: Record<string, string>; // Key is parsed with parseInt as the thumbnail width; value is the URL.
35
+ embed_code?: string; // Declared but not read by the extractor in this file.
36
+ width?: number;
37
+ height?: number;
38
+ live_event?: { status?: string }; // Declared but not read by the extractor in this file.
39
+ };
40
+ request: {
41
+ files: { // Its absence is reported as "No media files found in Vimeo config".
42
+ hls?: { cdns?: Record<string, VimeoHLSCDN>; default_cdn?: string }; // default_cdn is listed first and gets source_preference 1.
43
+ dash?: { cdns?: Record<string, VimeoDashCDN>; default_cdn?: string }; // Same ordering rule as `hls`.
44
+ progressive?: VimeoProgressiveFile[];
45
+ };
46
+ cookie?: Record<string, string>; // Declared but not read by the extractor in this file.
47
+ };
48
+ }
49
+
50
/**
 * Single anchored pattern for every Vimeo URL shape the extractor accepts:
 * a bare id, `video/<id>`, `channels/<name>/<id>` and
 * `groups/<name>/videos/<id>`, on the bare, `www.` or `player.` host.
 * The scheme is optional so scheme-less input keeps working.
 */
const VIMEO_ID_PATTERN =
  /^(?:https?:\/\/)?(?:(?:www|player)\.)?vimeo\.com\/(?:video\/|channels\/[^/]+\/|groups\/[^/]+\/videos\/)?(\d+)/;

/**
 * Extracts the numeric video id from a Vimeo URL.
 *
 * The match is anchored at the start of the string, so an id is only taken
 * from the URL's own path. A `vimeo.com/<digits>` fragment inside a query
 * string, or on an unrelated host, is not mistaken for the video id. The
 * accepted shapes mirror the extractor's `_VALID_URL`, including
 * `vimeo.com/video/<id>`.
 *
 * @param url - Candidate Vimeo URL (scheme optional).
 * @returns The id as a string of digits, or `null` when the URL is not a
 *   recognized Vimeo video URL.
 */
function extractVimeoId(url: string): string | null {
  const match = VIMEO_ID_PATTERN.exec(url);
  return match?.[1] ?? null;
}
63
+
64
/**
 * Orders CDN names so the default CDN, when it actually exists in `cdns`,
 * comes first. A default that names a missing CDN is ignored.
 */
function orderCdnNames(cdns: Record<string, unknown>, defaultCdn?: string): string[] {
  const names = Object.keys(cdns);
  if (!defaultCdn || !names.includes(defaultCdn)) return names;
  return [defaultCdn, ...names.filter((name) => name !== defaultCdn)];
}

/**
 * Extracts formats and metadata for a Vimeo video from the player's
 * `/config` JSON document.
 */
export class VimeoExtractor extends BaseExtractor {
  readonly _VALID_URL =
    /^https?:\/\/(?:(?:www|player)\.)?vimeo\.com\/(?:video\/|channels\/[^/]+\/|groups\/[^/]+\/videos\/)?(\d+)/;
  readonly _NAME = "vimeo";

  /**
   * @param url - Vimeo video URL.
   * @returns Video metadata with progressive, HLS and DASH formats, in that
   *   order; within HLS and DASH the default CDN is listed first.
   * @throws ExtractorError if no id can be parsed, the config request fails,
   *   the config lacks `request.files` or `video`, or no format is found.
   */
  protected async _real_extract(url: string): Promise<InfoDict> {
    const videoId = extractVimeoId(url);
    if (!videoId) throw new ExtractorError("Could not extract Vimeo video ID");

    const configUrl = `https://player.vimeo.com/video/${videoId}/config`;
    const response = await fetch(configUrl, {
      headers: {
        Referer: `https://vimeo.com/${videoId}`,
        "User-Agent":
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      },
    });

    if (!response.ok) {
      throw new ExtractorError(`Vimeo config request failed: ${response.status}`);
    }

    // The cast is unchecked, so members the interface marks as required may
    // still be missing at runtime; they are guarded below.
    const config = (await response.json()) as VimeoConfig;
    const video = config?.video;
    const files = config?.request?.files;

    if (!files) throw new ExtractorError("No media files found in Vimeo config");
    if (!video) throw new ExtractorError("No video metadata found in Vimeo config");

    const formats: Format[] = [];

    for (const prog of files.progressive ?? []) {
      formats.push({
        format_id: `http-${prog.quality}`,
        url: prog.url,
        ext: prog.mime?.split("/")?.[1] ?? "mp4",
        width: prog.width,
        height: prog.height,
        fps: prog.fps,
        filesize: prog.size,
        format_note: prog.quality,
        vcodec: "h264",
        acodec: "aac",
        quality: prog.height ?? 0,
      });
    }

    if (files.hls?.cdns) {
      const cdns = files.hls.cdns;
      const defaultCdn = files.hls.default_cdn;

      for (const cdnName of orderCdnNames(cdns, defaultCdn)) {
        const cdn = cdns[cdnName];
        const hlsUrl = cdn?.avc_url ?? cdn?.url;
        if (hlsUrl) {
          formats.push({
            format_id: `hls-${cdnName}`,
            url: hlsUrl,
            ext: "mp4",
            protocol: "m3u8",
            format_note: `HLS (${cdnName})`,
            source_preference: cdnName === defaultCdn ? 1 : 0,
          });
        }
      }
    }

    if (files.dash?.cdns) {
      const cdns = files.dash.cdns;
      const defaultCdn = files.dash.default_cdn;

      for (const cdnName of orderCdnNames(cdns, defaultCdn)) {
        const dashUrl = cdns[cdnName]?.url;
        if (dashUrl) {
          formats.push({
            format_id: `dash-${cdnName}`,
            url: dashUrl,
            ext: "mp4",
            protocol: "dash",
            format_note: `DASH (${cdnName})`,
            source_preference: cdnName === defaultCdn ? 1 : 0,
          });
        }
      }
    }

    if (formats.length === 0) {
      throw new ExtractorError("No playable formats found for this Vimeo video");
    }

    const thumbnails: Thumbnail[] = [];
    if (video.thumbs) {
      for (const [size, thumbUrl] of Object.entries(video.thumbs)) {
        // Keys that do not start with digits leave the width unset.
        const dim = parseInt(size, 10);
        thumbnails.push({
          url: thumbUrl,
          width: Number.isNaN(dim) ? undefined : dim,
          id: size,
        });
      }
    }

    return {
      id: videoId,
      title: video.title,
      description: video.description,
      uploader: video.owner?.name,
      uploader_url: video.owner?.url,
      duration: video.duration,
      webpage_url: `https://vimeo.com/${videoId}`,
      width: video.width,
      height: video.height,
      thumbnails,
      formats,
    };
  }
}