getraw 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bun.lock CHANGED
@@ -5,7 +5,9 @@
5
5
  "": {
6
6
  "name": "dlpx",
7
7
  "dependencies": {
8
+ "googlevideo": "^4.0.4",
8
9
  "hls-parser": "^0.13.6",
10
+ "meriyah": "^6.0.7",
9
11
  "mpd-parser": "^1.3.0",
10
12
  "youtubei.js": "^17.0.1",
11
13
  },
@@ -36,6 +38,8 @@
36
38
 
37
39
  "global": ["global@4.4.0", "", { "dependencies": { "min-document": "^2.19.0", "process": "^0.11.10" } }, "sha512-wv/LAoHdRE3BeTGz53FAamhGlPLhlssK45usmGFThIi4XqnBmjKQ16u+RNbP7WvigRZDxUsM0J3gcQ5yicaL0w=="],
38
40
 
41
+ "googlevideo": ["googlevideo@4.0.4", "", { "dependencies": { "@bufbuild/protobuf": "^2.0.0" } }, "sha512-S/rfuoPBI+qXCEUPJeVhXsHoISMgVhOz8hHSpGWa0OztfHhh+g9EKaEcqAb/+ttO7meoNQNqIy9dfIpz7HPc4g=="],
42
+
39
43
  "hls-parser": ["hls-parser@0.13.6", "", {}, "sha512-I40sl22E2muqeSTpG8kMN2dAegAhubkXPXtnsUXFwdKwZK47d1Q+XwuX32VMZ++AZU5oeQIZqAnGNHxSG1sWaw=="],
40
44
 
41
45
  "meriyah": ["meriyah@6.1.4", "", {}, "sha512-Sz8FzjzI0kN13GK/6MVEsVzMZEPvOhnmmI1lU5+/1cGOiK3QUahntrNNtdVeihrO7t9JpoH75iMNXg6R6uWflQ=="],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "getraw",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
4
  "description": "Fast media downloader CLI built natively in Bun/TypeScript",
5
5
  "type": "module",
6
6
  "bin": {
@@ -10,11 +10,12 @@
10
10
  "dev": "bun run src/cli/index.ts",
11
11
  "test": "bun test",
12
12
  "build": "bun build src/cli/index.ts --compile --outfile=getraw",
13
- "dashboard": "bun run tools/dashboard.ts",
14
- "postinstall": "bun run scripts/patch-youtubei.js"
13
+ "dashboard": "bun run tools/dashboard.ts"
15
14
  },
16
15
  "dependencies": {
16
+ "googlevideo": "^4.0.4",
17
17
  "hls-parser": "^0.13.6",
18
+ "meriyah": "^6.0.7",
18
19
  "mpd-parser": "^1.3.0",
19
20
  "youtubei.js": "^17.0.1"
20
21
  },
@@ -1,28 +1,50 @@
1
1
  import { BaseExtractor, ExtractorError } from "../../core/types";
2
2
  import type { InfoDict, Format, Thumbnail } from "../../core/types";
3
3
  import { parseCaptionTracks } from "./captions";
4
+ import { InnerTubeClient } from "./innertube";
5
+ import type { RawFormat, PlayerResponse, StreamingData } from "./innertube";
6
+ import { decipherStreamUrl, setPageHtmlForPlayerExtraction } from "./player";
4
7
 
5
8
  const VALID_URL = /^https?:\/\/(?:(?:www|m|music)\.)?(?:youtube\.com\/(?:watch\?.*v=|shorts\/|live\/|embed\/|v\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
6
9
  const PLAYLIST_URL = /^https?:\/\/(?:(?:www|m|music)\.)?youtube\.com\/playlist\?.*list=([a-zA-Z0-9_-]+)/;
7
10
  const CHANNEL_URL = /^https?:\/\/(?:(?:www|m|music)\.)?youtube\.com\/(?:channel\/|@)([a-zA-Z0-9_-]+)/;
8
11
 
12
+ const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
13
+
9
14
  function generateCpn(): string {
10
15
  const chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_";
11
16
  return Array.from({ length: 16 }, () => chars[Math.floor(Math.random() * 64)]).join("");
12
17
  }
13
18
 
14
- let _innertube: Awaited<ReturnType<typeof createInnertube>> | null = null;
15
-
16
- async function createInnertube() {
17
- const { Innertube } = await import("youtubei.js");
18
- return Innertube.create({ generate_session_locally: true });
19
+ interface PageData {
20
+ playerResponse: PlayerResponse;
21
+ html: string;
19
22
  }
20
23
 
21
- async function getInnertube() {
22
- if (!_innertube) {
23
- _innertube = await createInnertube();
24
+ async function fetchPageData(videoId: string): Promise<PageData> {
25
+ const resp = await fetch(`https://www.youtube.com/watch?v=${videoId}`, {
26
+ headers: {
27
+ "User-Agent": USER_AGENT,
28
+ "Accept-Language": "en-US,en;q=0.9",
29
+ },
30
+ });
31
+
32
+ if (!resp.ok) {
33
+ throw new ExtractorError(`Failed to fetch YouTube page: ${resp.status}`);
24
34
  }
25
- return _innertube;
35
+
36
+ const html = await resp.text();
37
+ setPageHtmlForPlayerExtraction(html);
38
+
39
+ const prMatch = html.match(/var\s+ytInitialPlayerResponse\s*=\s*(\{.+?\});/s);
40
+ if (!prMatch) {
41
+ throw new ExtractorError("Could not extract player response from page");
42
+ }
43
+
44
+ return {
45
+ playerResponse: JSON.parse(prMatch[1]) as PlayerResponse,
46
+ html,
47
+ };
26
48
  }
27
49
 
28
50
  export class YouTubeExtractor extends BaseExtractor {
@@ -41,16 +63,22 @@ export class YouTubeExtractor extends BaseExtractor {
41
63
  }
42
64
 
43
65
  private async extractVideo(videoId: string): Promise<InfoDict> {
44
- const yt = await getInnertube();
45
- const info = await yt.getInfo(videoId);
66
+ const pageData = await fetchPageData(videoId);
67
+ const playerResponse = pageData.playerResponse;
68
+
69
+ const status = playerResponse.playabilityStatus;
70
+ if (status?.status !== "OK") {
71
+ throw new ExtractorError(status?.reason ?? "Video unavailable");
72
+ }
46
73
 
47
- if (!info.basic_info.title) {
74
+ const details = playerResponse.videoDetails;
75
+ if (!details?.title) {
48
76
  throw new ExtractorError("Could not extract video info");
49
77
  }
50
78
 
51
- const formats = await this.extractFormats(info, yt);
79
+ const formats = await this.extractFormats(playerResponse.streamingData, pageData.html, videoId);
52
80
 
53
- const thumbnails: Thumbnail[] = (info.basic_info.thumbnail ?? []).map((t: { url: string; width: number; height: number }) => ({
81
+ const thumbnails: Thumbnail[] = (details.thumbnail?.thumbnails ?? []).map((t) => ({
54
82
  url: t.url,
55
83
  width: t.width,
56
84
  height: t.height,
@@ -58,104 +86,146 @@ export class YouTubeExtractor extends BaseExtractor {
58
86
 
59
87
  const result: InfoDict = {
60
88
  id: videoId,
61
- title: info.basic_info.title,
89
+ title: details.title,
62
90
  formats,
63
91
  thumbnails,
64
- description: info.basic_info.short_description,
65
- channel: info.basic_info.author,
66
- channel_id: info.basic_info.channel_id,
67
- duration: info.basic_info.duration,
68
- view_count: info.basic_info.view_count,
92
+ description: details.shortDescription,
93
+ channel: details.author,
94
+ channel_id: details.channelId,
95
+ duration: parseInt(details.lengthSeconds, 10) || undefined,
96
+ view_count: parseInt(details.viewCount, 10) || undefined,
69
97
  webpage_url: `https://www.youtube.com/watch?v=${videoId}`,
70
- live_status: info.basic_info.is_live ? "is_live" : "not_live",
98
+ live_status: details.isLive ? "is_live" : "not_live",
71
99
  };
72
100
 
73
- // Extract captions from page response
74
- const pageResponse = await this.fetchPagePlayerResponse(videoId);
75
- if (pageResponse) {
76
- const captionTracks = pageResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
77
- if (captionTracks?.length) {
78
- const { subtitles, automatic_captions } = parseCaptionTracks(captionTracks);
79
- result.subtitles = subtitles;
80
- result.automatic_captions = automatic_captions;
81
- }
101
+ const captionTracks = playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
102
+ if (captionTracks?.length) {
103
+ const { subtitles, automatic_captions } = parseCaptionTracks(captionTracks);
104
+ result.subtitles = subtitles;
105
+ result.automatic_captions = automatic_captions;
82
106
  }
83
107
 
84
108
  return result;
85
109
  }
86
110
 
87
- private async extractFormats(info: { streaming_data?: { formats?: unknown[]; adaptive_formats?: unknown[] }; chooseFormat: (opts: { type: string; quality: string }) => unknown }, yt: { session: { player: unknown } }): Promise<Format[]> {
88
- const formats: Format[] = [];
89
- const player = yt.session.player;
111
+ private async extractFormats(streamingData: StreamingData | undefined, pageHtml: string, videoId?: string): Promise<Format[]> {
90
112
  const cpn = generateCpn();
113
+ const formats: Format[] = [];
91
114
 
92
- const allFormats = [
93
- ...(info.streaming_data?.formats ?? []),
94
- ...(info.streaming_data?.adaptive_formats ?? []),
95
- ];
115
+ // First: get formats from page response (muxed formats with signatureCipher)
116
+ if (streamingData) {
117
+ const pageFormats: RawFormat[] = [
118
+ ...(streamingData.formats ?? []),
119
+ ...(streamingData.adaptiveFormats ?? []),
120
+ ];
121
+
122
+ for (const raw of pageFormats) {
123
+ if (!raw.url && !raw.signatureCipher) continue;
124
+ try {
125
+ const url = await decipherStreamUrl(raw.url, raw.signatureCipher, pageHtml);
126
+ if (!url) continue;
127
+ const parsed = new URL(url);
128
+ parsed.searchParams.set("cpn", cpn);
129
+ formats.push(this.buildFormat(raw, parsed.toString()));
130
+ } catch {
131
+ continue;
132
+ }
133
+ }
134
+ }
96
135
 
97
- for (const raw of allFormats) {
98
- const f = raw as Record<string, unknown>;
136
+ if (videoId) {
99
137
  try {
100
- let url: string | undefined;
101
-
102
- if (typeof (f as { decipher?: unknown }).decipher === "function") {
103
- const deciphered = await (f as { decipher: (p: unknown) => Promise<unknown> }).decipher(player);
104
- if (typeof deciphered === "string") {
105
- const parsed = new URL(deciphered);
106
- parsed.searchParams.set("cpn", cpn);
107
- url = parsed.toString();
138
+ const iosFormats = await this.fetchIosFormats(videoId, pageHtml, cpn);
139
+ const existingItags = new Set(formats.map((f) => f.format_id));
140
+ for (const f of iosFormats) {
141
+ if (!existingItags.has(f.format_id)) {
142
+ formats.push(f);
108
143
  }
109
144
  }
110
-
111
- if (!url) continue;
112
-
113
- const mime = String(f.mime_type ?? "");
114
- const mimeMatch = mime.match(/^(video|audio)\/(\w+);\s*codecs="([^"]+)"/);
115
- const ext = mimeMatch?.[2] ?? "mp4";
116
- const codecs = mimeMatch?.[3] ?? "";
117
- const isVideo = mime.startsWith("video");
118
- const isAudio = mime.startsWith("audio");
119
-
120
- formats.push({
121
- format_id: String(f.itag ?? ""),
122
- url,
123
- ext,
124
- vcodec: isVideo ? codecs.split(",")[0]?.trim() : "none",
125
- acodec: isAudio ? codecs : (isVideo && codecs.includes(",") ? codecs.split(",")[1]?.trim() : undefined),
126
- width: (f.width as number) ?? undefined,
127
- height: (f.height as number) ?? undefined,
128
- fps: (f.fps as number) ?? undefined,
129
- tbr: f.bitrate ? Math.round((f.bitrate as number) / 1000) : undefined,
130
- filesize: f.content_length ? parseInt(String(f.content_length), 10) : undefined,
131
- format_note: String(f.quality_label ?? f.quality ?? ""),
132
- audio_channels: (f.audio_channels as number) ?? undefined,
133
- http_headers: {
134
- "Origin": "https://www.youtube.com",
135
- "Referer": "https://www.youtube.com/",
136
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
137
- },
138
- });
139
145
  } catch {
140
- continue;
146
+ // IOS client failed, continue with page formats
141
147
  }
142
148
  }
143
149
 
144
150
  return formats;
145
151
  }
146
152
 
147
- private async fetchPagePlayerResponse(videoId: string): Promise<Record<string, unknown> | null> {
153
+ private async fetchIosFormats(videoId: string, pageHtml: string, cpn: string): Promise<Format[]> {
154
+ const iosClient = InnerTubeClient.withClient("IOS");
155
+ let response: PlayerResponse;
148
156
  try {
149
- const resp = await fetch(`https://www.youtube.com/watch?v=${videoId}`, {
150
- headers: {
151
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
152
- },
153
- });
154
- const html = await resp.text();
155
- const match = html.match(/var\s+ytInitialPlayerResponse\s*=\s*(\{.+?\});/s);
156
- return match ? JSON.parse(match[1]) : null;
157
+ response = await iosClient.getPlayerResponse(videoId);
157
158
  } catch {
158
- return null;
159
+ const androidClient = InnerTubeClient.withClient("ANDROID");
160
+ response = await androidClient.getPlayerResponse(videoId);
161
+ }
162
+
163
+ if (response.playabilityStatus?.status !== "OK") return [];
164
+
165
+ const allRaw: RawFormat[] = [
166
+ ...(response.streamingData?.formats ?? []),
167
+ ...(response.streamingData?.adaptiveFormats ?? []),
168
+ ];
169
+
170
+ const formats: Format[] = [];
171
+ const IOS_UA = "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)";
172
+
173
+ for (const raw of allRaw) {
174
+ if (!raw.url) continue;
175
+ let finalUrl: string;
176
+ try {
177
+ const deciphered = await decipherStreamUrl(raw.url, undefined, pageHtml);
178
+ finalUrl = deciphered ?? raw.url;
179
+ } catch {
180
+ finalUrl = raw.url;
181
+ }
182
+
183
+ const parsed = new URL(finalUrl);
184
+ parsed.searchParams.set("cpn", cpn);
185
+ const format = this.buildFormat(raw, parsed.toString());
186
+ format.http_headers = {
187
+ "User-Agent": IOS_UA,
188
+ "Origin": "https://www.youtube.com",
189
+ "Referer": "https://www.youtube.com/",
190
+ };
191
+ formats.push(format);
159
192
  }
193
+
194
+ return formats;
195
+ }
196
+
197
+ private buildFormat(raw: RawFormat, url: string): Format {
198
+ const mime = raw.mimeType;
199
+ const mimeMatch = mime.match(/^(video|audio)\/(\w+);\s*codecs="([^"]+)"/);
200
+ const ext = mimeMatch?.[2] ?? "mp4";
201
+ const codecs = mimeMatch?.[3] ?? "";
202
+ const isVideo = mime.startsWith("video");
203
+ const isAudio = mime.startsWith("audio");
204
+
205
+ const format: Format = {
206
+ format_id: String(raw.itag),
207
+ url,
208
+ ext: isAudio && ext === "mp4" ? "m4a" : ext,
209
+ vcodec: isVideo ? codecs.split(",")[0]?.trim() : "none",
210
+ acodec: isAudio ? codecs : (isVideo && codecs.includes(",") ? codecs.split(",")[1]?.trim() : undefined),
211
+ width: raw.width,
212
+ height: raw.height,
213
+ fps: raw.fps,
214
+ tbr: raw.bitrate ? Math.round(raw.bitrate / 1000) : undefined,
215
+ filesize: raw.contentLength ? parseInt(raw.contentLength, 10) : undefined,
216
+ format_note: raw.qualityLabel ?? raw.quality ?? undefined,
217
+ audio_channels: raw.audioChannels,
218
+ http_headers: {
219
+ "Origin": "https://www.youtube.com",
220
+ "Referer": "https://www.youtube.com/",
221
+ "User-Agent": USER_AGENT,
222
+ },
223
+ };
224
+
225
+ if (raw.width && raw.height) {
226
+ format.resolution = `${raw.width}x${raw.height}`;
227
+ }
228
+
229
+ return format;
160
230
  }
161
231
  }
@@ -122,6 +122,13 @@ const CLIENTS: Record<string, ClientContext> = {
122
122
  apiKey: "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
123
123
  clientId: 85,
124
124
  },
125
+ IOS: {
126
+ clientName: "IOS",
127
+ clientVersion: "19.45.4",
128
+ userAgent: "com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)",
129
+ apiKey: "AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc",
130
+ clientId: 5,
131
+ },
125
132
  };
126
133
 
127
134
  const PLAYER_ENDPOINT = "https://www.youtube.com/youtubei/v1/player";
@@ -130,22 +137,28 @@ const BROWSE_ENDPOINT = "https://www.youtube.com/youtubei/v1/browse";
130
137
  export class InnerTubeClient {
131
138
  private clientName: string;
132
139
  private context: ClientContext;
140
+ private signatureTimestamp: number;
133
141
 
134
- constructor(clientName: "WEB" | "ANDROID" | "TVHTML5_EMBED" = "WEB") {
142
+ constructor(clientName: "WEB" | "ANDROID" | "TVHTML5_EMBED" | "IOS" = "WEB", signatureTimestamp = 20073) {
135
143
  this.clientName = clientName;
136
144
  this.context = CLIENTS[clientName];
145
+ this.signatureTimestamp = signatureTimestamp;
137
146
  }
138
147
 
139
- async getPlayerResponse(videoId: string, embedUrl?: string): Promise<PlayerResponse> {
140
- const body = this.buildPlayerBody(videoId, embedUrl);
148
+ async getPlayerResponse(videoId: string, embedUrl?: string, visitorData?: string): Promise<PlayerResponse> {
149
+ const body = this.buildPlayerBody(videoId, embedUrl, visitorData);
150
+ const headers: Record<string, string> = {
151
+ "Content-Type": "application/json",
152
+ "User-Agent": this.context.userAgent,
153
+ "X-YouTube-Client-Name": String(this.context.clientId ?? 1),
154
+ "X-YouTube-Client-Version": this.context.clientVersion,
155
+ };
156
+ if (visitorData) {
157
+ headers["X-Goog-Visitor-Id"] = visitorData;
158
+ }
141
159
  const response = await fetch(`${PLAYER_ENDPOINT}?key=${this.context.apiKey}&prettyPrint=false`, {
142
160
  method: "POST",
143
- headers: {
144
- "Content-Type": "application/json",
145
- "User-Agent": this.context.userAgent,
146
- "X-YouTube-Client-Name": String(this.context.clientId ?? 1),
147
- "X-YouTube-Client-Version": this.context.clientVersion,
148
- },
161
+ headers,
149
162
  body: JSON.stringify(body),
150
163
  });
151
164
 
@@ -193,26 +206,37 @@ export class InnerTubeClient {
193
206
  return response.json() as Promise<BrowseResponse>;
194
207
  }
195
208
 
196
- private buildPlayerBody(videoId: string, embedUrl?: string): Record<string, unknown> {
209
+ private buildPlayerBody(videoId: string, embedUrl?: string, visitorData?: string): Record<string, unknown> {
210
+ const clientContext: Record<string, unknown> = {
211
+ clientName: this.context.clientName,
212
+ clientVersion: this.context.clientVersion,
213
+ hl: "en",
214
+ gl: "US",
215
+ };
216
+ if (visitorData) {
217
+ clientContext.visitorData = visitorData;
218
+ }
197
219
  const body: Record<string, unknown> = {
198
220
  videoId,
199
221
  context: {
200
- client: {
201
- clientName: this.context.clientName,
202
- clientVersion: this.context.clientVersion,
203
- hl: "en",
204
- gl: "US",
205
- },
222
+ client: clientContext,
206
223
  },
207
224
  playbackContext: {
208
225
  contentPlaybackContext: {
209
- signatureTimestamp: 20073,
226
+ signatureTimestamp: this.signatureTimestamp,
210
227
  },
211
228
  },
212
229
  contentCheckOk: true,
213
230
  racyCheckOk: true,
214
231
  };
215
232
 
233
+ if (this.clientName === "IOS") {
234
+ clientContext.deviceMake = "Apple";
235
+ clientContext.deviceModel = "iPhone16,2";
236
+ clientContext.osName = "iPhone";
237
+ clientContext.osVersion = "17.5.1.21F90";
238
+ }
239
+
216
240
  if (this.clientName === "TVHTML5_EMBED" && embedUrl) {
217
241
  (body.context as Record<string, unknown>).thirdParty = {
218
242
  embedUrl,
@@ -310,8 +334,12 @@ export class InnerTubeClient {
310
334
  return { subtitles, automatic_captions };
311
335
  }
312
336
 
313
- static withClient(clientName: "WEB" | "ANDROID" | "TVHTML5_EMBED"): InnerTubeClient {
314
- return new InnerTubeClient(clientName);
337
+ setSignatureTimestamp(sts: number): void {
338
+ this.signatureTimestamp = sts;
339
+ }
340
+
341
+ static withClient(clientName: "WEB" | "ANDROID" | "TVHTML5_EMBED" | "IOS", signatureTimestamp?: number): InnerTubeClient {
342
+ return new InnerTubeClient(clientName, signatureTimestamp);
315
343
  }
316
344
  }
317
345