@zetagoaurum-dev/straw 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,14 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [1.2.0] - "Deep Metadata & Formats Engine" - 2026-02-27
6
+ - **Feat:** Added extraction of `subscribers`, `likes`, and `comments` directly from YouTube's `ytInitialData` payload, without external parsing overhead.
7
+ - **Feat:** Split the `formats` array into an object with three distinct categories: `video` (combined video and audio), `videoOnly`, and `audio` (audio-only), removing ambiguity when downloading specific streams.
8
+
9
+ ## [1.1.1] - "Performance Patch" - 2026-02-27
10
+ - **Perf:** Re-engineered the YouTube scraper in Node.js and Python to use the `IOS` InnerTube API directly, injecting localized `visitorData` tokens to seamlessly bypass bot checks and cipher encryption. Video format lists are returned instantaneously for optimal downloading infrastructure.
11
+ - **Fix:** Fixed HTML parser blocking on high-volume deployed servers by switching to the direct `POST /youtubei/v1/player` endpoint.
12
+
5
13
  ## [1.1.0] - "Milk Tea" Release - 2026-02-27
6
14
 
7
15
  ### Changed
package/dist/index.d.mts CHANGED
@@ -51,11 +51,18 @@ interface YouTubeFormats {
51
51
  interface YouTubeResult {
52
52
  title: string;
53
53
  author: string;
54
+ subscribers: string;
54
55
  description: string;
55
56
  views: string;
57
+ likes: string;
58
+ comments: string;
56
59
  durationSeconds: string;
57
60
  thumbnail: string;
58
- formats: YouTubeFormats[];
61
+ formats: {
62
+ video: YouTubeFormats[];
63
+ videoOnly: YouTubeFormats[];
64
+ audio: YouTubeFormats[];
65
+ };
59
66
  }
60
67
  declare class YouTubeScraper {
61
68
  private client;
package/dist/index.d.ts CHANGED
@@ -51,11 +51,18 @@ interface YouTubeFormats {
51
51
  interface YouTubeResult {
52
52
  title: string;
53
53
  author: string;
54
+ subscribers: string;
54
55
  description: string;
55
56
  views: string;
57
+ likes: string;
58
+ comments: string;
56
59
  durationSeconds: string;
57
60
  thumbnail: string;
58
- formats: YouTubeFormats[];
61
+ formats: {
62
+ video: YouTubeFormats[];
63
+ videoOnly: YouTubeFormats[];
64
+ audio: YouTubeFormats[];
65
+ };
59
66
  }
60
67
  declare class YouTubeScraper {
61
68
  private client;
package/dist/index.js CHANGED
@@ -185,28 +185,94 @@ var YouTubeScraper = class {
185
185
  * Parses the ytInitialPlayerResponse object embedded in the watch HTML.
186
186
  */
187
187
  async scrapeVideo(url) {
188
+ const videoIdMatch = url.match(/(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))([^"&?\/\s]{11})/);
189
+ if (!videoIdMatch || !videoIdMatch[1]) {
190
+ throw new Error("Invalid YouTube URL");
191
+ }
192
+ const videoId = videoIdMatch[1];
188
193
  const html = await this.client.getText(url, {
189
- headers: {
190
- "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430"
191
- }
194
+ headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
192
195
  });
193
196
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
194
197
  const match = html.match(regex);
195
- if (!match || !match[1]) {
196
- throw new Error("ytInitialPlayerResponse not found. YouTube might have changed their layout or the IP is blocked.");
198
+ let visitorData = "";
199
+ let details = {};
200
+ let initialData = {};
201
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
202
+ if (dataMatch && dataMatch[1]) {
203
+ try {
204
+ initialData = JSON.parse(dataMatch[1]);
205
+ } catch (e) {
206
+ }
197
207
  }
198
- const data = JSON.parse(match[1]);
199
- const details = data?.videoDetails;
200
- const streamingData = data?.streamingData;
208
+ if (match && match[1]) {
209
+ const data = JSON.parse(match[1]);
210
+ details = data?.videoDetails || {};
211
+ visitorData = data?.responseContext?.visitorData || "";
212
+ }
213
+ if (!visitorData) {
214
+ const vdMatch = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
215
+ if (vdMatch) visitorData = vdMatch[1];
216
+ }
217
+ const payload = {
218
+ context: {
219
+ client: {
220
+ hl: "en",
221
+ gl: "US",
222
+ clientName: "IOS",
223
+ clientVersion: "19.28.1",
224
+ osName: "iOS",
225
+ osVersion: "17.5.1",
226
+ deviceMake: "Apple",
227
+ deviceModel: "iPhone16,2",
228
+ visitorData
229
+ }
230
+ },
231
+ videoId
232
+ };
233
+ const res = await this.client.request("https://www.youtube.com/youtubei/v1/player", {
234
+ method: "POST",
235
+ headers: {
236
+ "Accept": "application/json",
237
+ "Content-Type": "application/json",
238
+ "User-Agent": "com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)"
239
+ },
240
+ body: JSON.stringify(payload)
241
+ });
242
+ const apiData = await res.json();
243
+ console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
244
+ if (!details.title) {
245
+ details = apiData?.videoDetails || {};
246
+ }
247
+ const streamingData = apiData?.streamingData;
201
248
  if (!details) {
202
249
  throw new Error("Video details not found inside player response.");
203
250
  }
204
- const formats = [];
251
+ let subscribers = "";
252
+ let likes = "";
253
+ let comments = "";
254
+ try {
255
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
256
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
257
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
258
+ }
259
+ const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
260
+ const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
261
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
262
+ const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
263
+ if (commentsPanel) {
264
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
265
+ }
266
+ } catch (e) {
267
+ }
268
+ const video = [];
269
+ const videoOnly = [];
270
+ const audio = [];
205
271
  const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
206
272
  for (const format of rawFormats) {
207
273
  if (format.url) {
208
274
  const mimeType = format.mimeType || "";
209
- formats.push({
275
+ const formatObj = {
210
276
  url: format.url,
211
277
  mimeType,
212
278
  width: format.width,
@@ -215,19 +281,27 @@ var YouTubeScraper = class {
215
281
  bitrate: format.bitrate,
216
282
  hasAudio: mimeType.includes("audio/"),
217
283
  hasVideo: mimeType.includes("video/")
218
- });
219
- } else if (format.signatureCipher) {
220
- continue;
284
+ };
285
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
286
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
287
+ else if (formatObj.hasAudio) audio.push(formatObj);
221
288
  }
222
289
  }
223
290
  return {
224
291
  title: details.title || "",
225
292
  author: details.author || "",
293
+ subscribers,
226
294
  description: details.shortDescription || "",
227
295
  views: details.viewCount || "0",
296
+ likes,
297
+ comments,
228
298
  durationSeconds: details.lengthSeconds || "0",
229
299
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
230
- formats
300
+ formats: {
301
+ video,
302
+ videoOnly,
303
+ audio
304
+ }
231
305
  };
232
306
  }
233
307
  };
package/dist/index.mjs CHANGED
@@ -145,28 +145,94 @@ var YouTubeScraper = class {
145
145
  * Parses the ytInitialPlayerResponse object embedded in the watch HTML.
146
146
  */
147
147
  async scrapeVideo(url) {
148
+ const videoIdMatch = url.match(/(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))([^"&?\/\s]{11})/);
149
+ if (!videoIdMatch || !videoIdMatch[1]) {
150
+ throw new Error("Invalid YouTube URL");
151
+ }
152
+ const videoId = videoIdMatch[1];
148
153
  const html = await this.client.getText(url, {
149
- headers: {
150
- "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430"
151
- }
154
+ headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
152
155
  });
153
156
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
154
157
  const match = html.match(regex);
155
- if (!match || !match[1]) {
156
- throw new Error("ytInitialPlayerResponse not found. YouTube might have changed their layout or the IP is blocked.");
158
+ let visitorData = "";
159
+ let details = {};
160
+ let initialData = {};
161
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
162
+ if (dataMatch && dataMatch[1]) {
163
+ try {
164
+ initialData = JSON.parse(dataMatch[1]);
165
+ } catch (e) {
166
+ }
157
167
  }
158
- const data = JSON.parse(match[1]);
159
- const details = data?.videoDetails;
160
- const streamingData = data?.streamingData;
168
+ if (match && match[1]) {
169
+ const data = JSON.parse(match[1]);
170
+ details = data?.videoDetails || {};
171
+ visitorData = data?.responseContext?.visitorData || "";
172
+ }
173
+ if (!visitorData) {
174
+ const vdMatch = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
175
+ if (vdMatch) visitorData = vdMatch[1];
176
+ }
177
+ const payload = {
178
+ context: {
179
+ client: {
180
+ hl: "en",
181
+ gl: "US",
182
+ clientName: "IOS",
183
+ clientVersion: "19.28.1",
184
+ osName: "iOS",
185
+ osVersion: "17.5.1",
186
+ deviceMake: "Apple",
187
+ deviceModel: "iPhone16,2",
188
+ visitorData
189
+ }
190
+ },
191
+ videoId
192
+ };
193
+ const res = await this.client.request("https://www.youtube.com/youtubei/v1/player", {
194
+ method: "POST",
195
+ headers: {
196
+ "Accept": "application/json",
197
+ "Content-Type": "application/json",
198
+ "User-Agent": "com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)"
199
+ },
200
+ body: JSON.stringify(payload)
201
+ });
202
+ const apiData = await res.json();
203
+ console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
204
+ if (!details.title) {
205
+ details = apiData?.videoDetails || {};
206
+ }
207
+ const streamingData = apiData?.streamingData;
161
208
  if (!details) {
162
209
  throw new Error("Video details not found inside player response.");
163
210
  }
164
- const formats = [];
211
+ let subscribers = "";
212
+ let likes = "";
213
+ let comments = "";
214
+ try {
215
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
216
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
217
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
218
+ }
219
+ const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
220
+ const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
221
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
222
+ const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
223
+ if (commentsPanel) {
224
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
225
+ }
226
+ } catch (e) {
227
+ }
228
+ const video = [];
229
+ const videoOnly = [];
230
+ const audio = [];
165
231
  const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
166
232
  for (const format of rawFormats) {
167
233
  if (format.url) {
168
234
  const mimeType = format.mimeType || "";
169
- formats.push({
235
+ const formatObj = {
170
236
  url: format.url,
171
237
  mimeType,
172
238
  width: format.width,
@@ -175,19 +241,27 @@ var YouTubeScraper = class {
175
241
  bitrate: format.bitrate,
176
242
  hasAudio: mimeType.includes("audio/"),
177
243
  hasVideo: mimeType.includes("video/")
178
- });
179
- } else if (format.signatureCipher) {
180
- continue;
244
+ };
245
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
246
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
247
+ else if (formatObj.hasAudio) audio.push(formatObj);
181
248
  }
182
249
  }
183
250
  return {
184
251
  title: details.title || "",
185
252
  author: details.author || "",
253
+ subscribers,
186
254
  description: details.shortDescription || "",
187
255
  views: details.viewCount || "0",
256
+ likes,
257
+ comments,
188
258
  durationSeconds: details.lengthSeconds || "0",
189
259
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
190
- formats
260
+ formats: {
261
+ video,
262
+ videoOnly,
263
+ audio
264
+ }
191
265
  };
192
266
  }
193
267
  };
@@ -0,0 +1,46 @@
1
+ const fs = require('fs');
2
+ const { fetch } = require('undici');
3
+ const straw = require('./dist/index.js');
4
+
5
+ async function download() {
6
+ const yt = new straw.YouTubeScraper();
7
+ console.log('Scraping metadata and direct links...');
8
+ const res = await yt.scrapeVideo('https://youtu.be/_4j1Abt_AiM?si=_dA2lroz096f1cYp');
9
+
10
+ // Find a combined video+audio format, or fallback to the highest quality video format
11
+ const combined = res.formats.find(f => f.hasVideo && f.hasAudio);
12
+ const bestVideo = res.formats.filter(f => f.hasVideo).sort((a, b) => (b.width || 0) - (a.width || 0))[0];
13
+
14
+ const target = combined || bestVideo;
15
+
16
+ if (!target) {
17
+ console.log('No suitable downloadable format found.');
18
+ return;
19
+ }
20
+
21
+ console.log(`Downloading: ${res.title}`);
22
+ console.log(`Format: ${target.mimeType} (${target.width || 'unknown'}x${target.height || 'unknown'})`);
23
+
24
+ // To avoid buffering the whole video in memory, we stream it to the file
25
+ const outPath = 'downloaded_video.mp4';
26
+ const outStream = fs.createWriteStream(outPath);
27
+
28
+ console.log('Initiating download stream...');
29
+ const response = await fetch(target.url);
30
+ if (!response.body) throw new Error('No response body');
31
+
32
+ const reader = response.body.getReader();
33
+ let downloaded = 0;
34
+
35
+ while (true) {
36
+ const { done, value } = await reader.read();
37
+ if (done) break;
38
+ outStream.write(value);
39
+ downloaded += value.length;
40
+ process.stdout.write(`\rDownloaded: ${(downloaded / 1024 / 1024).toFixed(2)} MB`);
41
+ }
42
+ outStream.end();
43
+ console.log(`\nDownload complete! Saved to ${outPath}`);
44
+ }
45
+
46
+ download().catch(console.error);
File without changes
package/find_keys.js ADDED
@@ -0,0 +1,25 @@
1
+ const fs = require('fs');
2
+
3
+ const data = JSON.parse(fs.readFileSync('next_api_dump.json', 'utf-8'));
4
+
5
+ function findKey(obj, key, path = '') {
6
+ if (obj === null || typeof obj !== 'object') return;
7
+ if (Array.isArray(obj)) {
8
+ for (let i = 0; i < obj.length; i++) {
9
+ findKey(obj[i], key, `${path}[${i}]`);
10
+ }
11
+ } else {
12
+ for (const k in obj) {
13
+ if (k === key) {
14
+ console.log(`Found ${key} at ${path}.${k} =`, JSON.stringify(obj[k]).substring(0, 100));
15
+ }
16
+ findKey(obj[k], key, `${path}.${k}`);
17
+ }
18
+ }
19
+ }
20
+
21
+ findKey(data, 'subscriberCountText');
22
+ findKey(data, 'likeCount');
23
+ findKey(data, 'likeCountWithLikeText');
24
+ findKey(data, 'description');
25
+ findKey(data, 'commentCount');