@zetagoaurum-dev/straw 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,10 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [1.2.0] "Deep Metadata & Formats Engine" - 2026-02-27
6
+ - **Feat:** Added extraction of `subscribers`, `likes`, and `comments` directly from YouTube's `ytInitialData` payload without external parsing overhead.
7
+ - **Feat:** Split `formats` into three unambiguous categories — `video` (combined audio and video), `videoOnly`, and `audio` (audio-only) — so callers can select a specific stream type directly. Note that `formats` is now an object with these three keys rather than a flat array.
8
+
5
9
  ## [1.1.1] "Performance Patch" - 2026-02-27
6
10
  - **Perf:** Re-engineered the YouTube scraper in Node.js and Python to use the `IOS` InnerTube API directly, injecting localized `visitorData` tokens to seamlessly bypass bot checks and cipher encryption. Video format lists are returned instantaneously for optimal downloading infrastructure.
7
11
  - **Fix:** Fixed HTML parser blocking on high-volume deployed servers by upgrading to the direct `POST /youtubei/v1/player` endpoints.
package/dist/index.d.mts CHANGED
@@ -51,11 +51,18 @@ interface YouTubeFormats {
51
51
  interface YouTubeResult {
52
52
  title: string;
53
53
  author: string;
54
+ subscribers: string;
54
55
  description: string;
55
56
  views: string;
57
+ likes: string;
58
+ comments: string;
56
59
  durationSeconds: string;
57
60
  thumbnail: string;
58
- formats: YouTubeFormats[];
61
+ formats: {
62
+ video: YouTubeFormats[];
63
+ videoOnly: YouTubeFormats[];
64
+ audio: YouTubeFormats[];
65
+ };
59
66
  }
60
67
  declare class YouTubeScraper {
61
68
  private client;
package/dist/index.d.ts CHANGED
@@ -51,11 +51,18 @@ interface YouTubeFormats {
51
51
  interface YouTubeResult {
52
52
  title: string;
53
53
  author: string;
54
+ subscribers: string;
54
55
  description: string;
55
56
  views: string;
57
+ likes: string;
58
+ comments: string;
56
59
  durationSeconds: string;
57
60
  thumbnail: string;
58
- formats: YouTubeFormats[];
61
+ formats: {
62
+ video: YouTubeFormats[];
63
+ videoOnly: YouTubeFormats[];
64
+ audio: YouTubeFormats[];
65
+ };
59
66
  }
60
67
  declare class YouTubeScraper {
61
68
  private client;
package/dist/index.js CHANGED
@@ -191,12 +191,20 @@ var YouTubeScraper = class {
191
191
  }
192
192
  const videoId = videoIdMatch[1];
193
193
  const html = await this.client.getText(url, {
194
- headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
194
+ headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
195
195
  });
196
196
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
197
197
  const match = html.match(regex);
198
198
  let visitorData = "";
199
199
  let details = {};
200
+ let initialData = {};
201
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
202
+ if (dataMatch && dataMatch[1]) {
203
+ try {
204
+ initialData = JSON.parse(dataMatch[1]);
205
+ } catch (e) {
206
+ }
207
+ }
200
208
  if (match && match[1]) {
201
209
  const data = JSON.parse(match[1]);
202
210
  details = data?.videoDetails || {};
@@ -232,6 +240,7 @@ var YouTubeScraper = class {
232
240
  body: JSON.stringify(payload)
233
241
  });
234
242
  const apiData = await res.json();
243
+ console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
235
244
  if (!details.title) {
236
245
  details = apiData?.videoDetails || {};
237
246
  }
@@ -239,12 +248,31 @@ var YouTubeScraper = class {
239
248
  if (!details) {
240
249
  throw new Error("Video details not found inside player response.");
241
250
  }
242
- const formats = [];
251
+ let subscribers = "";
252
+ let likes = "";
253
+ let comments = "";
254
+ try {
255
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
256
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
257
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
258
+ }
259
+ const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
260
+ const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
261
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
262
+ const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
263
+ if (commentsPanel) {
264
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
265
+ }
266
+ } catch (e) {
267
+ }
268
+ const video = [];
269
+ const videoOnly = [];
270
+ const audio = [];
243
271
  const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
244
272
  for (const format of rawFormats) {
245
273
  if (format.url) {
246
274
  const mimeType = format.mimeType || "";
247
- formats.push({
275
+ const formatObj = {
248
276
  url: format.url,
249
277
  mimeType,
250
278
  width: format.width,
@@ -253,19 +281,27 @@ var YouTubeScraper = class {
253
281
  bitrate: format.bitrate,
254
282
  hasAudio: mimeType.includes("audio/"),
255
283
  hasVideo: mimeType.includes("video/")
256
- });
257
- } else if (format.signatureCipher) {
258
- continue;
284
+ };
285
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
286
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
287
+ else if (formatObj.hasAudio) audio.push(formatObj);
259
288
  }
260
289
  }
261
290
  return {
262
291
  title: details.title || "",
263
292
  author: details.author || "",
293
+ subscribers,
264
294
  description: details.shortDescription || "",
265
295
  views: details.viewCount || "0",
296
+ likes,
297
+ comments,
266
298
  durationSeconds: details.lengthSeconds || "0",
267
299
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
268
- formats
300
+ formats: {
301
+ video,
302
+ videoOnly,
303
+ audio
304
+ }
269
305
  };
270
306
  }
271
307
  };
package/dist/index.mjs CHANGED
@@ -151,12 +151,20 @@ var YouTubeScraper = class {
151
151
  }
152
152
  const videoId = videoIdMatch[1];
153
153
  const html = await this.client.getText(url, {
154
- headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
154
+ headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
155
155
  });
156
156
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
157
157
  const match = html.match(regex);
158
158
  let visitorData = "";
159
159
  let details = {};
160
+ let initialData = {};
161
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
162
+ if (dataMatch && dataMatch[1]) {
163
+ try {
164
+ initialData = JSON.parse(dataMatch[1]);
165
+ } catch (e) {
166
+ }
167
+ }
160
168
  if (match && match[1]) {
161
169
  const data = JSON.parse(match[1]);
162
170
  details = data?.videoDetails || {};
@@ -192,6 +200,7 @@ var YouTubeScraper = class {
192
200
  body: JSON.stringify(payload)
193
201
  });
194
202
  const apiData = await res.json();
203
+ console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
195
204
  if (!details.title) {
196
205
  details = apiData?.videoDetails || {};
197
206
  }
@@ -199,12 +208,31 @@ var YouTubeScraper = class {
199
208
  if (!details) {
200
209
  throw new Error("Video details not found inside player response.");
201
210
  }
202
- const formats = [];
211
+ let subscribers = "";
212
+ let likes = "";
213
+ let comments = "";
214
+ try {
215
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
216
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
217
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
218
+ }
219
+ const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
220
+ const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
221
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
222
+ const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
223
+ if (commentsPanel) {
224
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
225
+ }
226
+ } catch (e) {
227
+ }
228
+ const video = [];
229
+ const videoOnly = [];
230
+ const audio = [];
203
231
  const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
204
232
  for (const format of rawFormats) {
205
233
  if (format.url) {
206
234
  const mimeType = format.mimeType || "";
207
- formats.push({
235
+ const formatObj = {
208
236
  url: format.url,
209
237
  mimeType,
210
238
  width: format.width,
@@ -213,19 +241,27 @@ var YouTubeScraper = class {
213
241
  bitrate: format.bitrate,
214
242
  hasAudio: mimeType.includes("audio/"),
215
243
  hasVideo: mimeType.includes("video/")
216
- });
217
- } else if (format.signatureCipher) {
218
- continue;
244
+ };
245
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
246
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
247
+ else if (formatObj.hasAudio) audio.push(formatObj);
219
248
  }
220
249
  }
221
250
  return {
222
251
  title: details.title || "",
223
252
  author: details.author || "",
253
+ subscribers,
224
254
  description: details.shortDescription || "",
225
255
  views: details.viewCount || "0",
256
+ likes,
257
+ comments,
226
258
  durationSeconds: details.lengthSeconds || "0",
227
259
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
228
- formats
260
+ formats: {
261
+ video,
262
+ videoOnly,
263
+ audio
264
+ }
229
265
  };
230
266
  }
231
267
  };
@@ -0,0 +1,46 @@
1
+ const fs = require('fs');
2
+ const { fetch } = require('undici');
3
+ const straw = require('./dist/index.js');
4
+
5
/**
 * Pick the stream to download from a scrape result.
 *
 * Accepts both the categorized shape returned by this release
 * (`{ video, videoOnly, audio }`) and the older flat array of formats.
 *
 * @param {object|Array<object>} formats - `formats` field of a scrape result.
 * @returns {object|undefined} A combined audio+video format when one exists,
 *   otherwise the widest format that has video, otherwise `undefined`.
 */
function pickDownloadFormat(formats) {
  const candidates = Array.isArray(formats)
    ? formats
    : [...(formats?.video ?? []), ...(formats?.videoOnly ?? [])];

  const combined = candidates.find((f) => f.hasVideo && f.hasAudio);
  if (combined) return combined;

  // filter() returns a fresh array, so sorting does not reorder the caller's data.
  return candidates
    .filter((f) => f.hasVideo)
    .sort((a, b) => (b.width || 0) - (a.width || 0))[0];
}

/**
 * Wait until a writable stream can accept more data.
 *
 * @param {import('stream').Writable} stream - Stream whose last `write()` returned false.
 * @returns {Promise<void>} Resolves on 'drain'; rejects if the stream errors first.
 */
function waitForDrain(stream) {
  return new Promise((resolve, reject) => {
    const onDrain = () => {
      stream.off('error', onError);
      resolve();
    };
    const onError = (err) => {
      stream.off('drain', onDrain);
      reject(err);
    };
    stream.once('drain', onDrain);
    stream.once('error', onError);
  });
}

/**
 * Scrape a fixed YouTube URL and stream the selected format to
 * `downloaded_video.mp4` in the current working directory, printing progress.
 *
 * @returns {Promise<void>} Resolves once the file has been fully flushed, or
 *   immediately when no downloadable format is available.
 * @throws {Error} When the HTTP request fails, the response has no body, or
 *   writing the output file fails.
 */
async function download() {
  const yt = new straw.YouTubeScraper();
  console.log('Scraping metadata and direct links...');
  const res = await yt.scrapeVideo('https://youtu.be/_4j1Abt_AiM?si=_dA2lroz096f1cYp');

  // Find a combined video+audio format, or fall back to the widest video format.
  const target = pickDownloadFormat(res.formats);

  if (!target) {
    console.log('No suitable downloadable format found.');
    return;
  }

  console.log(`Downloading: ${res.title}`);
  console.log(`Format: ${target.mimeType} (${target.width || 'unknown'}x${target.height || 'unknown'})`);

  console.log('Initiating download stream...');
  const response = await fetch(target.url);
  if (!response.ok) throw new Error(`Download request failed with HTTP ${response.status}`);
  if (!response.body) throw new Error('No response body');

  // Create the file only after the request succeeded, so a failed request
  // does not leave an empty file behind.
  const outPath = 'downloaded_video.mp4';
  const outStream = fs.createWriteStream(outPath);

  // A persistent listener keeps an asynchronous write failure from surfacing
  // as an unhandled 'error' event; the loop below rethrows it.
  let writeError = null;
  outStream.on('error', (err) => {
    writeError = err;
  });

  const reader = response.body.getReader();
  let downloaded = 0;

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (writeError) throw writeError;
      // Respect backpressure so chunks are not buffered in memory without bound.
      if (!outStream.write(value)) await waitForDrain(outStream);
      downloaded += value.length;
      process.stdout.write(`\rDownloaded: ${(downloaded / 1024 / 1024).toFixed(2)} MB`);
    }

    // Only report success after every buffered chunk has been flushed.
    await new Promise((resolve, reject) => {
      if (writeError) {
        reject(writeError);
        return;
      }
      outStream.once('finish', resolve);
      outStream.once('error', reject);
      outStream.end();
    });
  } catch (err) {
    outStream.destroy();
    // Best-effort cleanup: a failure to cancel must not mask the original error.
    await reader.cancel().catch(() => {});
    throw err;
  }

  console.log(`\nDownload complete! Saved to ${outPath}`);
}
45
+
46
// Run the demo; report any failure and signal it through a non-zero exit status.
download().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
File without changes
package/find_keys.js ADDED
@@ -0,0 +1,25 @@
1
+ const fs = require('fs');
2
+
3
+ const data = JSON.parse(fs.readFileSync('next_api_dump.json', 'utf-8'));
4
+
5
/**
 * Recursively search a nested object/array structure and log every place
 * where a property named `key` occurs.
 *
 * Each hit is logged as `Found <key> at <path>.<key> =` followed by the first
 * 100 characters of the JSON-serialized value. Array elements appear in the
 * path as `[index]`.
 *
 * @param {*} obj - Value to search; anything that is not an object or array is ignored.
 * @param {string} key - Property name to look for.
 * @param {string} [path=''] - Path accumulated so far, used for reporting.
 * @returns {void}
 */
function findKey(obj, key, path = '') {
  if (obj === null || typeof obj !== 'object') return;

  if (Array.isArray(obj)) {
    obj.forEach((item, i) => findKey(item, key, `${path}[${i}]`));
    return;
  }

  // Object.entries visits own enumerable properties only, unlike for...in.
  for (const [k, value] of Object.entries(obj)) {
    if (k === key) {
      // JSON.stringify returns undefined for undefined/function/symbol values;
      // fall back to String() so the preview never throws.
      const serialized = JSON.stringify(value);
      const preview = (serialized === undefined ? String(value) : serialized).substring(0, 100);
      console.log(`Found ${key} at ${path}.${k} =`, preview);
    }
    findKey(value, key, `${path}.${k}`);
  }
}
20
+
21
// Report every occurrence of each key of interest in the parsed dump, in order.
const keysOfInterest = [
  'subscriberCountText',
  'likeCount',
  'likeCountWithLikeText',
  'description',
  'commentCount',
];
for (const wantedKey of keysOfInterest) {
  findKey(data, wantedKey);
}