npm - @zetagoaurum-dev/straw - Versions diffs - 1.1.1 → 1.2.0 - Mend

@zetagoaurum-dev/straw 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +4 -0
package/dist/index.d.mts +8 -1
package/dist/index.d.ts +8 -1
package/dist/index.js +43 -7
package/dist/index.mjs +43 -7
package/download test/Blue Archive - Maki (Camp) Live2D_HD.webm +0 -0
package/download_test.js +46 -0
package/downloaded_video.mp4 +0 -0
package/find_keys.js +25 -0
package/next_api_dump.json +34987 -0
package/package.json +1 -1
package/player_api_dump.json +2131 -0
package/release.bat +1 -1
package/src/scrapers/youtube.ts +54 -11
package/straw/youtube.py +72 -10
package/test_aqz.js +54 -0
package/test_extract.js +41 -0
package/test_metadata.js +33 -0
package/{test_visitor.js → test_next.js} +10 -13
package/tests/test.py +7 -3
package/tests/test.ts +6 -3
package/ytInitialData_dump.json +17156 -0
package/test_api.js +0 -42
package/test_api_clients.js +0 -39
package/test_client.js +0 -37
package/test_embed.js +0 -26
package/test_html.js +0 -26
package/test_vr.js +0 -27
package/test_yt.js +0 -17

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,10 @@
 All notable changes to this project will be documented in this file.
+## [1.2.0] "Deep Metadata & Formats Engine" - 2026-02-27
+- **Feat:** Added extraction of `subscribers`, `likes`, and `comments` directly from YouTube's `ytInitialData` payload, without external parsing overhead.
+- **Feat:** Segregated `formats` array into three exact categorical bins: `video` (combined), `videoOnly`, and `audio` (audio-only), ensuring zero-ambiguity when downloading specific streams.
 ## [1.1.1] "Performance Patch" - 2026-02-27
 - **Perf:** Re-engineered the YouTube scraper in Node.js and Python to use the `IOS` InnerTube API directly, injecting localized `visitorData` tokens to seamlessly bypass bot checks and cipher encryption. Video format lists are returned instantaneously for optimal downloading infrastructure.
 - **Fix:** Fixed HTML parser blocking on high-volume deployed servers by upgrading to the direct `POST /youtubei/v1/player` endpoints.

package/dist/index.d.mts CHANGED Viewed

@@ -51,11 +51,18 @@ interface YouTubeFormats {
 interface YouTubeResult {
     title: string;
     author: string;
+    subscribers: string;
     description: string;
     views: string;
+    likes: string;
+    comments: string;
     durationSeconds: string;
     thumbnail: string;
-    formats: YouTubeFormats[];
+    formats: {
+        video: YouTubeFormats[];
+        videoOnly: YouTubeFormats[];
+        audio: YouTubeFormats[];
+    };
 }
 declare class YouTubeScraper {
     private client;

package/dist/index.d.ts CHANGED Viewed

@@ -51,11 +51,18 @@ interface YouTubeFormats {
 interface YouTubeResult {
     title: string;
     author: string;
+    subscribers: string;
     description: string;
     views: string;
+    likes: string;
+    comments: string;
     durationSeconds: string;
     thumbnail: string;
-    formats: YouTubeFormats[];
+    formats: {
+        video: YouTubeFormats[];
+        videoOnly: YouTubeFormats[];
+        audio: YouTubeFormats[];
+    };
 }
 declare class YouTubeScraper {
     private client;

package/dist/index.js CHANGED Viewed

@@ -191,12 +191,20 @@ var YouTubeScraper = class {
     }
     const videoId = videoIdMatch[1];
     const html = await this.client.getText(url, {
-      headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
+      headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
     });
     const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
     const match = html.match(regex);
     let visitorData = "";
     let details = {};
+    let initialData = {};
+    const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
+    if (dataMatch && dataMatch[1]) {
+      try {
+        initialData = JSON.parse(dataMatch[1]);
+      } catch (e) {
+      }
+    }
     if (match && match[1]) {
       const data = JSON.parse(match[1]);
       details = data?.videoDetails || {};
@@ -232,6 +240,7 @@ var YouTubeScraper = class {
       body: JSON.stringify(payload)
     });
     const apiData = await res.json();
+    console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
     if (!details.title) {
       details = apiData?.videoDetails || {};
     }
@@ -239,12 +248,31 @@ var YouTubeScraper = class {
     if (!details) {
       throw new Error("Video details not found inside player response.");
     }
-    const formats = [];
+    let subscribers = "";
+    let likes = "";
+    let comments = "";
+    try {
+      const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
+      if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
+        subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
+      }
+      const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
+      const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
+      if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
+      const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
+      if (commentsPanel) {
+        comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
+      }
+    } catch (e) {
+    }
+    const video = [];
+    const videoOnly = [];
+    const audio = [];
     const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
     for (const format of rawFormats) {
       if (format.url) {
         const mimeType = format.mimeType || "";
-        formats.push({
+        const formatObj = {
           url: format.url,
           mimeType,
           width: format.width,
@@ -253,19 +281,27 @@ var YouTubeScraper = class {
           bitrate: format.bitrate,
           hasAudio: mimeType.includes("audio/"),
           hasVideo: mimeType.includes("video/")
-        });
-      } else if (format.signatureCipher) {
-        continue;
+        };
+        if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
+        else if (formatObj.hasVideo) videoOnly.push(formatObj);
+        else if (formatObj.hasAudio) audio.push(formatObj);
       }
     }
     return {
       title: details.title || "",
       author: details.author || "",
+      subscribers,
       description: details.shortDescription || "",
       views: details.viewCount || "0",
+      likes,
+      comments,
       durationSeconds: details.lengthSeconds || "0",
       thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
-      formats
+      formats: {
+        video,
+        videoOnly,
+        audio
+      }
     };
   }
 };

package/dist/index.mjs CHANGED Viewed

@@ -151,12 +151,20 @@ var YouTubeScraper = class {
     }
     const videoId = videoIdMatch[1];
     const html = await this.client.getText(url, {
-      headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
+      headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
     });
     const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
     const match = html.match(regex);
     let visitorData = "";
     let details = {};
+    let initialData = {};
+    const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
+    if (dataMatch && dataMatch[1]) {
+      try {
+        initialData = JSON.parse(dataMatch[1]);
+      } catch (e) {
+      }
+    }
     if (match && match[1]) {
       const data = JSON.parse(match[1]);
       details = data?.videoDetails || {};
@@ -192,6 +200,7 @@ var YouTubeScraper = class {
       body: JSON.stringify(payload)
     });
     const apiData = await res.json();
+    console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
     if (!details.title) {
       details = apiData?.videoDetails || {};
     }
@@ -199,12 +208,31 @@ var YouTubeScraper = class {
     if (!details) {
       throw new Error("Video details not found inside player response.");
     }
-    const formats = [];
+    let subscribers = "";
+    let likes = "";
+    let comments = "";
+    try {
+      const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
+      if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
+        subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
+      }
+      const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
+      const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
+      if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
+      const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
+      if (commentsPanel) {
+        comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
+      }
+    } catch (e) {
+    }
+    const video = [];
+    const videoOnly = [];
+    const audio = [];
     const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
     for (const format of rawFormats) {
       if (format.url) {
         const mimeType = format.mimeType || "";
-        formats.push({
+        const formatObj = {
           url: format.url,
           mimeType,
           width: format.width,
@@ -213,19 +241,27 @@ var YouTubeScraper = class {
           bitrate: format.bitrate,
           hasAudio: mimeType.includes("audio/"),
           hasVideo: mimeType.includes("video/")
-        });
-      } else if (format.signatureCipher) {
-        continue;
+        };
+        if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
+        else if (formatObj.hasVideo) videoOnly.push(formatObj);
+        else if (formatObj.hasAudio) audio.push(formatObj);
       }
     }
     return {
       title: details.title || "",
       author: details.author || "",
+      subscribers,
       description: details.shortDescription || "",
       views: details.viewCount || "0",
+      likes,
+      comments,
       durationSeconds: details.lengthSeconds || "0",
       thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
-      formats
+      formats: {
+        video,
+        videoOnly,
+        audio
+      }
     };
   }
 };

package/download test/Blue Archive - Maki (Camp) Live2D_HD.webm ADDED Viewed

File without changes

package/download_test.js ADDED Viewed

@@ -0,0 +1,46 @@
+const fs = require('fs');
+const { fetch } = require('undici');
+const straw = require('./dist/index.js');
+/**
+ * Scrapes a YouTube video with straw and streams one playable format to disk.
+ *
+ * Prefers a combined (video + audio) format and otherwise falls back to the
+ * widest available video format. Accepts both result shapes: the 1.2.0 object
+ * `{ video, videoOnly, audio }` and the older flat array of formats.
+ *
+ * @returns {Promise<void>} Resolves once the output file has been flushed, or
+ *   right away when no suitable format is found.
+ * @throws {Error} If the download request is not successful or has no body.
+ */
+async function download() {
+  const yt = new straw.YouTubeScraper();
+  console.log('Scraping metadata and direct links...');
+  const res = await yt.scrapeVideo('https://youtu.be/_4j1Abt_AiM?si=_dA2lroz096f1cYp');
+  // Since 1.2.0 `formats` is an object of bins rather than a flat array, so calling
+  // `.find()` / `.filter()` on it directly throws. Flatten the video-bearing bins.
+  const candidates = Array.isArray(res.formats)
+    ? res.formats
+    : [...(res.formats?.video ?? []), ...(res.formats?.videoOnly ?? [])];
+  // Find a combined video+audio format, or fallback to the highest quality video format
+  const combined = candidates.find((f) => f.hasVideo && f.hasAudio);
+  const bestVideo = candidates
+    .filter((f) => f.hasVideo)
+    .sort((a, b) => (b.width || 0) - (a.width || 0))[0];
+  const target = combined || bestVideo;
+  if (!target) {
+    console.log('No suitable downloadable format found.');
+    return;
+  }
+  console.log(`Downloading: ${res.title}`);
+  console.log(`Format: ${target.mimeType} (${target.width || 'unknown'}x${target.height || 'unknown'})`);
+  console.log('Initiating download stream...');
+  const response = await fetch(target.url);
+  // Fail before touching the output file so an HTTP error page is never saved as video.
+  if (!response.ok) throw new Error(`Download request failed with HTTP ${response.status}`);
+  if (!response.body) throw new Error('No response body');
+  // To avoid buffering the whole video in memory, we stream it to the file
+  const outPath = 'downloaded_video.mp4';
+  const outStream = fs.createWriteStream(outPath);
+  const reader = response.body.getReader();
+  let downloaded = 0;
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    // Respect backpressure: when the write buffer is full, wait for it to drain.
+    if (!outStream.write(value)) {
+      await new Promise((resolve) => outStream.once('drain', resolve));
+    }
+    downloaded += value.length;
+    process.stdout.write(`\rDownloaded: ${(downloaded / 1024 / 1024).toFixed(2)} MB`);
+  }
+  // Only report success after the stream has finished writing everything out.
+  await new Promise((resolve) => outStream.end(resolve));
+  console.log(`\nDownload complete! Saved to ${outPath}`);
+}
+// Surface the failure and make the script exit non-zero instead of looking successful.
+download().catch((err) => {
+  console.error(err);
+  process.exitCode = 1;
+});

package/downloaded_video.mp4 ADDED Viewed

File without changes

package/find_keys.js ADDED Viewed

@@ -0,0 +1,25 @@
+const fs = require('fs');
+const data = JSON.parse(fs.readFileSync('next_api_dump.json', 'utf-8'));
+/**
+ * Recursively walks `obj` and logs every place where a property named `key`
+ * occurs, together with a preview (first 100 characters) of its JSON value.
+ *
+ * @param {*} obj - Value to search; non-objects and `null` are ignored.
+ * @param {string} key - Property name to look for.
+ * @param {string} [path=''] - Path accumulated so far, used in the log output.
+ */
+function findKey(obj, key, path = '') {
+  const isContainer = obj !== null && typeof obj === 'object';
+  if (!isContainer) return;
+  if (Array.isArray(obj)) {
+    // Array elements are addressed by index: path[0], path[1], ...
+    obj.forEach((element, index) => {
+      findKey(element, key, `${path}[${index}]`);
+    });
+    return;
+  }
+  for (const prop in obj) {
+    if (prop === key) {
+      const preview = JSON.stringify(obj[prop]).substring(0, 100);
+      console.log(`Found ${key} at ${path}.${prop} =`, preview);
+    }
+    // Keep descending even after a match; the key may appear again deeper down.
+    findKey(obj[prop], key, `${path}.${prop}`);
+  }
+}
+// Report every location of the metadata fields of interest in the dump, in order.
+const keysOfInterest = [
+  'subscriberCountText',
+  'likeCount',
+  'likeCountWithLikeText',
+  'description',
+  'commentCount',
+];
+for (const wantedKey of keysOfInterest) {
+  findKey(data, wantedKey);
+}