@zetagoaurum-dev/straw 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [1.2.1] "Hotfix" - 2026-02-27
6
+ - **Fix:** Removed accidentally tracked `ytInitialData_dump.json` and local `test_*.js` scripts from the previous NPM deployment bundle to ensure zero-bloat runtime.
7
+
8
+ ## [1.2.0] "Deep Metadata & Formats Engine" - 2026-02-27
9
+ - **Feat:** Integrated extraction of `subscribers`, `likes`, and `comments` directly from YouTube's `ytInitialData` payload without external parsing overhead.
10
+ - **Feat:** Split the `formats` array into three distinct categories: `video` (combined audio and video), `videoOnly`, and `audio` (audio-only), removing any ambiguity when downloading specific streams.
11
+
5
12
  ## [1.1.1] "Performance Patch" - 2026-02-27
6
13
  - **Perf:** Re-engineered the YouTube scraper in Node.js and Python to use the `IOS` InnerTube API directly, injecting localized `visitorData` tokens to seamlessly bypass bot checks and cipher encryption. Video format lists are returned instantaneously for optimal downloading infrastructure.
7
14
  - **Fix:** Fixed HTML parser blocking on high-volume deployed servers by upgrading to the direct `POST /youtubei/v1/player` endpoints.
package/dist/index.d.mts CHANGED
@@ -51,11 +51,18 @@ interface YouTubeFormats {
51
51
  interface YouTubeResult {
52
52
  title: string;
53
53
  author: string;
54
+ subscribers: string;
54
55
  description: string;
55
56
  views: string;
57
+ likes: string;
58
+ comments: string;
56
59
  durationSeconds: string;
57
60
  thumbnail: string;
58
- formats: YouTubeFormats[];
61
+ formats: {
62
+ video: YouTubeFormats[];
63
+ videoOnly: YouTubeFormats[];
64
+ audio: YouTubeFormats[];
65
+ };
59
66
  }
60
67
  declare class YouTubeScraper {
61
68
  private client;
package/dist/index.d.ts CHANGED
@@ -51,11 +51,18 @@ interface YouTubeFormats {
51
51
  interface YouTubeResult {
52
52
  title: string;
53
53
  author: string;
54
+ subscribers: string;
54
55
  description: string;
55
56
  views: string;
57
+ likes: string;
58
+ comments: string;
56
59
  durationSeconds: string;
57
60
  thumbnail: string;
58
- formats: YouTubeFormats[];
61
+ formats: {
62
+ video: YouTubeFormats[];
63
+ videoOnly: YouTubeFormats[];
64
+ audio: YouTubeFormats[];
65
+ };
59
66
  }
60
67
  declare class YouTubeScraper {
61
68
  private client;
package/dist/index.js CHANGED
@@ -191,12 +191,20 @@ var YouTubeScraper = class {
191
191
  }
192
192
  const videoId = videoIdMatch[1];
193
193
  const html = await this.client.getText(url, {
194
- headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
194
+ headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
195
195
  });
196
196
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
197
197
  const match = html.match(regex);
198
198
  let visitorData = "";
199
199
  let details = {};
200
+ let initialData = {};
201
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
202
+ if (dataMatch && dataMatch[1]) {
203
+ try {
204
+ initialData = JSON.parse(dataMatch[1]);
205
+ } catch (e) {
206
+ }
207
+ }
200
208
  if (match && match[1]) {
201
209
  const data = JSON.parse(match[1]);
202
210
  details = data?.videoDetails || {};
@@ -232,6 +240,7 @@ var YouTubeScraper = class {
232
240
  body: JSON.stringify(payload)
233
241
  });
234
242
  const apiData = await res.json();
243
+ console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
235
244
  if (!details.title) {
236
245
  details = apiData?.videoDetails || {};
237
246
  }
@@ -239,12 +248,31 @@ var YouTubeScraper = class {
239
248
  if (!details) {
240
249
  throw new Error("Video details not found inside player response.");
241
250
  }
242
- const formats = [];
251
+ let subscribers = "";
252
+ let likes = "";
253
+ let comments = "";
254
+ try {
255
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
256
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
257
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
258
+ }
259
+ const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
260
+ const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
261
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
262
+ const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
263
+ if (commentsPanel) {
264
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
265
+ }
266
+ } catch (e) {
267
+ }
268
+ const video = [];
269
+ const videoOnly = [];
270
+ const audio = [];
243
271
  const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
244
272
  for (const format of rawFormats) {
245
273
  if (format.url) {
246
274
  const mimeType = format.mimeType || "";
247
- formats.push({
275
+ const formatObj = {
248
276
  url: format.url,
249
277
  mimeType,
250
278
  width: format.width,
@@ -253,19 +281,27 @@ var YouTubeScraper = class {
253
281
  bitrate: format.bitrate,
254
282
  hasAudio: mimeType.includes("audio/"),
255
283
  hasVideo: mimeType.includes("video/")
256
- });
257
- } else if (format.signatureCipher) {
258
- continue;
284
+ };
285
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
286
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
287
+ else if (formatObj.hasAudio) audio.push(formatObj);
259
288
  }
260
289
  }
261
290
  return {
262
291
  title: details.title || "",
263
292
  author: details.author || "",
293
+ subscribers,
264
294
  description: details.shortDescription || "",
265
295
  views: details.viewCount || "0",
296
+ likes,
297
+ comments,
266
298
  durationSeconds: details.lengthSeconds || "0",
267
299
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
268
- formats
300
+ formats: {
301
+ video,
302
+ videoOnly,
303
+ audio
304
+ }
269
305
  };
270
306
  }
271
307
  };
package/dist/index.mjs CHANGED
@@ -151,12 +151,20 @@ var YouTubeScraper = class {
151
151
  }
152
152
  const videoId = videoIdMatch[1];
153
153
  const html = await this.client.getText(url, {
154
- headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
154
+ headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
155
155
  });
156
156
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
157
157
  const match = html.match(regex);
158
158
  let visitorData = "";
159
159
  let details = {};
160
+ let initialData = {};
161
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
162
+ if (dataMatch && dataMatch[1]) {
163
+ try {
164
+ initialData = JSON.parse(dataMatch[1]);
165
+ } catch (e) {
166
+ }
167
+ }
160
168
  if (match && match[1]) {
161
169
  const data = JSON.parse(match[1]);
162
170
  details = data?.videoDetails || {};
@@ -192,6 +200,7 @@ var YouTubeScraper = class {
192
200
  body: JSON.stringify(payload)
193
201
  });
194
202
  const apiData = await res.json();
203
+ console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
195
204
  if (!details.title) {
196
205
  details = apiData?.videoDetails || {};
197
206
  }
@@ -199,12 +208,31 @@ var YouTubeScraper = class {
199
208
  if (!details) {
200
209
  throw new Error("Video details not found inside player response.");
201
210
  }
202
- const formats = [];
211
+ let subscribers = "";
212
+ let likes = "";
213
+ let comments = "";
214
+ try {
215
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
216
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
217
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
218
+ }
219
+ const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
220
+ const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
221
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
222
+ const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
223
+ if (commentsPanel) {
224
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
225
+ }
226
+ } catch (e) {
227
+ }
228
+ const video = [];
229
+ const videoOnly = [];
230
+ const audio = [];
203
231
  const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
204
232
  for (const format of rawFormats) {
205
233
  if (format.url) {
206
234
  const mimeType = format.mimeType || "";
207
- formats.push({
235
+ const formatObj = {
208
236
  url: format.url,
209
237
  mimeType,
210
238
  width: format.width,
@@ -213,19 +241,27 @@ var YouTubeScraper = class {
213
241
  bitrate: format.bitrate,
214
242
  hasAudio: mimeType.includes("audio/"),
215
243
  hasVideo: mimeType.includes("video/")
216
- });
217
- } else if (format.signatureCipher) {
218
- continue;
244
+ };
245
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
246
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
247
+ else if (formatObj.hasAudio) audio.push(formatObj);
219
248
  }
220
249
  }
221
250
  return {
222
251
  title: details.title || "",
223
252
  author: details.author || "",
253
+ subscribers,
224
254
  description: details.shortDescription || "",
225
255
  views: details.viewCount || "0",
256
+ likes,
257
+ comments,
226
258
  durationSeconds: details.lengthSeconds || "0",
227
259
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
228
- formats
260
+ formats: {
261
+ video,
262
+ videoOnly,
263
+ audio
264
+ }
229
265
  };
230
266
  }
231
267
  };
File without changes
package/package.json CHANGED
@@ -1,44 +1,44 @@
1
- {
2
- "name": "@zetagoaurum-dev/straw",
3
- "version": "1.1.1",
4
- "description": "Enterprise-grade unified JS/TS and Python scraping library for Web, YouTube, and Media (Images, Audio, Video, Documents)",
5
- "main": "dist/index.js",
6
- "module": "dist/index.mjs",
7
- "types": "dist/index.d.ts",
8
- "exports": {
9
- ".": {
10
- "require": "./dist/index.js",
11
- "import": "./dist/index.mjs",
12
- "types": "./dist/index.d.ts"
13
- }
14
- },
15
- "scripts": {
16
- "build": "tsup src/index.ts --format cjs,esm --dts --clean",
17
- "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
18
- "test": "tsx tests/test.ts"
19
- },
20
- "keywords": [
21
- "scraping",
22
- "scraper",
23
- "youtube-scraper",
24
- "media-extractor",
25
- "anti-cors"
26
- ],
27
- "author": "ZetaGo-Aurum",
28
- "license": "MIT",
29
- "repository": {
30
- "type": "git",
31
- "url": "https://github.com/ZetaGo-Aurum/straw.git"
32
- },
33
- "devDependencies": {
34
- "@types/node": "^25.3.2",
35
- "ts-node": "^10.9.2",
36
- "tsup": "^8.5.1",
37
- "tsx": "^4.21.0",
38
- "typescript": "^5.9.3"
39
- },
40
- "dependencies": {
41
- "cheerio": "^1.2.0",
42
- "undici": "^7.22.0"
43
- }
44
- }
1
+ {
2
+ "name": "@zetagoaurum-dev/straw",
3
+ "version": "1.2.1",
4
+ "description": "Enterprise-grade unified JS/TS and Python scraping library for Web, YouTube, and Media (Images, Audio, Video, Documents)",
5
+ "main": "dist/index.js",
6
+ "module": "dist/index.mjs",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "require": "./dist/index.js",
11
+ "import": "./dist/index.mjs",
12
+ "types": "./dist/index.d.ts"
13
+ }
14
+ },
15
+ "scripts": {
16
+ "build": "tsup src/index.ts --format cjs,esm --dts --clean",
17
+ "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
18
+ "test": "tsx tests/test.ts"
19
+ },
20
+ "keywords": [
21
+ "scraping",
22
+ "scraper",
23
+ "youtube-scraper",
24
+ "media-extractor",
25
+ "anti-cors"
26
+ ],
27
+ "author": "ZetaGo-Aurum",
28
+ "license": "MIT",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "https://github.com/ZetaGo-Aurum/straw.git"
32
+ },
33
+ "devDependencies": {
34
+ "@types/node": "^25.3.2",
35
+ "ts-node": "^10.9.2",
36
+ "tsup": "^8.5.1",
37
+ "tsx": "^4.21.0",
38
+ "typescript": "^5.9.3"
39
+ },
40
+ "dependencies": {
41
+ "cheerio": "^1.2.0",
42
+ "undici": "^7.22.0"
43
+ }
44
+ }
package/release.bat CHANGED
@@ -1,4 +1,4 @@
1
1
  git add .
2
- git commit -m "v1.1.1 Performance Patch (InnerTube API Bypass)"
2
+ git commit -m "v1.2.0 Deep Metadata & Formats Engine"
3
3
  git push origin master -f
4
4
  npm publish
@@ -14,11 +14,18 @@ export interface YouTubeFormats {
14
14
  export interface YouTubeResult {
15
15
  title: string;
16
16
  author: string;
17
+ subscribers: string;
17
18
  description: string;
18
19
  views: string;
20
+ likes: string;
21
+ comments: string;
19
22
  durationSeconds: string;
20
23
  thumbnail: string;
21
- formats: YouTubeFormats[];
24
+ formats: {
25
+ video: YouTubeFormats[];
26
+ videoOnly: YouTubeFormats[];
27
+ audio: YouTubeFormats[];
28
+ };
22
29
  }
23
30
 
24
31
  export class YouTubeScraper {
@@ -40,7 +47,7 @@ export class YouTubeScraper {
40
47
  const videoId = videoIdMatch[1];
41
48
 
42
49
  const html = await this.client.getText(url, {
43
- headers: { 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430' }
50
+ headers: { 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430', 'Accept-Language': 'en-US,en;q=0.9' }
44
51
  });
45
52
 
46
53
  const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
@@ -48,6 +55,12 @@ export class YouTubeScraper {
48
55
  let visitorData = '';
49
56
  let details: any = {};
50
57
 
58
+ let initialData: any = {};
59
+ const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
60
+ if (dataMatch && dataMatch[1]) {
61
+ try { initialData = JSON.parse(dataMatch[1]); } catch(e) {}
62
+ }
63
+
51
64
  if (match && match[1]) {
52
65
  const data = JSON.parse(match[1]);
53
66
  details = data?.videoDetails || {};
@@ -96,13 +109,37 @@ export class YouTubeScraper {
96
109
  throw new Error('Video details not found inside player response.');
97
110
  }
98
111
 
99
- const formats: YouTubeFormats[] = [];
112
+ let subscribers = '';
113
+ let likes = '';
114
+ let comments = '';
115
+
116
+ try {
117
+ const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c: any) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
118
+ if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
119
+ subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
120
+ }
121
+
122
+ const factoids = initialData?.engagementPanels?.find((p: any) => p.engagementPanelSectionListRenderer?.targetId === 'engagement-panel-structured-description')
123
+ ?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i: any) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
124
+ const likesFactoid = factoids.find((f: any) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes('like'));
125
+ if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
126
+
127
+ const commentsPanel = initialData?.engagementPanels?.find((p: any) => p.engagementPanelSectionListRenderer?.panelIdentifier === 'engagement-panel-comments-section');
128
+ if (commentsPanel) {
129
+ comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || '';
130
+ }
131
+ } catch (e) {}
132
+
133
+ const video: YouTubeFormats[] = [];
134
+ const videoOnly: YouTubeFormats[] = [];
135
+ const audio: YouTubeFormats[] = [];
136
+
100
137
  const rawFormats = [...(streamingData?.formats || []), ...(streamingData?.adaptiveFormats || [])];
101
138
 
102
139
  for (const format of rawFormats) {
103
140
  if (format.url) {
104
141
  const mimeType = format.mimeType || '';
105
- formats.push({
142
+ const formatObj = {
106
143
  url: format.url,
107
144
  mimeType: mimeType,
108
145
  width: format.width,
@@ -111,23 +148,29 @@ export class YouTubeScraper {
111
148
  bitrate: format.bitrate,
112
149
  hasAudio: mimeType.includes('audio/'),
113
150
  hasVideo: mimeType.includes('video/')
114
- });
115
- } else if (format.signatureCipher) {
116
- // To avoid bloatware, we do not implement the complex decipher algorithm here.
117
- // Modern APIs usually provide the URL directly for lower qualities or we can fallback to other APIs.
118
- // Implementing decipher requires porting youtube-dl's sig logic or using ytdl-core.
119
- continue;
151
+ };
152
+
153
+ if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
154
+ else if (formatObj.hasVideo) videoOnly.push(formatObj);
155
+ else if (formatObj.hasAudio) audio.push(formatObj);
120
156
  }
121
157
  }
122
158
 
123
159
  return {
124
160
  title: details.title || '',
125
161
  author: details.author || '',
162
+ subscribers: subscribers,
126
163
  description: details.shortDescription || '',
127
164
  views: details.viewCount || '0',
165
+ likes: likes,
166
+ comments: comments,
128
167
  durationSeconds: details.lengthSeconds || '0',
129
168
  thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || '',
130
- formats
169
+ formats: {
170
+ video,
171
+ videoOnly,
172
+ audio
173
+ }
131
174
  };
132
175
  }
133
176
  }
package/straw/youtube.py CHANGED
@@ -13,17 +13,28 @@ class YouTubeScraper:
13
13
  raise Exception("Invalid YouTube URL")
14
14
  video_id = match.group(1)
15
15
 
16
- headers = {'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'}
16
+ headers = {'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430', 'Accept-Language': 'en-US,en;q=0.9'}
17
17
  html = await self.client.get_text(url, headers=headers)
18
18
 
19
19
  visitor_data = ""
20
20
  details = {}
21
+ initial_data = {}
21
22
 
22
23
  player_match = re.search(r'ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)', html)
23
24
  if player_match:
24
- data_html = json.loads(player_match.group(1))
25
- details = data_html.get('videoDetails', {})
26
- visitor_data = data_html.get('responseContext', {}).get('visitorData', '')
25
+ try:
26
+ data_html = json.loads(player_match.group(1))
27
+ details = data_html.get('videoDetails', {})
28
+ visitor_data = data_html.get('responseContext', {}).get('visitorData', '')
29
+ except:
30
+ pass
31
+
32
+ data_match = re.search(r'var ytInitialData\s*=\s*({.*?});(?:<\/script>)', html)
33
+ if data_match:
34
+ try:
35
+ initial_data = json.loads(data_match.group(1))
36
+ except:
37
+ pass
27
38
 
28
39
  if not visitor_data:
29
40
  vd_match = re.search(r'"visitorData"\s*:\s*"([^"]+)"', html)
@@ -64,22 +75,66 @@ class YouTubeScraper:
64
75
  if not details:
65
76
  raise Exception("Video details not found inside player response.")
66
77
 
67
- formats = []
78
+ subscribers = ""
79
+ likes = ""
80
+ comments = ""
81
+
82
+ try:
83
+ contents = initial_data.get('contents', {}).get('twoColumnWatchNextResults', {}).get('results', {}).get('results', {}).get('contents', [])
84
+ for c in contents:
85
+ sec_info = c.get('videoSecondaryInfoRenderer')
86
+ if sec_info:
87
+ stext = sec_info.get('owner', {}).get('videoOwnerRenderer', {}).get('subscriberCountText', {}).get('simpleText')
88
+ if stext: subscribers = stext
89
+
90
+ panels = initial_data.get('engagementPanels', [])
91
+ for p in panels:
92
+ sr = p.get('engagementPanelSectionListRenderer', {})
93
+ if sr.get('targetId') == 'engagement-panel-structured-description':
94
+ items = sr.get('content', {}).get('structuredDescriptionContentRenderer', {}).get('items', [])
95
+ for i in items:
96
+ factoids = i.get('videoDescriptionHeaderRenderer', {}).get('factoid', [])
97
+ for f in factoids:
98
+ acc = f.get('factoidRenderer', {}).get('accessibilityText', '')
99
+ if 'like' in acc.lower():
100
+ likes = acc
101
+
102
+ if sr.get('panelIdentifier') == 'engagement-panel-comments-section':
103
+ runs = sr.get('header', {}).get('engagementPanelTitleHeaderRenderer', {}).get('contextualInfo', {}).get('runs', [])
104
+ if runs:
105
+ comments = runs[0].get('text', '')
106
+ except:
107
+ pass
108
+
109
+ video_combined = []
110
+ video_only = []
111
+ audio_only = []
112
+
68
113
  raw_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])
69
114
 
70
115
  for f in raw_formats:
71
116
  if 'url' in f:
72
117
  mime_type = f.get('mimeType', '')
73
- formats.append({
118
+ has_audio = 'audio/' in mime_type
119
+ has_video = 'video/' in mime_type
120
+
121
+ f_obj = {
74
122
  'url': f['url'],
75
123
  'mimeType': mime_type,
76
124
  'width': f.get('width'),
77
125
  'height': f.get('height'),
78
126
  'quality': f.get('qualityLabel') or f.get('quality'),
79
127
  'bitrate': f.get('bitrate'),
80
- 'hasAudio': 'audio/' in mime_type,
81
- 'hasVideo': 'video/' in mime_type
82
- })
128
+ 'hasAudio': has_audio,
129
+ 'hasVideo': has_video
130
+ }
131
+
132
+ if has_video and has_audio:
133
+ video_combined.append(f_obj)
134
+ elif has_video:
135
+ video_only.append(f_obj)
136
+ elif has_audio:
137
+ audio_only.append(f_obj)
83
138
 
84
139
  thumbnails = details.get('thumbnail', {}).get('thumbnails', [])
85
140
  best_thumbnail = thumbnails[-1]['url'] if thumbnails else ''
@@ -87,9 +142,16 @@ class YouTubeScraper:
87
142
  return {
88
143
  'title': details.get('title', ''),
89
144
  'author': details.get('author', ''),
145
+ 'subscribers': subscribers,
90
146
  'description': details.get('shortDescription', ''),
91
147
  'views': details.get('viewCount', '0'),
148
+ 'likes': likes,
149
+ 'comments': comments,
92
150
  'durationSeconds': details.get('lengthSeconds', '0'),
93
151
  'thumbnail': best_thumbnail,
94
- 'formats': formats
152
+ 'formats': {
153
+ 'video': video_combined,
154
+ 'videoOnly': video_only,
155
+ 'audio': audio_only
156
+ }
95
157
  }
package/tests/test.py CHANGED
@@ -24,9 +24,13 @@ async def run_tests():
24
24
  print("2. Testing YouTube Scraper...")
25
25
  yt = YouTubeScraper()
26
26
  yt_res = await yt.scrape_video("https://www.youtube.com/watch?v=aqz-KE-bpKQ")
27
- print(f"YouTube Scraper Output: Title = {yt_res['title']}")
28
- print(f"YouTube Scraper Output: Duration = {yt_res['durationSeconds']} seconds")
29
- print(f"YouTube Scraper Output: Found {len(yt_res['formats'])} formats")
27
+ print(f"YouTube Scraper Output: Title = {yt_res.get('title')}")
28
+ print(f"YouTube Scraper Output: Subscribers = {yt_res.get('subscribers')}")
29
+ print(f"YouTube Scraper Output: Likes = {yt_res.get('likes')}")
30
+ print(f"YouTube Scraper Output: Comments = {yt_res.get('comments')}")
31
+ print(f"YouTube Scraper Output: Duration = {yt_res.get('durationSeconds')} seconds")
32
+ formats = yt_res.get('formats', {})
33
+ print(f"YouTube Scraper Output: Found {len(formats.get('video', []))} video, {len(formats.get('videoOnly', []))} video-only, and {len(formats.get('audio', []))} audio formats")
30
34
  await yt.client.close()
31
35
 
32
36
  print("\n" + "-" * 33)
package/tests/test.ts CHANGED
@@ -19,9 +19,12 @@ async function runTests() {
19
19
  const ytClient = straw.youtube();
20
20
  // Use a generic test video like Big Buck Bunny
21
21
  const ytResult = await ytClient.scrapeVideo('https://www.youtube.com/watch?v=aqz-KE-bpKQ');
22
- console.log(`YouTube Scraper Output: Title = ${ytResult.title}`);
23
- console.log(`YouTube Scraper Output: Duration = ${ytResult.durationSeconds} seconds`);
24
- console.log(`YouTube Scraper Output: Found ${ytResult.formats.length} formats`);
22
+ console.log('YouTube Scraper Output: Title =', ytResult.title);
23
+ console.log('YouTube Scraper Output: Subscribers =', ytResult.subscribers);
24
+ console.log('YouTube Scraper Output: Likes =', ytResult.likes);
25
+ console.log('YouTube Scraper Output: Comments =', ytResult.comments);
26
+ console.log('YouTube Scraper Output: Duration =', ytResult.durationSeconds, 'seconds');
27
+ console.log(`YouTube Scraper Output: Found ${ytResult.formats.video.length} video (combined), ${ytResult.formats.videoOnly.length} video-only, and ${ytResult.formats.audio.length} audio formats.`);
25
28
 
26
29
  console.log('\n---------------------------------');
27
30
 
package/test_api.js DELETED
@@ -1,42 +0,0 @@
1
- const undici = require('undici');
2
-
3
- async function testInnerTube() {
4
- const videoId = '_4j1Abt_AiM';
5
-
6
- const payload = {
7
- context: {
8
- client: {
9
- hl: 'en',
10
- gl: 'US',
11
- clientName: 'IOS',
12
- clientVersion: '19.28.1',
13
- osName: 'iOS',
14
- osVersion: '17.5.1',
15
- deviceMake: 'Apple',
16
- deviceModel: 'iPhone16,2'
17
- }
18
- },
19
- videoId: videoId
20
- };
21
-
22
- const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
23
- method: 'POST',
24
- headers: {
25
- 'Content-Type': 'application/json',
26
- 'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
27
- },
28
- body: JSON.stringify(payload)
29
- });
30
-
31
- const body = await res.body.json();
32
- console.log('Full JSON Response Keys:', Object.keys(body));
33
- console.log('Raw JSON String (Truncated):', JSON.stringify(body).slice(0, 1000));
34
- console.log('Playability:', body.playabilityStatus);
35
- console.log('Title:', body.videoDetails?.title);
36
-
37
- const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
38
- console.log('Total Formats:', formats.length);
39
-
40
- }
41
-
42
- testInnerTube();
@@ -1,39 +0,0 @@
1
- const undici = require('undici');
2
-
3
- async function testClient(clientName, clientVersion, userAgent, osName='', osVersion='') {
4
- const payload = {
5
- context: {
6
- client: {
7
- hl: 'en',
8
- gl: 'US',
9
- clientName,
10
- clientVersion,
11
- osName,
12
- osVersion
13
- }
14
- },
15
- videoId: '_4j1Abt_AiM'
16
- };
17
-
18
- const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
19
- method: 'POST',
20
- headers: {
21
- 'Content-Type': 'application/json',
22
- 'User-Agent': userAgent
23
- },
24
- body: JSON.stringify(payload)
25
- });
26
-
27
- const body = await res.body.json();
28
- const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
29
- console.log(`[${clientName}] Playability:`, body.playabilityStatus?.status, '| Formats:', formats.length);
30
- }
31
-
32
- async function runAll() {
33
- await testClient('WEB_EMBED', '1.20230209.00.00', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)');
34
- await testClient('TVHTML5', '7.20230209.00.00', 'Mozilla/5.0 (Web0S; Linux/SmartTV) AppleWebKit/537.36 (KHTML, like Gecko)');
35
- await testClient('ANDROID', '17.31.35', 'com.google.android.youtube/17.31.35 (Linux; U; Android 11)', 'Android', '11');
36
- await testClient('IOS', '19.28.1', 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)', 'iOS', '17.5.1');
37
- }
38
-
39
- runAll();
package/test_client.js DELETED
@@ -1,37 +0,0 @@
1
- const { StrawClient } = require('./dist/core/client.js');
2
-
3
- async function test() {
4
- const client = new StrawClient();
5
- const payload = {
6
- context: {
7
- client: {
8
- hl: 'en',
9
- gl: 'US',
10
- clientName: 'IOS',
11
- clientVersion: '19.28.1',
12
- osName: 'iOS',
13
- osVersion: '17.5.1',
14
- deviceMake: 'Apple',
15
- deviceModel: 'iPhone16,2'
16
- }
17
- },
18
- videoId: '_4j1Abt_AiM'
19
- };
20
-
21
- const res = await client.request('https://www.youtube.com/youtubei/v1/player', {
22
- method: 'POST',
23
- headers: {
24
- 'Content-Type': 'application/json',
25
- 'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
26
- },
27
- body: JSON.stringify(payload)
28
- });
29
-
30
- const data = await res.json();
31
- console.log(Object.keys(data));
32
- if (data.playabilityStatus) {
33
- console.log('Playability:', data.playabilityStatus);
34
- }
35
- }
36
-
37
- test();
package/test_embed.js DELETED
@@ -1,26 +0,0 @@
1
- const undici = require('undici');
2
-
3
- async function testEmbed() {
4
- const url = 'https://www.youtube.com/embed/_4j1Abt_AiM';
5
- const res = await undici.request(url, {
6
- headers: {
7
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
8
- 'Accept-Language': 'en-US,en;q=0.9',
9
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
10
- }
11
- });
12
- const html = await res.body.text();
13
-
14
- const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
15
- const match = html.match(regex);
16
- if (match) {
17
- const data = JSON.parse(match[1]);
18
- const formats = [...(data.streamingData?.formats || []), ...(data.streamingData?.adaptiveFormats || [])];
19
- console.log('Embed playability:', data.playabilityStatus?.status);
20
- console.log('Formats found:', formats.length);
21
- } else {
22
- console.log('No ytInitialPlayerResponse found in embed HTML');
23
- }
24
- }
25
-
26
- testEmbed();
package/test_html.js DELETED
@@ -1,26 +0,0 @@
1
- const undici = require('undici');
2
-
3
- async function testHtml() {
4
- const url = 'https://www.youtube.com/watch?v=_4j1Abt_AiM';
5
- const res = await undici.request(url, {
6
- method: 'GET',
7
- headers: {
8
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
9
- 'Accept-Language': 'en-US,en;q=0.9',
10
- 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
11
- }
12
- });
13
-
14
- const html = await res.body.text();
15
- const match = html.match(/ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/);
16
- if (match) {
17
- const data = JSON.parse(match[1]);
18
- const formats = [...(data.streamingData?.formats || []), ...(data.streamingData?.adaptiveFormats || [])];
19
- console.log('Got HTML Response with Player:', data.playabilityStatus?.status);
20
- console.log('Formats:', formats.length);
21
- } else {
22
- console.log('No ytInitialPlayerResponse found in direct HTML fetching.');
23
- }
24
- }
25
-
26
- testHtml();
package/test_visitor.js DELETED
@@ -1,56 +0,0 @@
1
- const undici = require('undici');
2
-
3
- async function testVisitor() {
4
- const videoId = '_4j1Abt_AiM';
5
- const url = `https://www.youtube.com/watch?v=${videoId}`;
6
-
7
- const htmlRes = await undici.request(url, {
8
- method: 'GET',
9
- headers: {
10
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/115.0.0.0 Safari/537.36',
11
- 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
12
- }
13
- });
14
-
15
- const html = await htmlRes.body.text();
16
-
17
- let visitorData = '';
18
- const match = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
19
- if (match) visitorData = match[1];
20
-
21
- console.log('Got Visitor Data:', visitorData);
22
-
23
- const payload = {
24
- context: {
25
- client: {
26
- hl: 'en',
27
- gl: 'US',
28
- clientName: 'IOS',
29
- clientVersion: '19.28.1',
30
- osName: 'iOS',
31
- osVersion: '17.5.1',
32
- deviceMake: 'Apple',
33
- deviceModel: 'iPhone16,2',
34
- visitorData: visitorData
35
- }
36
- },
37
- videoId: videoId
38
- };
39
-
40
- const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
41
- method: 'POST',
42
- headers: {
43
- 'Content-Type': 'application/json',
44
- 'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
45
- },
46
- body: JSON.stringify(payload)
47
- });
48
-
49
- const body = await res.body.json();
50
- const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
51
-
52
- console.log('Target Playability:', body.playabilityStatus?.status);
53
- console.log('Target Formats:', formats.length);
54
- }
55
-
56
- testVisitor();
package/test_vr.js DELETED
@@ -1,27 +0,0 @@
1
- const undici = require('undici');
2
-
3
- async function testVR() {
4
- const payload = {
5
- context: {
6
- client: {
7
- clientName: 'ANDROID_TESTSUITE',
8
- clientVersion: '1.9',
9
- androidSdkVersion: 30,
10
- hl: 'en',
11
- gl: 'US',
12
- utcOffsetMinutes: 0
13
- }
14
- },
15
- videoId: '_4j1Abt_AiM'
16
- };
17
- const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
18
- method: 'POST',
19
- headers: { 'Content-Type': 'application/json', 'User-Agent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11)' },
20
- body: JSON.stringify(payload)
21
- });
22
- const body = await res.body.json();
23
- const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
24
- console.log('Playability:', body.playabilityStatus?.status);
25
- console.log('Formats:', formats.length);
26
- }
27
- testVR();
package/test_yt.js DELETED
@@ -1,17 +0,0 @@
1
- const straw = require('./dist/index.js');
2
-
3
- async function run() {
4
- console.time('YouTube Scrape');
5
- const yt = new straw.YouTubeScraper();
6
- try {
7
- const res = await yt.scrapeVideo('https://youtu.be/_4j1Abt_AiM?si=qJY_gv4F_adBYMYP');
8
- console.log('Title:', res.title);
9
- console.log('Formats:', res.formats.length);
10
- console.log('First format URL (truncated):', res.formats[0]?.url?.substring(0, 100));
11
- } catch (e) {
12
- console.error('Scrape failed:', e);
13
- }
14
- console.timeEnd('YouTube Scrape');
15
- }
16
-
17
- run();