@zetagoaurum-dev/straw 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/index.d.mts +8 -1
- package/dist/index.d.ts +8 -1
- package/dist/index.js +43 -7
- package/dist/index.mjs +43 -7
- package/download test/Blue Archive - Maki (Camp) Live2D_HD.webm +0 -0
- package/downloaded_video.mp4 +0 -0
- package/package.json +44 -44
- package/release.bat +1 -1
- package/src/scrapers/youtube.ts +54 -11
- package/straw/youtube.py +72 -10
- package/tests/test.py +7 -3
- package/tests/test.ts +6 -3
- package/test_api.js +0 -42
- package/test_api_clients.js +0 -39
- package/test_client.js +0 -37
- package/test_embed.js +0 -26
- package/test_html.js +0 -26
- package/test_visitor.js +0 -56
- package/test_vr.js +0 -27
- package/test_yt.js +0 -17
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.2.1] "Hotfix" - 2026-02-27
|
|
6
|
+
- **Fix:** Removed accidentally tracked `ytInitialData_dump.json` and local `test_*.js` scripts from the previous NPM deployment bundle to ensure zero-bloat runtime.
|
|
7
|
+
|
|
8
|
+
## [1.2.0] "Deep Metadata & Formats Engine" - 2026-02-27
|
|
9
|
+
- **Feat:** Added extraction of `subscribers`, `likes`, and `comments` directly from YouTube's `ytInitialData` payload, without external parsing overhead.
|
|
10
|
+
- **Feat:** Split `formats` into three distinct categories: `video` (combined audio and video), `videoOnly`, and `audio` (audio-only), removing ambiguity when downloading specific streams. Note that `formats` is now an object containing these three arrays rather than a single flat array.
|
|
11
|
+
|
|
5
12
|
## [1.1.1] "Performance Patch" - 2026-02-27
|
|
6
13
|
- **Perf:** Re-engineered the YouTube scraper in Node.js and Python to use the `IOS` InnerTube API directly, injecting localized `visitorData` tokens to seamlessly bypass bot checks and cipher encryption. Video format lists are returned instantaneously for optimal downloading infrastructure.
|
|
7
14
|
- **Fix:** Fixed HTML parser blocking on high-volume deployed servers by upgrading to the direct `POST /youtubei/v1/player` endpoints.
|
package/dist/index.d.mts
CHANGED
|
@@ -51,11 +51,18 @@ interface YouTubeFormats {
|
|
|
51
51
|
interface YouTubeResult {
|
|
52
52
|
title: string;
|
|
53
53
|
author: string;
|
|
54
|
+
subscribers: string;
|
|
54
55
|
description: string;
|
|
55
56
|
views: string;
|
|
57
|
+
likes: string;
|
|
58
|
+
comments: string;
|
|
56
59
|
durationSeconds: string;
|
|
57
60
|
thumbnail: string;
|
|
58
|
-
formats:
|
|
61
|
+
formats: {
|
|
62
|
+
video: YouTubeFormats[];
|
|
63
|
+
videoOnly: YouTubeFormats[];
|
|
64
|
+
audio: YouTubeFormats[];
|
|
65
|
+
};
|
|
59
66
|
}
|
|
60
67
|
declare class YouTubeScraper {
|
|
61
68
|
private client;
|
package/dist/index.d.ts
CHANGED
|
@@ -51,11 +51,18 @@ interface YouTubeFormats {
|
|
|
51
51
|
interface YouTubeResult {
|
|
52
52
|
title: string;
|
|
53
53
|
author: string;
|
|
54
|
+
subscribers: string;
|
|
54
55
|
description: string;
|
|
55
56
|
views: string;
|
|
57
|
+
likes: string;
|
|
58
|
+
comments: string;
|
|
56
59
|
durationSeconds: string;
|
|
57
60
|
thumbnail: string;
|
|
58
|
-
formats:
|
|
61
|
+
formats: {
|
|
62
|
+
video: YouTubeFormats[];
|
|
63
|
+
videoOnly: YouTubeFormats[];
|
|
64
|
+
audio: YouTubeFormats[];
|
|
65
|
+
};
|
|
59
66
|
}
|
|
60
67
|
declare class YouTubeScraper {
|
|
61
68
|
private client;
|
package/dist/index.js
CHANGED
|
@@ -191,12 +191,20 @@ var YouTubeScraper = class {
|
|
|
191
191
|
}
|
|
192
192
|
const videoId = videoIdMatch[1];
|
|
193
193
|
const html = await this.client.getText(url, {
|
|
194
|
-
headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
|
|
194
|
+
headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
|
|
195
195
|
});
|
|
196
196
|
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
197
197
|
const match = html.match(regex);
|
|
198
198
|
let visitorData = "";
|
|
199
199
|
let details = {};
|
|
200
|
+
let initialData = {};
|
|
201
|
+
const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
|
|
202
|
+
if (dataMatch && dataMatch[1]) {
|
|
203
|
+
try {
|
|
204
|
+
initialData = JSON.parse(dataMatch[1]);
|
|
205
|
+
} catch (e) {
|
|
206
|
+
}
|
|
207
|
+
}
|
|
200
208
|
if (match && match[1]) {
|
|
201
209
|
const data = JSON.parse(match[1]);
|
|
202
210
|
details = data?.videoDetails || {};
|
|
@@ -232,6 +240,7 @@ var YouTubeScraper = class {
|
|
|
232
240
|
body: JSON.stringify(payload)
|
|
233
241
|
});
|
|
234
242
|
const apiData = await res.json();
|
|
243
|
+
console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
|
|
235
244
|
if (!details.title) {
|
|
236
245
|
details = apiData?.videoDetails || {};
|
|
237
246
|
}
|
|
@@ -239,12 +248,31 @@ var YouTubeScraper = class {
|
|
|
239
248
|
if (!details) {
|
|
240
249
|
throw new Error("Video details not found inside player response.");
|
|
241
250
|
}
|
|
242
|
-
|
|
251
|
+
let subscribers = "";
|
|
252
|
+
let likes = "";
|
|
253
|
+
let comments = "";
|
|
254
|
+
try {
|
|
255
|
+
const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
|
|
256
|
+
if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
|
|
257
|
+
subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
|
|
258
|
+
}
|
|
259
|
+
const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
|
|
260
|
+
const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
|
|
261
|
+
if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
|
|
262
|
+
const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
|
|
263
|
+
if (commentsPanel) {
|
|
264
|
+
comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
|
|
265
|
+
}
|
|
266
|
+
} catch (e) {
|
|
267
|
+
}
|
|
268
|
+
const video = [];
|
|
269
|
+
const videoOnly = [];
|
|
270
|
+
const audio = [];
|
|
243
271
|
const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
|
|
244
272
|
for (const format of rawFormats) {
|
|
245
273
|
if (format.url) {
|
|
246
274
|
const mimeType = format.mimeType || "";
|
|
247
|
-
|
|
275
|
+
const formatObj = {
|
|
248
276
|
url: format.url,
|
|
249
277
|
mimeType,
|
|
250
278
|
width: format.width,
|
|
@@ -253,19 +281,27 @@ var YouTubeScraper = class {
|
|
|
253
281
|
bitrate: format.bitrate,
|
|
254
282
|
hasAudio: mimeType.includes("audio/"),
|
|
255
283
|
hasVideo: mimeType.includes("video/")
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
|
|
284
|
+
};
|
|
285
|
+
if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
|
|
286
|
+
else if (formatObj.hasVideo) videoOnly.push(formatObj);
|
|
287
|
+
else if (formatObj.hasAudio) audio.push(formatObj);
|
|
259
288
|
}
|
|
260
289
|
}
|
|
261
290
|
return {
|
|
262
291
|
title: details.title || "",
|
|
263
292
|
author: details.author || "",
|
|
293
|
+
subscribers,
|
|
264
294
|
description: details.shortDescription || "",
|
|
265
295
|
views: details.viewCount || "0",
|
|
296
|
+
likes,
|
|
297
|
+
comments,
|
|
266
298
|
durationSeconds: details.lengthSeconds || "0",
|
|
267
299
|
thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
|
|
268
|
-
formats
|
|
300
|
+
formats: {
|
|
301
|
+
video,
|
|
302
|
+
videoOnly,
|
|
303
|
+
audio
|
|
304
|
+
}
|
|
269
305
|
};
|
|
270
306
|
}
|
|
271
307
|
};
|
package/dist/index.mjs
CHANGED
|
@@ -151,12 +151,20 @@ var YouTubeScraper = class {
|
|
|
151
151
|
}
|
|
152
152
|
const videoId = videoIdMatch[1];
|
|
153
153
|
const html = await this.client.getText(url, {
|
|
154
|
-
headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
|
|
154
|
+
headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430", "Accept-Language": "en-US,en;q=0.9" }
|
|
155
155
|
});
|
|
156
156
|
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
157
157
|
const match = html.match(regex);
|
|
158
158
|
let visitorData = "";
|
|
159
159
|
let details = {};
|
|
160
|
+
let initialData = {};
|
|
161
|
+
const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
|
|
162
|
+
if (dataMatch && dataMatch[1]) {
|
|
163
|
+
try {
|
|
164
|
+
initialData = JSON.parse(dataMatch[1]);
|
|
165
|
+
} catch (e) {
|
|
166
|
+
}
|
|
167
|
+
}
|
|
160
168
|
if (match && match[1]) {
|
|
161
169
|
const data = JSON.parse(match[1]);
|
|
162
170
|
details = data?.videoDetails || {};
|
|
@@ -192,6 +200,7 @@ var YouTubeScraper = class {
|
|
|
192
200
|
body: JSON.stringify(payload)
|
|
193
201
|
});
|
|
194
202
|
const apiData = await res.json();
|
|
203
|
+
console.log("Playability Status:", apiData?.playabilityStatus?.status, "StreamingData keys:", Object.keys(apiData?.streamingData || {}));
|
|
195
204
|
if (!details.title) {
|
|
196
205
|
details = apiData?.videoDetails || {};
|
|
197
206
|
}
|
|
@@ -199,12 +208,31 @@ var YouTubeScraper = class {
|
|
|
199
208
|
if (!details) {
|
|
200
209
|
throw new Error("Video details not found inside player response.");
|
|
201
210
|
}
|
|
202
|
-
|
|
211
|
+
let subscribers = "";
|
|
212
|
+
let likes = "";
|
|
213
|
+
let comments = "";
|
|
214
|
+
try {
|
|
215
|
+
const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
|
|
216
|
+
if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
|
|
217
|
+
subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
|
|
218
|
+
}
|
|
219
|
+
const factoids = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.targetId === "engagement-panel-structured-description")?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
|
|
220
|
+
const likesFactoid = factoids.find((f) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes("like"));
|
|
221
|
+
if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
|
|
222
|
+
const commentsPanel = initialData?.engagementPanels?.find((p) => p.engagementPanelSectionListRenderer?.panelIdentifier === "engagement-panel-comments-section");
|
|
223
|
+
if (commentsPanel) {
|
|
224
|
+
comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || "";
|
|
225
|
+
}
|
|
226
|
+
} catch (e) {
|
|
227
|
+
}
|
|
228
|
+
const video = [];
|
|
229
|
+
const videoOnly = [];
|
|
230
|
+
const audio = [];
|
|
203
231
|
const rawFormats = [...streamingData?.formats || [], ...streamingData?.adaptiveFormats || []];
|
|
204
232
|
for (const format of rawFormats) {
|
|
205
233
|
if (format.url) {
|
|
206
234
|
const mimeType = format.mimeType || "";
|
|
207
|
-
|
|
235
|
+
const formatObj = {
|
|
208
236
|
url: format.url,
|
|
209
237
|
mimeType,
|
|
210
238
|
width: format.width,
|
|
@@ -213,19 +241,27 @@ var YouTubeScraper = class {
|
|
|
213
241
|
bitrate: format.bitrate,
|
|
214
242
|
hasAudio: mimeType.includes("audio/"),
|
|
215
243
|
hasVideo: mimeType.includes("video/")
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
|
|
244
|
+
};
|
|
245
|
+
if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
|
|
246
|
+
else if (formatObj.hasVideo) videoOnly.push(formatObj);
|
|
247
|
+
else if (formatObj.hasAudio) audio.push(formatObj);
|
|
219
248
|
}
|
|
220
249
|
}
|
|
221
250
|
return {
|
|
222
251
|
title: details.title || "",
|
|
223
252
|
author: details.author || "",
|
|
253
|
+
subscribers,
|
|
224
254
|
description: details.shortDescription || "",
|
|
225
255
|
views: details.viewCount || "0",
|
|
256
|
+
likes,
|
|
257
|
+
comments,
|
|
226
258
|
durationSeconds: details.lengthSeconds || "0",
|
|
227
259
|
thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || "",
|
|
228
|
-
formats
|
|
260
|
+
formats: {
|
|
261
|
+
video,
|
|
262
|
+
videoOnly,
|
|
263
|
+
audio
|
|
264
|
+
}
|
|
229
265
|
};
|
|
230
266
|
}
|
|
231
267
|
};
|
|
File without changes
|
|
File without changes
|
package/package.json
CHANGED
|
@@ -1,44 +1,44 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@zetagoaurum-dev/straw",
|
|
3
|
-
"version": "1.1.1",
|
|
4
|
-
"description": "Enterprise-grade unified JS/TS and Python scraping library for Web, YouTube, and Media (Images, Audio, Video, Documents)",
|
|
5
|
-
"main": "dist/index.js",
|
|
6
|
-
"module": "dist/index.mjs",
|
|
7
|
-
"types": "dist/index.d.ts",
|
|
8
|
-
"exports": {
|
|
9
|
-
".": {
|
|
10
|
-
"require": "./dist/index.js",
|
|
11
|
-
"import": "./dist/index.mjs",
|
|
12
|
-
"types": "./dist/index.d.ts"
|
|
13
|
-
}
|
|
14
|
-
},
|
|
15
|
-
"scripts": {
|
|
16
|
-
"build": "tsup src/index.ts --format cjs,esm --dts --clean",
|
|
17
|
-
"dev": "tsup src/index.ts --format cjs,esm --dts --watch",
|
|
18
|
-
"test": "tsx tests/test.ts"
|
|
19
|
-
},
|
|
20
|
-
"keywords": [
|
|
21
|
-
"scraping",
|
|
22
|
-
"scraper",
|
|
23
|
-
"youtube-scraper",
|
|
24
|
-
"media-extractor",
|
|
25
|
-
"anti-cors"
|
|
26
|
-
],
|
|
27
|
-
"author": "ZetaGo-Aurum",
|
|
28
|
-
"license": "MIT",
|
|
29
|
-
"repository": {
|
|
30
|
-
"type": "git",
|
|
31
|
-
"url": "https://github.com/ZetaGo-Aurum/straw.git"
|
|
32
|
-
},
|
|
33
|
-
"devDependencies": {
|
|
34
|
-
"@types/node": "^25.3.2",
|
|
35
|
-
"ts-node": "^10.9.2",
|
|
36
|
-
"tsup": "^8.5.1",
|
|
37
|
-
"tsx": "^4.21.0",
|
|
38
|
-
"typescript": "^5.9.3"
|
|
39
|
-
},
|
|
40
|
-
"dependencies": {
|
|
41
|
-
"cheerio": "^1.2.0",
|
|
42
|
-
"undici": "^7.22.0"
|
|
43
|
-
}
|
|
44
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@zetagoaurum-dev/straw",
|
|
3
|
+
"version": "1.2.1",
|
|
4
|
+
"description": "Enterprise-grade unified JS/TS and Python scraping library for Web, YouTube, and Media (Images, Audio, Video, Documents)",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"module": "dist/index.mjs",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"require": "./dist/index.js",
|
|
11
|
+
"import": "./dist/index.mjs",
|
|
12
|
+
"types": "./dist/index.d.ts"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "tsup src/index.ts --format cjs,esm --dts --clean",
|
|
17
|
+
"dev": "tsup src/index.ts --format cjs,esm --dts --watch",
|
|
18
|
+
"test": "tsx tests/test.ts"
|
|
19
|
+
},
|
|
20
|
+
"keywords": [
|
|
21
|
+
"scraping",
|
|
22
|
+
"scraper",
|
|
23
|
+
"youtube-scraper",
|
|
24
|
+
"media-extractor",
|
|
25
|
+
"anti-cors"
|
|
26
|
+
],
|
|
27
|
+
"author": "ZetaGo-Aurum",
|
|
28
|
+
"license": "MIT",
|
|
29
|
+
"repository": {
|
|
30
|
+
"type": "git",
|
|
31
|
+
"url": "https://github.com/ZetaGo-Aurum/straw.git"
|
|
32
|
+
},
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@types/node": "^25.3.2",
|
|
35
|
+
"ts-node": "^10.9.2",
|
|
36
|
+
"tsup": "^8.5.1",
|
|
37
|
+
"tsx": "^4.21.0",
|
|
38
|
+
"typescript": "^5.9.3"
|
|
39
|
+
},
|
|
40
|
+
"dependencies": {
|
|
41
|
+
"cheerio": "^1.2.0",
|
|
42
|
+
"undici": "^7.22.0"
|
|
43
|
+
}
|
|
44
|
+
}
|
package/release.bat
CHANGED
package/src/scrapers/youtube.ts
CHANGED
|
@@ -14,11 +14,18 @@ export interface YouTubeFormats {
|
|
|
14
14
|
export interface YouTubeResult {
|
|
15
15
|
title: string;
|
|
16
16
|
author: string;
|
|
17
|
+
subscribers: string;
|
|
17
18
|
description: string;
|
|
18
19
|
views: string;
|
|
20
|
+
likes: string;
|
|
21
|
+
comments: string;
|
|
19
22
|
durationSeconds: string;
|
|
20
23
|
thumbnail: string;
|
|
21
|
-
formats:
|
|
24
|
+
formats: {
|
|
25
|
+
video: YouTubeFormats[];
|
|
26
|
+
videoOnly: YouTubeFormats[];
|
|
27
|
+
audio: YouTubeFormats[];
|
|
28
|
+
};
|
|
22
29
|
}
|
|
23
30
|
|
|
24
31
|
export class YouTubeScraper {
|
|
@@ -40,7 +47,7 @@ export class YouTubeScraper {
|
|
|
40
47
|
const videoId = videoIdMatch[1];
|
|
41
48
|
|
|
42
49
|
const html = await this.client.getText(url, {
|
|
43
|
-
headers: { 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430' }
|
|
50
|
+
headers: { 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430', 'Accept-Language': 'en-US,en;q=0.9' }
|
|
44
51
|
});
|
|
45
52
|
|
|
46
53
|
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
@@ -48,6 +55,12 @@ export class YouTubeScraper {
|
|
|
48
55
|
let visitorData = '';
|
|
49
56
|
let details: any = {};
|
|
50
57
|
|
|
58
|
+
let initialData: any = {};
|
|
59
|
+
const dataMatch = html.match(/var ytInitialData\s*=\s*({.*?});(?:<\/script>)/);
|
|
60
|
+
if (dataMatch && dataMatch[1]) {
|
|
61
|
+
try { initialData = JSON.parse(dataMatch[1]); } catch(e) {}
|
|
62
|
+
}
|
|
63
|
+
|
|
51
64
|
if (match && match[1]) {
|
|
52
65
|
const data = JSON.parse(match[1]);
|
|
53
66
|
details = data?.videoDetails || {};
|
|
@@ -96,13 +109,37 @@ export class YouTubeScraper {
|
|
|
96
109
|
throw new Error('Video details not found inside player response.');
|
|
97
110
|
}
|
|
98
111
|
|
|
99
|
-
|
|
112
|
+
let subscribers = '';
|
|
113
|
+
let likes = '';
|
|
114
|
+
let comments = '';
|
|
115
|
+
|
|
116
|
+
try {
|
|
117
|
+
const secInfo = initialData?.contents?.twoColumnWatchNextResults?.results?.results?.contents?.find((c: any) => c.videoSecondaryInfoRenderer)?.videoSecondaryInfoRenderer;
|
|
118
|
+
if (secInfo?.owner?.videoOwnerRenderer?.subscriberCountText?.simpleText) {
|
|
119
|
+
subscribers = secInfo.owner.videoOwnerRenderer.subscriberCountText.simpleText;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
const factoids = initialData?.engagementPanels?.find((p: any) => p.engagementPanelSectionListRenderer?.targetId === 'engagement-panel-structured-description')
|
|
123
|
+
?.engagementPanelSectionListRenderer?.content?.structuredDescriptionContentRenderer?.items?.find((i: any) => i.videoDescriptionHeaderRenderer)?.videoDescriptionHeaderRenderer?.factoid || [];
|
|
124
|
+
const likesFactoid = factoids.find((f: any) => f.factoidRenderer?.accessibilityText?.toLowerCase().includes('like'));
|
|
125
|
+
if (likesFactoid) likes = likesFactoid.factoidRenderer.accessibilityText;
|
|
126
|
+
|
|
127
|
+
const commentsPanel = initialData?.engagementPanels?.find((p: any) => p.engagementPanelSectionListRenderer?.panelIdentifier === 'engagement-panel-comments-section');
|
|
128
|
+
if (commentsPanel) {
|
|
129
|
+
comments = commentsPanel.engagementPanelSectionListRenderer.header.engagementPanelTitleHeaderRenderer.contextualInfo?.runs?.[0]?.text || '';
|
|
130
|
+
}
|
|
131
|
+
} catch (e) {}
|
|
132
|
+
|
|
133
|
+
const video: YouTubeFormats[] = [];
|
|
134
|
+
const videoOnly: YouTubeFormats[] = [];
|
|
135
|
+
const audio: YouTubeFormats[] = [];
|
|
136
|
+
|
|
100
137
|
const rawFormats = [...(streamingData?.formats || []), ...(streamingData?.adaptiveFormats || [])];
|
|
101
138
|
|
|
102
139
|
for (const format of rawFormats) {
|
|
103
140
|
if (format.url) {
|
|
104
141
|
const mimeType = format.mimeType || '';
|
|
105
|
-
|
|
142
|
+
const formatObj = {
|
|
106
143
|
url: format.url,
|
|
107
144
|
mimeType: mimeType,
|
|
108
145
|
width: format.width,
|
|
@@ -111,23 +148,29 @@ export class YouTubeScraper {
|
|
|
111
148
|
bitrate: format.bitrate,
|
|
112
149
|
hasAudio: mimeType.includes('audio/'),
|
|
113
150
|
hasVideo: mimeType.includes('video/')
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
continue;
|
|
151
|
+
};
|
|
152
|
+
|
|
153
|
+
if (formatObj.hasVideo && formatObj.hasAudio) video.push(formatObj);
|
|
154
|
+
else if (formatObj.hasVideo) videoOnly.push(formatObj);
|
|
155
|
+
else if (formatObj.hasAudio) audio.push(formatObj);
|
|
120
156
|
}
|
|
121
157
|
}
|
|
122
158
|
|
|
123
159
|
return {
|
|
124
160
|
title: details.title || '',
|
|
125
161
|
author: details.author || '',
|
|
162
|
+
subscribers: subscribers,
|
|
126
163
|
description: details.shortDescription || '',
|
|
127
164
|
views: details.viewCount || '0',
|
|
165
|
+
likes: likes,
|
|
166
|
+
comments: comments,
|
|
128
167
|
durationSeconds: details.lengthSeconds || '0',
|
|
129
168
|
thumbnail: details.thumbnail?.thumbnails?.[details.thumbnail.thumbnails.length - 1]?.url || '',
|
|
130
|
-
formats
|
|
169
|
+
formats: {
|
|
170
|
+
video,
|
|
171
|
+
videoOnly,
|
|
172
|
+
audio
|
|
173
|
+
}
|
|
131
174
|
};
|
|
132
175
|
}
|
|
133
176
|
}
|
package/straw/youtube.py
CHANGED
|
@@ -13,17 +13,28 @@ class YouTubeScraper:
|
|
|
13
13
|
raise Exception("Invalid YouTube URL")
|
|
14
14
|
video_id = match.group(1)
|
|
15
15
|
|
|
16
|
-
headers = {'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'}
|
|
16
|
+
headers = {'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430', 'Accept-Language': 'en-US,en;q=0.9'}
|
|
17
17
|
html = await self.client.get_text(url, headers=headers)
|
|
18
18
|
|
|
19
19
|
visitor_data = ""
|
|
20
20
|
details = {}
|
|
21
|
+
initial_data = {}
|
|
21
22
|
|
|
22
23
|
player_match = re.search(r'ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)', html)
|
|
23
24
|
if player_match:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
try:
|
|
26
|
+
data_html = json.loads(player_match.group(1))
|
|
27
|
+
details = data_html.get('videoDetails', {})
|
|
28
|
+
visitor_data = data_html.get('responseContext', {}).get('visitorData', '')
|
|
29
|
+
except:
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
data_match = re.search(r'var ytInitialData\s*=\s*({.*?});(?:<\/script>)', html)
|
|
33
|
+
if data_match:
|
|
34
|
+
try:
|
|
35
|
+
initial_data = json.loads(data_match.group(1))
|
|
36
|
+
except:
|
|
37
|
+
pass
|
|
27
38
|
|
|
28
39
|
if not visitor_data:
|
|
29
40
|
vd_match = re.search(r'"visitorData"\s*:\s*"([^"]+)"', html)
|
|
@@ -64,22 +75,66 @@ class YouTubeScraper:
|
|
|
64
75
|
if not details:
|
|
65
76
|
raise Exception("Video details not found inside player response.")
|
|
66
77
|
|
|
67
|
-
|
|
78
|
+
subscribers = ""
|
|
79
|
+
likes = ""
|
|
80
|
+
comments = ""
|
|
81
|
+
|
|
82
|
+
try:
|
|
83
|
+
contents = initial_data.get('contents', {}).get('twoColumnWatchNextResults', {}).get('results', {}).get('results', {}).get('contents', [])
|
|
84
|
+
for c in contents:
|
|
85
|
+
sec_info = c.get('videoSecondaryInfoRenderer')
|
|
86
|
+
if sec_info:
|
|
87
|
+
stext = sec_info.get('owner', {}).get('videoOwnerRenderer', {}).get('subscriberCountText', {}).get('simpleText')
|
|
88
|
+
if stext: subscribers = stext
|
|
89
|
+
|
|
90
|
+
panels = initial_data.get('engagementPanels', [])
|
|
91
|
+
for p in panels:
|
|
92
|
+
sr = p.get('engagementPanelSectionListRenderer', {})
|
|
93
|
+
if sr.get('targetId') == 'engagement-panel-structured-description':
|
|
94
|
+
items = sr.get('content', {}).get('structuredDescriptionContentRenderer', {}).get('items', [])
|
|
95
|
+
for i in items:
|
|
96
|
+
factoids = i.get('videoDescriptionHeaderRenderer', {}).get('factoid', [])
|
|
97
|
+
for f in factoids:
|
|
98
|
+
acc = f.get('factoidRenderer', {}).get('accessibilityText', '')
|
|
99
|
+
if 'like' in acc.lower():
|
|
100
|
+
likes = acc
|
|
101
|
+
|
|
102
|
+
if sr.get('panelIdentifier') == 'engagement-panel-comments-section':
|
|
103
|
+
runs = sr.get('header', {}).get('engagementPanelTitleHeaderRenderer', {}).get('contextualInfo', {}).get('runs', [])
|
|
104
|
+
if runs:
|
|
105
|
+
comments = runs[0].get('text', '')
|
|
106
|
+
except:
|
|
107
|
+
pass
|
|
108
|
+
|
|
109
|
+
video_combined = []
|
|
110
|
+
video_only = []
|
|
111
|
+
audio_only = []
|
|
112
|
+
|
|
68
113
|
raw_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])
|
|
69
114
|
|
|
70
115
|
for f in raw_formats:
|
|
71
116
|
if 'url' in f:
|
|
72
117
|
mime_type = f.get('mimeType', '')
|
|
73
|
-
|
|
118
|
+
has_audio = 'audio/' in mime_type
|
|
119
|
+
has_video = 'video/' in mime_type
|
|
120
|
+
|
|
121
|
+
f_obj = {
|
|
74
122
|
'url': f['url'],
|
|
75
123
|
'mimeType': mime_type,
|
|
76
124
|
'width': f.get('width'),
|
|
77
125
|
'height': f.get('height'),
|
|
78
126
|
'quality': f.get('qualityLabel') or f.get('quality'),
|
|
79
127
|
'bitrate': f.get('bitrate'),
|
|
80
|
-
'hasAudio':
|
|
81
|
-
'hasVideo':
|
|
82
|
-
}
|
|
128
|
+
'hasAudio': has_audio,
|
|
129
|
+
'hasVideo': has_video
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if has_video and has_audio:
|
|
133
|
+
video_combined.append(f_obj)
|
|
134
|
+
elif has_video:
|
|
135
|
+
video_only.append(f_obj)
|
|
136
|
+
elif has_audio:
|
|
137
|
+
audio_only.append(f_obj)
|
|
83
138
|
|
|
84
139
|
thumbnails = details.get('thumbnail', {}).get('thumbnails', [])
|
|
85
140
|
best_thumbnail = thumbnails[-1]['url'] if thumbnails else ''
|
|
@@ -87,9 +142,16 @@ class YouTubeScraper:
|
|
|
87
142
|
return {
|
|
88
143
|
'title': details.get('title', ''),
|
|
89
144
|
'author': details.get('author', ''),
|
|
145
|
+
'subscribers': subscribers,
|
|
90
146
|
'description': details.get('shortDescription', ''),
|
|
91
147
|
'views': details.get('viewCount', '0'),
|
|
148
|
+
'likes': likes,
|
|
149
|
+
'comments': comments,
|
|
92
150
|
'durationSeconds': details.get('lengthSeconds', '0'),
|
|
93
151
|
'thumbnail': best_thumbnail,
|
|
94
|
-
'formats':
|
|
152
|
+
'formats': {
|
|
153
|
+
'video': video_combined,
|
|
154
|
+
'videoOnly': video_only,
|
|
155
|
+
'audio': audio_only
|
|
156
|
+
}
|
|
95
157
|
}
|
package/tests/test.py
CHANGED
|
@@ -24,9 +24,13 @@ async def run_tests():
|
|
|
24
24
|
print("2. Testing YouTube Scraper...")
|
|
25
25
|
yt = YouTubeScraper()
|
|
26
26
|
yt_res = await yt.scrape_video("https://www.youtube.com/watch?v=aqz-KE-bpKQ")
|
|
27
|
-
print(f"YouTube Scraper Output: Title = {yt_res
|
|
28
|
-
print(f"YouTube Scraper Output:
|
|
29
|
-
print(f"YouTube Scraper Output:
|
|
27
|
+
print(f"YouTube Scraper Output: Title = {yt_res.get('title')}")
|
|
28
|
+
print(f"YouTube Scraper Output: Subscribers = {yt_res.get('subscribers')}")
|
|
29
|
+
print(f"YouTube Scraper Output: Likes = {yt_res.get('likes')}")
|
|
30
|
+
print(f"YouTube Scraper Output: Comments = {yt_res.get('comments')}")
|
|
31
|
+
print(f"YouTube Scraper Output: Duration = {yt_res.get('durationSeconds')} seconds")
|
|
32
|
+
formats = yt_res.get('formats', {})
|
|
33
|
+
print(f"YouTube Scraper Output: Found {len(formats.get('video', []))} video, {len(formats.get('videoOnly', []))} video-only, and {len(formats.get('audio', []))} audio formats")
|
|
30
34
|
await yt.client.close()
|
|
31
35
|
|
|
32
36
|
print("\n" + "-" * 33)
|
package/tests/test.ts
CHANGED
|
@@ -19,9 +19,12 @@ async function runTests() {
|
|
|
19
19
|
const ytClient = straw.youtube();
|
|
20
20
|
// Use a generic test video like Big Buck Bunny
|
|
21
21
|
const ytResult = await ytClient.scrapeVideo('https://www.youtube.com/watch?v=aqz-KE-bpKQ');
|
|
22
|
-
console.log(
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
console.log('YouTube Scraper Output: Title =', ytResult.title);
|
|
23
|
+
console.log('YouTube Scraper Output: Subscribers =', ytResult.subscribers);
|
|
24
|
+
console.log('YouTube Scraper Output: Likes =', ytResult.likes);
|
|
25
|
+
console.log('YouTube Scraper Output: Comments =', ytResult.comments);
|
|
26
|
+
console.log('YouTube Scraper Output: Duration =', ytResult.durationSeconds, 'seconds');
|
|
27
|
+
console.log(`YouTube Scraper Output: Found ${ytResult.formats.video.length} video (combined), ${ytResult.formats.videoOnly.length} video-only, and ${ytResult.formats.audio.length} audio formats.`);
|
|
25
28
|
|
|
26
29
|
console.log('\n---------------------------------');
|
|
27
30
|
|
package/test_api.js
DELETED
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
const undici = require('undici');
|
|
2
|
-
|
|
3
|
-
async function testInnerTube() {
|
|
4
|
-
const videoId = '_4j1Abt_AiM';
|
|
5
|
-
|
|
6
|
-
const payload = {
|
|
7
|
-
context: {
|
|
8
|
-
client: {
|
|
9
|
-
hl: 'en',
|
|
10
|
-
gl: 'US',
|
|
11
|
-
clientName: 'IOS',
|
|
12
|
-
clientVersion: '19.28.1',
|
|
13
|
-
osName: 'iOS',
|
|
14
|
-
osVersion: '17.5.1',
|
|
15
|
-
deviceMake: 'Apple',
|
|
16
|
-
deviceModel: 'iPhone16,2'
|
|
17
|
-
}
|
|
18
|
-
},
|
|
19
|
-
videoId: videoId
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
23
|
-
method: 'POST',
|
|
24
|
-
headers: {
|
|
25
|
-
'Content-Type': 'application/json',
|
|
26
|
-
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
27
|
-
},
|
|
28
|
-
body: JSON.stringify(payload)
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
const body = await res.body.json();
|
|
32
|
-
console.log('Full JSON Response Keys:', Object.keys(body));
|
|
33
|
-
console.log('Raw JSON String (Truncated):', JSON.stringify(body).slice(0, 1000));
|
|
34
|
-
console.log('Playability:', body.playabilityStatus);
|
|
35
|
-
console.log('Title:', body.videoDetails?.title);
|
|
36
|
-
|
|
37
|
-
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
38
|
-
console.log('Total Formats:', formats.length);
|
|
39
|
-
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
testInnerTube();
|
package/test_api_clients.js
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
const undici = require('undici');
|
|
2
|
-
|
|
3
|
-
async function testClient(clientName, clientVersion, userAgent, osName='', osVersion='') {
|
|
4
|
-
const payload = {
|
|
5
|
-
context: {
|
|
6
|
-
client: {
|
|
7
|
-
hl: 'en',
|
|
8
|
-
gl: 'US',
|
|
9
|
-
clientName,
|
|
10
|
-
clientVersion,
|
|
11
|
-
osName,
|
|
12
|
-
osVersion
|
|
13
|
-
}
|
|
14
|
-
},
|
|
15
|
-
videoId: '_4j1Abt_AiM'
|
|
16
|
-
};
|
|
17
|
-
|
|
18
|
-
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
19
|
-
method: 'POST',
|
|
20
|
-
headers: {
|
|
21
|
-
'Content-Type': 'application/json',
|
|
22
|
-
'User-Agent': userAgent
|
|
23
|
-
},
|
|
24
|
-
body: JSON.stringify(payload)
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
const body = await res.body.json();
|
|
28
|
-
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
29
|
-
console.log(`[${clientName}] Playability:`, body.playabilityStatus?.status, '| Formats:', formats.length);
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
async function runAll() {
|
|
33
|
-
await testClient('WEB_EMBED', '1.20230209.00.00', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)');
|
|
34
|
-
await testClient('TVHTML5', '7.20230209.00.00', 'Mozilla/5.0 (Web0S; Linux/SmartTV) AppleWebKit/537.36 (KHTML, like Gecko)');
|
|
35
|
-
await testClient('ANDROID', '17.31.35', 'com.google.android.youtube/17.31.35 (Linux; U; Android 11)', 'Android', '11');
|
|
36
|
-
await testClient('IOS', '19.28.1', 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)', 'iOS', '17.5.1');
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
runAll();
|
package/test_client.js
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
const { StrawClient } = require('./dist/core/client.js');
|
|
2
|
-
|
|
3
|
-
async function test() {
|
|
4
|
-
const client = new StrawClient();
|
|
5
|
-
const payload = {
|
|
6
|
-
context: {
|
|
7
|
-
client: {
|
|
8
|
-
hl: 'en',
|
|
9
|
-
gl: 'US',
|
|
10
|
-
clientName: 'IOS',
|
|
11
|
-
clientVersion: '19.28.1',
|
|
12
|
-
osName: 'iOS',
|
|
13
|
-
osVersion: '17.5.1',
|
|
14
|
-
deviceMake: 'Apple',
|
|
15
|
-
deviceModel: 'iPhone16,2'
|
|
16
|
-
}
|
|
17
|
-
},
|
|
18
|
-
videoId: '_4j1Abt_AiM'
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
const res = await client.request('https://www.youtube.com/youtubei/v1/player', {
|
|
22
|
-
method: 'POST',
|
|
23
|
-
headers: {
|
|
24
|
-
'Content-Type': 'application/json',
|
|
25
|
-
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
26
|
-
},
|
|
27
|
-
body: JSON.stringify(payload)
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
const data = await res.json();
|
|
31
|
-
console.log(Object.keys(data));
|
|
32
|
-
if (data.playabilityStatus) {
|
|
33
|
-
console.log('Playability:', data.playabilityStatus);
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
test();
|
package/test_embed.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
const undici = require('undici');
|
|
2
|
-
|
|
3
|
-
async function testEmbed() {
|
|
4
|
-
const url = 'https://www.youtube.com/embed/_4j1Abt_AiM';
|
|
5
|
-
const res = await undici.request(url, {
|
|
6
|
-
headers: {
|
|
7
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
|
8
|
-
'Accept-Language': 'en-US,en;q=0.9',
|
|
9
|
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
10
|
-
}
|
|
11
|
-
});
|
|
12
|
-
const html = await res.body.text();
|
|
13
|
-
|
|
14
|
-
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
15
|
-
const match = html.match(regex);
|
|
16
|
-
if (match) {
|
|
17
|
-
const data = JSON.parse(match[1]);
|
|
18
|
-
const formats = [...(data.streamingData?.formats || []), ...(data.streamingData?.adaptiveFormats || [])];
|
|
19
|
-
console.log('Embed playability:', data.playabilityStatus?.status);
|
|
20
|
-
console.log('Formats found:', formats.length);
|
|
21
|
-
} else {
|
|
22
|
-
console.log('No ytInitialPlayerResponse found in embed HTML');
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
testEmbed();
|
package/test_html.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
const undici = require('undici');
|
|
2
|
-
|
|
3
|
-
async function testHtml() {
|
|
4
|
-
const url = 'https://www.youtube.com/watch?v=_4j1Abt_AiM';
|
|
5
|
-
const res = await undici.request(url, {
|
|
6
|
-
method: 'GET',
|
|
7
|
-
headers: {
|
|
8
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
|
|
9
|
-
'Accept-Language': 'en-US,en;q=0.9',
|
|
10
|
-
'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
|
|
11
|
-
}
|
|
12
|
-
});
|
|
13
|
-
|
|
14
|
-
const html = await res.body.text();
|
|
15
|
-
const match = html.match(/ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/);
|
|
16
|
-
if (match) {
|
|
17
|
-
const data = JSON.parse(match[1]);
|
|
18
|
-
const formats = [...(data.streamingData?.formats || []), ...(data.streamingData?.adaptiveFormats || [])];
|
|
19
|
-
console.log('Got HTML Response with Player:', data.playabilityStatus?.status);
|
|
20
|
-
console.log('Formats:', formats.length);
|
|
21
|
-
} else {
|
|
22
|
-
console.log('No ytInitialPlayerResponse found in direct HTML fetching.');
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
testHtml();
|
package/test_visitor.js
DELETED
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
const undici = require('undici');
|
|
2
|
-
|
|
3
|
-
async function testVisitor() {
|
|
4
|
-
const videoId = '_4j1Abt_AiM';
|
|
5
|
-
const url = `https://www.youtube.com/watch?v=${videoId}`;
|
|
6
|
-
|
|
7
|
-
const htmlRes = await undici.request(url, {
|
|
8
|
-
method: 'GET',
|
|
9
|
-
headers: {
|
|
10
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/115.0.0.0 Safari/537.36',
|
|
11
|
-
'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
|
|
12
|
-
}
|
|
13
|
-
});
|
|
14
|
-
|
|
15
|
-
const html = await htmlRes.body.text();
|
|
16
|
-
|
|
17
|
-
let visitorData = '';
|
|
18
|
-
const match = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
|
|
19
|
-
if (match) visitorData = match[1];
|
|
20
|
-
|
|
21
|
-
console.log('Got Visitor Data:', visitorData);
|
|
22
|
-
|
|
23
|
-
const payload = {
|
|
24
|
-
context: {
|
|
25
|
-
client: {
|
|
26
|
-
hl: 'en',
|
|
27
|
-
gl: 'US',
|
|
28
|
-
clientName: 'IOS',
|
|
29
|
-
clientVersion: '19.28.1',
|
|
30
|
-
osName: 'iOS',
|
|
31
|
-
osVersion: '17.5.1',
|
|
32
|
-
deviceMake: 'Apple',
|
|
33
|
-
deviceModel: 'iPhone16,2',
|
|
34
|
-
visitorData: visitorData
|
|
35
|
-
}
|
|
36
|
-
},
|
|
37
|
-
videoId: videoId
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
41
|
-
method: 'POST',
|
|
42
|
-
headers: {
|
|
43
|
-
'Content-Type': 'application/json',
|
|
44
|
-
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
45
|
-
},
|
|
46
|
-
body: JSON.stringify(payload)
|
|
47
|
-
});
|
|
48
|
-
|
|
49
|
-
const body = await res.body.json();
|
|
50
|
-
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
51
|
-
|
|
52
|
-
console.log('Target Playability:', body.playabilityStatus?.status);
|
|
53
|
-
console.log('Target Formats:', formats.length);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
testVisitor();
|
package/test_vr.js
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
const undici = require('undici');
|
|
2
|
-
|
|
3
|
-
async function testVR() {
|
|
4
|
-
const payload = {
|
|
5
|
-
context: {
|
|
6
|
-
client: {
|
|
7
|
-
clientName: 'ANDROID_TESTSUITE',
|
|
8
|
-
clientVersion: '1.9',
|
|
9
|
-
androidSdkVersion: 30,
|
|
10
|
-
hl: 'en',
|
|
11
|
-
gl: 'US',
|
|
12
|
-
utcOffsetMinutes: 0
|
|
13
|
-
}
|
|
14
|
-
},
|
|
15
|
-
videoId: '_4j1Abt_AiM'
|
|
16
|
-
};
|
|
17
|
-
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
18
|
-
method: 'POST',
|
|
19
|
-
headers: { 'Content-Type': 'application/json', 'User-Agent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11)' },
|
|
20
|
-
body: JSON.stringify(payload)
|
|
21
|
-
});
|
|
22
|
-
const body = await res.body.json();
|
|
23
|
-
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
24
|
-
console.log('Playability:', body.playabilityStatus?.status);
|
|
25
|
-
console.log('Formats:', formats.length);
|
|
26
|
-
}
|
|
27
|
-
testVR();
|
package/test_yt.js
DELETED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
const straw = require('./dist/index.js');
|
|
2
|
-
|
|
3
|
-
async function run() {
|
|
4
|
-
console.time('YouTube Scrape');
|
|
5
|
-
const yt = new straw.YouTubeScraper();
|
|
6
|
-
try {
|
|
7
|
-
const res = await yt.scrapeVideo('https://youtu.be/_4j1Abt_AiM?si=qJY_gv4F_adBYMYP');
|
|
8
|
-
console.log('Title:', res.title);
|
|
9
|
-
console.log('Formats:', res.formats.length);
|
|
10
|
-
console.log('First format URL (truncated):', res.formats[0]?.url?.substring(0, 100));
|
|
11
|
-
} catch (e) {
|
|
12
|
-
console.error('Scrape failed:', e);
|
|
13
|
-
}
|
|
14
|
-
console.timeEnd('YouTube Scrape');
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
run();
|