@zetagoaurum-dev/straw 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/dist/index.js +46 -8
- package/dist/index.mjs +46 -8
- package/package.json +1 -1
- package/release.bat +4 -0
- package/src/scrapers/youtube.ts +50 -9
- package/straw/youtube.py +49 -9
- package/test_api.js +42 -0
- package/test_api_clients.js +39 -0
- package/test_client.js +37 -0
- package/test_embed.js +26 -0
- package/test_html.js +26 -0
- package/test_visitor.js +56 -0
- package/test_vr.js +27 -0
- package/test_yt.js +17 -0
- package/straw/__pycache__/__init__.cpython-311.pyc +0 -0
- package/straw/__pycache__/client.cpython-311.pyc +0 -0
- package/straw/__pycache__/helpers.cpython-311.pyc +0 -0
- package/straw/__pycache__/media.cpython-311.pyc +0 -0
- package/straw/__pycache__/web.cpython-311.pyc +0 -0
- package/straw/__pycache__/youtube.cpython-311.pyc +0 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [1.1.1] "Performance Patch" - 2026-02-27
|
|
6
|
+
- **Perf:** Re-engineered the YouTube scraper in Node.js and Python to use the `IOS` InnerTube API directly, injecting localized `visitorData` tokens to seamlessly bypass bot checks and cipher encryption. Video format lists are returned instantaneously for optimal downloading infrastructure.
|
|
7
|
+
- **Fix:** Fixed HTML parser blocking on high-volume deployed servers by upgrading to the direct `POST /youtubei/v1/player` endpoints.
|
|
8
|
+
|
|
5
9
|
## [1.1.0] - "Milk Tea" Release - 2026-02-27
|
|
6
10
|
|
|
7
11
|
### Changed
|
package/dist/index.js
CHANGED
|
@@ -185,19 +185,57 @@ var YouTubeScraper = class {
|
|
|
185
185
|
* Parses the ytInitialPlayerResponse object embedded in the watch HTML.
|
|
186
186
|
*/
|
|
187
187
|
async scrapeVideo(url) {
|
|
188
|
+
const videoIdMatch = url.match(/(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))([^"&?\/\s]{11})/);
|
|
189
|
+
if (!videoIdMatch || !videoIdMatch[1]) {
|
|
190
|
+
throw new Error("Invalid YouTube URL");
|
|
191
|
+
}
|
|
192
|
+
const videoId = videoIdMatch[1];
|
|
188
193
|
const html = await this.client.getText(url, {
|
|
189
|
-
headers: {
|
|
190
|
-
"Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430"
|
|
191
|
-
}
|
|
194
|
+
headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
|
|
192
195
|
});
|
|
193
196
|
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
194
197
|
const match = html.match(regex);
|
|
195
|
-
|
|
196
|
-
|
|
198
|
+
let visitorData = "";
|
|
199
|
+
let details = {};
|
|
200
|
+
if (match && match[1]) {
|
|
201
|
+
const data = JSON.parse(match[1]);
|
|
202
|
+
details = data?.videoDetails || {};
|
|
203
|
+
visitorData = data?.responseContext?.visitorData || "";
|
|
204
|
+
}
|
|
205
|
+
if (!visitorData) {
|
|
206
|
+
const vdMatch = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
|
|
207
|
+
if (vdMatch) visitorData = vdMatch[1];
|
|
208
|
+
}
|
|
209
|
+
const payload = {
|
|
210
|
+
context: {
|
|
211
|
+
client: {
|
|
212
|
+
hl: "en",
|
|
213
|
+
gl: "US",
|
|
214
|
+
clientName: "IOS",
|
|
215
|
+
clientVersion: "19.28.1",
|
|
216
|
+
osName: "iOS",
|
|
217
|
+
osVersion: "17.5.1",
|
|
218
|
+
deviceMake: "Apple",
|
|
219
|
+
deviceModel: "iPhone16,2",
|
|
220
|
+
visitorData
|
|
221
|
+
}
|
|
222
|
+
},
|
|
223
|
+
videoId
|
|
224
|
+
};
|
|
225
|
+
const res = await this.client.request("https://www.youtube.com/youtubei/v1/player", {
|
|
226
|
+
method: "POST",
|
|
227
|
+
headers: {
|
|
228
|
+
"Accept": "application/json",
|
|
229
|
+
"Content-Type": "application/json",
|
|
230
|
+
"User-Agent": "com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)"
|
|
231
|
+
},
|
|
232
|
+
body: JSON.stringify(payload)
|
|
233
|
+
});
|
|
234
|
+
const apiData = await res.json();
|
|
235
|
+
if (!details.title) {
|
|
236
|
+
details = apiData?.videoDetails || {};
|
|
197
237
|
}
|
|
198
|
-
const
|
|
199
|
-
const details = data?.videoDetails;
|
|
200
|
-
const streamingData = data?.streamingData;
|
|
238
|
+
const streamingData = apiData?.streamingData;
|
|
201
239
|
if (!details) {
|
|
202
240
|
throw new Error("Video details not found inside player response.");
|
|
203
241
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -145,19 +145,57 @@ var YouTubeScraper = class {
|
|
|
145
145
|
* Parses the ytInitialPlayerResponse object embedded in the watch HTML.
|
|
146
146
|
*/
|
|
147
147
|
async scrapeVideo(url) {
|
|
148
|
+
const videoIdMatch = url.match(/(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))([^"&?\/\s]{11})/);
|
|
149
|
+
if (!videoIdMatch || !videoIdMatch[1]) {
|
|
150
|
+
throw new Error("Invalid YouTube URL");
|
|
151
|
+
}
|
|
152
|
+
const videoId = videoIdMatch[1];
|
|
148
153
|
const html = await this.client.getText(url, {
|
|
149
|
-
headers: {
|
|
150
|
-
"Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430"
|
|
151
|
-
}
|
|
154
|
+
headers: { "Cookie": "CONSENT=YES+cb.20230501-14-p0.en+FX+430" }
|
|
152
155
|
});
|
|
153
156
|
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
154
157
|
const match = html.match(regex);
|
|
155
|
-
|
|
156
|
-
|
|
158
|
+
let visitorData = "";
|
|
159
|
+
let details = {};
|
|
160
|
+
if (match && match[1]) {
|
|
161
|
+
const data = JSON.parse(match[1]);
|
|
162
|
+
details = data?.videoDetails || {};
|
|
163
|
+
visitorData = data?.responseContext?.visitorData || "";
|
|
164
|
+
}
|
|
165
|
+
if (!visitorData) {
|
|
166
|
+
const vdMatch = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
|
|
167
|
+
if (vdMatch) visitorData = vdMatch[1];
|
|
168
|
+
}
|
|
169
|
+
const payload = {
|
|
170
|
+
context: {
|
|
171
|
+
client: {
|
|
172
|
+
hl: "en",
|
|
173
|
+
gl: "US",
|
|
174
|
+
clientName: "IOS",
|
|
175
|
+
clientVersion: "19.28.1",
|
|
176
|
+
osName: "iOS",
|
|
177
|
+
osVersion: "17.5.1",
|
|
178
|
+
deviceMake: "Apple",
|
|
179
|
+
deviceModel: "iPhone16,2",
|
|
180
|
+
visitorData
|
|
181
|
+
}
|
|
182
|
+
},
|
|
183
|
+
videoId
|
|
184
|
+
};
|
|
185
|
+
const res = await this.client.request("https://www.youtube.com/youtubei/v1/player", {
|
|
186
|
+
method: "POST",
|
|
187
|
+
headers: {
|
|
188
|
+
"Accept": "application/json",
|
|
189
|
+
"Content-Type": "application/json",
|
|
190
|
+
"User-Agent": "com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)"
|
|
191
|
+
},
|
|
192
|
+
body: JSON.stringify(payload)
|
|
193
|
+
});
|
|
194
|
+
const apiData = await res.json();
|
|
195
|
+
if (!details.title) {
|
|
196
|
+
details = apiData?.videoDetails || {};
|
|
157
197
|
}
|
|
158
|
-
const
|
|
159
|
-
const details = data?.videoDetails;
|
|
160
|
-
const streamingData = data?.streamingData;
|
|
198
|
+
const streamingData = apiData?.streamingData;
|
|
161
199
|
if (!details) {
|
|
162
200
|
throw new Error("Video details not found inside player response.");
|
|
163
201
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zetagoaurum-dev/straw",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Enterprise-grade unified JS/TS and Python scraping library for Web, YouTube, and Media (Images, Audio, Video, Documents)",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
package/release.bat
ADDED
package/src/scrapers/youtube.ts
CHANGED
|
@@ -33,23 +33,64 @@ export class YouTubeScraper {
|
|
|
33
33
|
* Parses the ytInitialPlayerResponse object embedded in the watch HTML.
|
|
34
34
|
*/
|
|
35
35
|
public async scrapeVideo(url: string): Promise<YouTubeResult> {
|
|
36
|
+
const videoIdMatch = url.match(/(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))([^"&?\/\s]{11})/);
|
|
37
|
+
if (!videoIdMatch || !videoIdMatch[1]) {
|
|
38
|
+
throw new Error('Invalid YouTube URL');
|
|
39
|
+
}
|
|
40
|
+
const videoId = videoIdMatch[1];
|
|
41
|
+
|
|
36
42
|
const html = await this.client.getText(url, {
|
|
37
|
-
headers: {
|
|
38
|
-
'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
|
|
39
|
-
}
|
|
43
|
+
headers: { 'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430' }
|
|
40
44
|
});
|
|
41
45
|
|
|
42
|
-
// Find ytInitialPlayerResponse JSON fragment in the HTML
|
|
43
46
|
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
44
47
|
const match = html.match(regex);
|
|
48
|
+
let visitorData = '';
|
|
49
|
+
let details: any = {};
|
|
50
|
+
|
|
51
|
+
if (match && match[1]) {
|
|
52
|
+
const data = JSON.parse(match[1]);
|
|
53
|
+
details = data?.videoDetails || {};
|
|
54
|
+
visitorData = data?.responseContext?.visitorData || '';
|
|
55
|
+
}
|
|
45
56
|
|
|
46
|
-
if (!
|
|
47
|
-
|
|
57
|
+
if (!visitorData) {
|
|
58
|
+
const vdMatch = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
|
|
59
|
+
if (vdMatch) visitorData = vdMatch[1];
|
|
48
60
|
}
|
|
49
61
|
|
|
50
|
-
const
|
|
51
|
-
|
|
52
|
-
|
|
62
|
+
const payload = {
|
|
63
|
+
context: {
|
|
64
|
+
client: {
|
|
65
|
+
hl: 'en',
|
|
66
|
+
gl: 'US',
|
|
67
|
+
clientName: 'IOS',
|
|
68
|
+
clientVersion: '19.28.1',
|
|
69
|
+
osName: 'iOS',
|
|
70
|
+
osVersion: '17.5.1',
|
|
71
|
+
deviceMake: 'Apple',
|
|
72
|
+
deviceModel: 'iPhone16,2',
|
|
73
|
+
visitorData: visitorData
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
videoId: videoId
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
const res = await this.client.request('https://www.youtube.com/youtubei/v1/player', {
|
|
80
|
+
method: 'POST',
|
|
81
|
+
headers: {
|
|
82
|
+
'Accept': 'application/json',
|
|
83
|
+
'Content-Type': 'application/json',
|
|
84
|
+
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
85
|
+
},
|
|
86
|
+
body: JSON.stringify(payload)
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
const apiData = await res.json() as any;
|
|
90
|
+
if (!details.title) {
|
|
91
|
+
details = apiData?.videoDetails || {};
|
|
92
|
+
}
|
|
93
|
+
const streamingData = apiData?.streamingData;
|
|
53
94
|
|
|
54
95
|
if (!details) {
|
|
55
96
|
throw new Error('Video details not found inside player response.');
|
package/straw/youtube.py
CHANGED
|
@@ -8,18 +8,58 @@ class YouTubeScraper:
|
|
|
8
8
|
self.client = StrawClient(**client_options)
|
|
9
9
|
|
|
10
10
|
async def scrape_video(self, url: str) -> Dict[str, Any]:
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
match = re.search(r'(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))([^"&?\/\s]{11})', url)
|
|
12
|
+
if not match:
|
|
13
|
+
raise Exception("Invalid YouTube URL")
|
|
14
|
+
video_id = match.group(1)
|
|
15
|
+
|
|
16
|
+
headers = {'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'}
|
|
14
17
|
html = await self.client.get_text(url, headers=headers)
|
|
18
|
+
|
|
19
|
+
visitor_data = ""
|
|
20
|
+
details = {}
|
|
21
|
+
|
|
22
|
+
player_match = re.search(r'ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)', html)
|
|
23
|
+
if player_match:
|
|
24
|
+
data_html = json.loads(player_match.group(1))
|
|
25
|
+
details = data_html.get('videoDetails', {})
|
|
26
|
+
visitor_data = data_html.get('responseContext', {}).get('visitorData', '')
|
|
27
|
+
|
|
28
|
+
if not visitor_data:
|
|
29
|
+
vd_match = re.search(r'"visitorData"\s*:\s*"([^"]+)"', html)
|
|
30
|
+
if vd_match:
|
|
31
|
+
visitor_data = vd_match.group(1)
|
|
15
32
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
33
|
+
payload = {
|
|
34
|
+
"context": {
|
|
35
|
+
"client": {
|
|
36
|
+
"hl": "en",
|
|
37
|
+
"gl": "US",
|
|
38
|
+
"clientName": "IOS",
|
|
39
|
+
"clientVersion": "19.28.1",
|
|
40
|
+
"osName": "iOS",
|
|
41
|
+
"osVersion": "17.5.1",
|
|
42
|
+
"deviceMake": "Apple",
|
|
43
|
+
"deviceModel": "iPhone16,2",
|
|
44
|
+
"visitorData": visitor_data
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"videoId": video_id
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
api_headers = {
|
|
51
|
+
'Accept': 'application/json',
|
|
52
|
+
'Content-Type': 'application/json',
|
|
53
|
+
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
54
|
+
}
|
|
19
55
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
56
|
+
response = await self.client.request('POST', 'https://www.youtube.com/youtubei/v1/player', json=payload, headers=api_headers)
|
|
57
|
+
api_data = response.json()
|
|
58
|
+
|
|
59
|
+
if not details.get('title'):
|
|
60
|
+
details = api_data.get('videoDetails', {})
|
|
61
|
+
|
|
62
|
+
streaming_data = api_data.get('streamingData', {})
|
|
23
63
|
|
|
24
64
|
if not details:
|
|
25
65
|
raise Exception("Video details not found inside player response.")
|
package/test_api.js
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
const undici = require('undici');
|
|
2
|
+
|
|
3
|
+
async function testInnerTube() {
|
|
4
|
+
const videoId = '_4j1Abt_AiM';
|
|
5
|
+
|
|
6
|
+
const payload = {
|
|
7
|
+
context: {
|
|
8
|
+
client: {
|
|
9
|
+
hl: 'en',
|
|
10
|
+
gl: 'US',
|
|
11
|
+
clientName: 'IOS',
|
|
12
|
+
clientVersion: '19.28.1',
|
|
13
|
+
osName: 'iOS',
|
|
14
|
+
osVersion: '17.5.1',
|
|
15
|
+
deviceMake: 'Apple',
|
|
16
|
+
deviceModel: 'iPhone16,2'
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
videoId: videoId
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
23
|
+
method: 'POST',
|
|
24
|
+
headers: {
|
|
25
|
+
'Content-Type': 'application/json',
|
|
26
|
+
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
27
|
+
},
|
|
28
|
+
body: JSON.stringify(payload)
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
const body = await res.body.json();
|
|
32
|
+
console.log('Full JSON Response Keys:', Object.keys(body));
|
|
33
|
+
console.log('Raw JSON String (Truncated):', JSON.stringify(body).slice(0, 1000));
|
|
34
|
+
console.log('Playability:', body.playabilityStatus);
|
|
35
|
+
console.log('Title:', body.videoDetails?.title);
|
|
36
|
+
|
|
37
|
+
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
38
|
+
console.log('Total Formats:', formats.length);
|
|
39
|
+
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
testInnerTube();
|
package/test_api_clients.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
const undici = require('undici');
|
|
2
|
+
|
|
3
|
+
async function testClient(clientName, clientVersion, userAgent, osName='', osVersion='') {
|
|
4
|
+
const payload = {
|
|
5
|
+
context: {
|
|
6
|
+
client: {
|
|
7
|
+
hl: 'en',
|
|
8
|
+
gl: 'US',
|
|
9
|
+
clientName,
|
|
10
|
+
clientVersion,
|
|
11
|
+
osName,
|
|
12
|
+
osVersion
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
videoId: '_4j1Abt_AiM'
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
19
|
+
method: 'POST',
|
|
20
|
+
headers: {
|
|
21
|
+
'Content-Type': 'application/json',
|
|
22
|
+
'User-Agent': userAgent
|
|
23
|
+
},
|
|
24
|
+
body: JSON.stringify(payload)
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
const body = await res.body.json();
|
|
28
|
+
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
29
|
+
console.log(`[${clientName}] Playability:`, body.playabilityStatus?.status, '| Formats:', formats.length);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
async function runAll() {
|
|
33
|
+
await testClient('WEB_EMBED', '1.20230209.00.00', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)');
|
|
34
|
+
await testClient('TVHTML5', '7.20230209.00.00', 'Mozilla/5.0 (Web0S; Linux/SmartTV) AppleWebKit/537.36 (KHTML, like Gecko)');
|
|
35
|
+
await testClient('ANDROID', '17.31.35', 'com.google.android.youtube/17.31.35 (Linux; U; Android 11)', 'Android', '11');
|
|
36
|
+
await testClient('IOS', '19.28.1', 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)', 'iOS', '17.5.1');
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
runAll();
|
package/test_client.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
const { StrawClient } = require('./dist/core/client.js');
|
|
2
|
+
|
|
3
|
+
async function test() {
|
|
4
|
+
const client = new StrawClient();
|
|
5
|
+
const payload = {
|
|
6
|
+
context: {
|
|
7
|
+
client: {
|
|
8
|
+
hl: 'en',
|
|
9
|
+
gl: 'US',
|
|
10
|
+
clientName: 'IOS',
|
|
11
|
+
clientVersion: '19.28.1',
|
|
12
|
+
osName: 'iOS',
|
|
13
|
+
osVersion: '17.5.1',
|
|
14
|
+
deviceMake: 'Apple',
|
|
15
|
+
deviceModel: 'iPhone16,2'
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
videoId: '_4j1Abt_AiM'
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
const res = await client.request('https://www.youtube.com/youtubei/v1/player', {
|
|
22
|
+
method: 'POST',
|
|
23
|
+
headers: {
|
|
24
|
+
'Content-Type': 'application/json',
|
|
25
|
+
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
26
|
+
},
|
|
27
|
+
body: JSON.stringify(payload)
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
const data = await res.json();
|
|
31
|
+
console.log(Object.keys(data));
|
|
32
|
+
if (data.playabilityStatus) {
|
|
33
|
+
console.log('Playability:', data.playabilityStatus);
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
test();
|
package/test_embed.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const undici = require('undici');
|
|
2
|
+
|
|
3
|
+
async function testEmbed() {
|
|
4
|
+
const url = 'https://www.youtube.com/embed/_4j1Abt_AiM';
|
|
5
|
+
const res = await undici.request(url, {
|
|
6
|
+
headers: {
|
|
7
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
|
|
8
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
9
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
const html = await res.body.text();
|
|
13
|
+
|
|
14
|
+
const regex = /ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/;
|
|
15
|
+
const match = html.match(regex);
|
|
16
|
+
if (match) {
|
|
17
|
+
const data = JSON.parse(match[1]);
|
|
18
|
+
const formats = [...(data.streamingData?.formats || []), ...(data.streamingData?.adaptiveFormats || [])];
|
|
19
|
+
console.log('Embed playability:', data.playabilityStatus?.status);
|
|
20
|
+
console.log('Formats found:', formats.length);
|
|
21
|
+
} else {
|
|
22
|
+
console.log('No ytInitialPlayerResponse found in embed HTML');
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
testEmbed();
|
package/test_html.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const undici = require('undici');
|
|
2
|
+
|
|
3
|
+
async function testHtml() {
|
|
4
|
+
const url = 'https://www.youtube.com/watch?v=_4j1Abt_AiM';
|
|
5
|
+
const res = await undici.request(url, {
|
|
6
|
+
method: 'GET',
|
|
7
|
+
headers: {
|
|
8
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0',
|
|
9
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
10
|
+
'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
|
|
11
|
+
}
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
const html = await res.body.text();
|
|
15
|
+
const match = html.match(/ytInitialPlayerResponse\s*=\s*({.*?});(?:var|<\/script>)/);
|
|
16
|
+
if (match) {
|
|
17
|
+
const data = JSON.parse(match[1]);
|
|
18
|
+
const formats = [...(data.streamingData?.formats || []), ...(data.streamingData?.adaptiveFormats || [])];
|
|
19
|
+
console.log('Got HTML Response with Player:', data.playabilityStatus?.status);
|
|
20
|
+
console.log('Formats:', formats.length);
|
|
21
|
+
} else {
|
|
22
|
+
console.log('No ytInitialPlayerResponse found in direct HTML fetching.');
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
testHtml();
|
package/test_visitor.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
const undici = require('undici');
|
|
2
|
+
|
|
3
|
+
async function testVisitor() {
|
|
4
|
+
const videoId = '_4j1Abt_AiM';
|
|
5
|
+
const url = `https://www.youtube.com/watch?v=${videoId}`;
|
|
6
|
+
|
|
7
|
+
const htmlRes = await undici.request(url, {
|
|
8
|
+
method: 'GET',
|
|
9
|
+
headers: {
|
|
10
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/115.0.0.0 Safari/537.36',
|
|
11
|
+
'Cookie': 'CONSENT=YES+cb.20230501-14-p0.en+FX+430'
|
|
12
|
+
}
|
|
13
|
+
});
|
|
14
|
+
|
|
15
|
+
const html = await htmlRes.body.text();
|
|
16
|
+
|
|
17
|
+
let visitorData = '';
|
|
18
|
+
const match = html.match(/"visitorData"\s*:\s*"([^"]+)"/);
|
|
19
|
+
if (match) visitorData = match[1];
|
|
20
|
+
|
|
21
|
+
console.log('Got Visitor Data:', visitorData);
|
|
22
|
+
|
|
23
|
+
const payload = {
|
|
24
|
+
context: {
|
|
25
|
+
client: {
|
|
26
|
+
hl: 'en',
|
|
27
|
+
gl: 'US',
|
|
28
|
+
clientName: 'IOS',
|
|
29
|
+
clientVersion: '19.28.1',
|
|
30
|
+
osName: 'iOS',
|
|
31
|
+
osVersion: '17.5.1',
|
|
32
|
+
deviceMake: 'Apple',
|
|
33
|
+
deviceModel: 'iPhone16,2',
|
|
34
|
+
visitorData: visitorData
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
videoId: videoId
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
41
|
+
method: 'POST',
|
|
42
|
+
headers: {
|
|
43
|
+
'Content-Type': 'application/json',
|
|
44
|
+
'User-Agent': 'com.google.ios.youtube/19.28.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X; en_US)'
|
|
45
|
+
},
|
|
46
|
+
body: JSON.stringify(payload)
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
const body = await res.body.json();
|
|
50
|
+
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
51
|
+
|
|
52
|
+
console.log('Target Playability:', body.playabilityStatus?.status);
|
|
53
|
+
console.log('Target Formats:', formats.length);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
testVisitor();
|
package/test_vr.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
const undici = require('undici');
|
|
2
|
+
|
|
3
|
+
async function testVR() {
|
|
4
|
+
const payload = {
|
|
5
|
+
context: {
|
|
6
|
+
client: {
|
|
7
|
+
clientName: 'ANDROID_TESTSUITE',
|
|
8
|
+
clientVersion: '1.9',
|
|
9
|
+
androidSdkVersion: 30,
|
|
10
|
+
hl: 'en',
|
|
11
|
+
gl: 'US',
|
|
12
|
+
utcOffsetMinutes: 0
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
videoId: '_4j1Abt_AiM'
|
|
16
|
+
};
|
|
17
|
+
const res = await undici.request('https://www.youtube.com/youtubei/v1/player', {
|
|
18
|
+
method: 'POST',
|
|
19
|
+
headers: { 'Content-Type': 'application/json', 'User-Agent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11)' },
|
|
20
|
+
body: JSON.stringify(payload)
|
|
21
|
+
});
|
|
22
|
+
const body = await res.body.json();
|
|
23
|
+
const formats = [...(body.streamingData?.formats || []), ...(body.streamingData?.adaptiveFormats || [])];
|
|
24
|
+
console.log('Playability:', body.playabilityStatus?.status);
|
|
25
|
+
console.log('Formats:', formats.length);
|
|
26
|
+
}
|
|
27
|
+
testVR();
|
package/test_yt.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
const straw = require('./dist/index.js');
|
|
2
|
+
|
|
3
|
+
async function run() {
|
|
4
|
+
console.time('YouTube Scrape');
|
|
5
|
+
const yt = new straw.YouTubeScraper();
|
|
6
|
+
try {
|
|
7
|
+
const res = await yt.scrapeVideo('https://youtu.be/_4j1Abt_AiM?si=qJY_gv4F_adBYMYP');
|
|
8
|
+
console.log('Title:', res.title);
|
|
9
|
+
console.log('Formats:', res.formats.length);
|
|
10
|
+
console.log('First format URL (truncated):', res.formats[0]?.url?.substring(0, 100));
|
|
11
|
+
} catch (e) {
|
|
12
|
+
console.error('Scrape failed:', e);
|
|
13
|
+
}
|
|
14
|
+
console.timeEnd('YouTube Scrape');
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
run();
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|