@definisi/vidsrc-scraper 1.0.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/scraper.js +63 -64
- package/dist/types.d.ts +1 -7
- package/package.json +4 -4
package/dist/scraper.js
CHANGED
|
@@ -1,10 +1,23 @@
|
|
|
1
|
-
import
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import https from 'https';
|
|
3
|
+
const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0';
|
|
2
4
|
const DEFAULT_OPTIONS = {
|
|
3
|
-
timeout:
|
|
4
|
-
headless: true,
|
|
5
|
+
timeout: 30000,
|
|
5
6
|
cacheTtl: 900,
|
|
6
7
|
};
|
|
7
8
|
const cache = new Map();
|
|
9
|
+
function createClient(timeout) {
|
|
10
|
+
return axios.create({
|
|
11
|
+
httpsAgent: new https.Agent({ rejectUnauthorized: false }),
|
|
12
|
+
headers: {
|
|
13
|
+
'User-Agent': UA,
|
|
14
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
15
|
+
'Accept-Language': 'en-US,en;q=0.5',
|
|
16
|
+
'Accept-Encoding': 'identity',
|
|
17
|
+
},
|
|
18
|
+
timeout,
|
|
19
|
+
});
|
|
20
|
+
}
|
|
8
21
|
function getCacheKey(tmdbId, type, season, episode) {
|
|
9
22
|
return `${type}-${tmdbId}-${season || ''}-${episode || ''}`;
|
|
10
23
|
}
|
|
@@ -20,28 +33,18 @@ function setCache(key, data) {
|
|
|
20
33
|
cache.set(key, { data, timestamp: Date.now() });
|
|
21
34
|
}
|
|
22
35
|
function buildEmbedUrl(tmdbId, type, season, episode) {
|
|
23
|
-
const base = 'https://vidsrc.
|
|
36
|
+
const base = 'https://vidsrc-embed.ru/embed';
|
|
24
37
|
if (type === 'tv') {
|
|
25
38
|
return `${base}/tv/${tmdbId}/${season}/${episode}`;
|
|
26
39
|
}
|
|
27
40
|
return `${base}/movie/${tmdbId}`;
|
|
28
41
|
}
|
|
29
|
-
function
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
try {
|
|
36
|
-
const btn = await frame.$(sel);
|
|
37
|
-
if (btn) {
|
|
38
|
-
await btn.click();
|
|
39
|
-
return true;
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
catch { }
|
|
43
|
-
}
|
|
44
|
-
return false;
|
|
42
|
+
function resolveM3u8Url(url) {
|
|
43
|
+
// Replace domain placeholders {v1} through {v5} with cloudnestra.com
|
|
44
|
+
return url
|
|
45
|
+
.split(' or ')[0]
|
|
46
|
+
.trim()
|
|
47
|
+
.replace(/\{v[1-5]\}/g, 'cloudnestra.com');
|
|
45
48
|
}
|
|
46
49
|
export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episode = null, options = {}) {
|
|
47
50
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
@@ -50,58 +53,54 @@ export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episod
|
|
|
50
53
|
if (cached) {
|
|
51
54
|
return cached;
|
|
52
55
|
}
|
|
53
|
-
const
|
|
54
|
-
const context = await browser.newContext({
|
|
55
|
-
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
|
|
56
|
-
viewport: { width: 1280, height: 720 },
|
|
57
|
-
});
|
|
58
|
-
const page = await context.newPage();
|
|
56
|
+
const client = createClient(opts.timeout);
|
|
59
57
|
let hlsUrl = null;
|
|
60
58
|
const subtitles = [];
|
|
61
|
-
// Capture network responses
|
|
62
|
-
context.on('response', (response) => {
|
|
63
|
-
const url = response.url();
|
|
64
|
-
if (isValidM3u8Url(url) && !hlsUrl) {
|
|
65
|
-
hlsUrl = url;
|
|
66
|
-
}
|
|
67
|
-
if ((url.includes('.vtt') || url.includes('.srt')) && !subtitles.includes(url)) {
|
|
68
|
-
subtitles.push(url);
|
|
69
|
-
}
|
|
70
|
-
});
|
|
71
59
|
try {
|
|
60
|
+
// Step 1: Get embed page
|
|
72
61
|
const embedUrl = buildEmbedUrl(tmdbId, type, season, episode);
|
|
73
|
-
await
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
62
|
+
const embedRes = await client.get(embedUrl);
|
|
63
|
+
// Step 2: Extract cloudnestra RCP iframe URL
|
|
64
|
+
const rcpMatch = embedRes.data.match(/src=["']((?:https?:)?\/\/[^"']*cloudnestra\.com\/rcp\/[^"']+)["']/i);
|
|
65
|
+
if (!rcpMatch) {
|
|
66
|
+
throw new Error('No RCP iframe found in embed page');
|
|
67
|
+
}
|
|
68
|
+
let rcpUrl = rcpMatch[1];
|
|
69
|
+
if (rcpUrl.startsWith('//'))
|
|
70
|
+
rcpUrl = 'https:' + rcpUrl;
|
|
71
|
+
// Step 3: Get RCP page
|
|
72
|
+
const rcpRes = await client.get(rcpUrl, {
|
|
73
|
+
headers: { Referer: embedUrl },
|
|
74
|
+
});
|
|
75
|
+
// Step 4: Extract prorcp hash from loadIframe function
|
|
76
|
+
const prorcpMatch = rcpRes.data.match(/\/prorcp\/([a-zA-Z0-9=+/]+)/);
|
|
77
|
+
if (!prorcpMatch) {
|
|
78
|
+
throw new Error('No prorcp hash found in RCP page');
|
|
79
|
+
}
|
|
80
|
+
const prorcpHash = prorcpMatch[1];
|
|
81
|
+
const rcpHost = new URL(rcpUrl).origin;
|
|
82
|
+
// Step 5: Get prorcp page
|
|
83
|
+
const prorcpUrl = `${rcpHost}/prorcp/${prorcpHash}`;
|
|
84
|
+
const prorcpRes = await client.get(prorcpUrl, {
|
|
85
|
+
headers: { Referer: rcpUrl },
|
|
86
|
+
});
|
|
87
|
+
// Step 6: Extract M3U8 file URL
|
|
88
|
+
const fileMatch = prorcpRes.data.match(/file:\s*["']([^"']+)["']/);
|
|
89
|
+
if (!fileMatch) {
|
|
90
|
+
throw new Error('No file URL found in prorcp page');
|
|
91
|
+
}
|
|
92
|
+
// Step 7: Resolve domain placeholders
|
|
93
|
+
hlsUrl = resolveM3u8Url(fileMatch[1]);
|
|
94
|
+
// Extract subtitles if present
|
|
95
|
+
const subMatches = prorcpRes.data.matchAll(/["'](https?:\/\/[^"']+\.(?:vtt|srt))["']/gi);
|
|
96
|
+
for (const match of subMatches) {
|
|
97
|
+
if (!subtitles.includes(match[1])) {
|
|
98
|
+
subtitles.push(match[1]);
|
|
95
99
|
}
|
|
96
100
|
}
|
|
97
101
|
}
|
|
98
102
|
catch (error) {
|
|
99
|
-
|
|
100
|
-
console.error(`[vidsrc] ${error.message}`);
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
finally {
|
|
104
|
-
await browser.close();
|
|
103
|
+
console.error(`[vidsrc] ${error.message}`);
|
|
105
104
|
}
|
|
106
105
|
const result = {
|
|
107
106
|
tmdbId,
|
package/dist/types.d.ts
CHANGED
|
@@ -1,14 +1,8 @@
|
|
|
1
1
|
export interface ScrapeOptions {
|
|
2
|
-
/**
|
|
2
|
+
/** Request timeout in ms (default: 30000) */
|
|
3
3
|
timeout?: number;
|
|
4
4
|
/** Cache TTL in seconds (default: 900) */
|
|
5
5
|
cacheTtl?: number;
|
|
6
|
-
/**
|
|
7
|
-
* Run browser in headless mode (default: true)
|
|
8
|
-
* NOTE: VidSrc now uses Cloudflare Turnstile protection.
|
|
9
|
-
* Set to false to allow manual CAPTCHA solving in visible browser.
|
|
10
|
-
*/
|
|
11
|
-
headless?: boolean;
|
|
12
6
|
}
|
|
13
7
|
export interface ScrapeResult {
|
|
14
8
|
tmdbId: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@definisi/vidsrc-scraper",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "Extract HLS streaming URLs from VidSrc",
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "Extract HLS streaming URLs from VidSrc using axios",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"tmdb"
|
|
27
27
|
],
|
|
28
28
|
"license": "MIT",
|
|
29
|
-
"
|
|
30
|
-
"
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"axios": "^1.6.0"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^22.0.0",
|