@definisi/vidsrc-scraper 1.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -39
- package/dist/scraper.js +65 -62
- package/dist/types.d.ts +2 -0
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
# @definisi/vidsrc-scraper
|
|
2
2
|
|
|
3
|
-
Extract HLS streaming URLs from VidSrc using
|
|
3
|
+
Extract HLS streaming URLs from VidSrc using Playwright.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## ⚠️ Important Notice
|
|
6
|
+
|
|
7
|
+
VidSrc currently uses Cloudflare Turnstile CAPTCHA protection. This means:
|
|
8
|
+
|
|
9
|
+
1. **Headless mode doesn't work** - The scraper must run with a visible browser (`headless: false`)
|
|
10
|
+
2. **Manual CAPTCHA solving may be required** - You may need to complete the Turnstile challenge manually
|
|
11
|
+
3. **Automation is limited** - Fully automated scraping is currently not possible due to bot detection
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
6
14
|
|
|
7
15
|
```bash
|
|
8
|
-
npm install @definisi/vidsrc-scraper
|
|
16
|
+
npm install @definisi/vidsrc-scraper playwright
|
|
17
|
+
npx playwright install firefox
|
|
9
18
|
```
|
|
10
19
|
|
|
11
20
|
## Usage
|
|
@@ -13,42 +22,36 @@ npm install @definisi/vidsrc-scraper
|
|
|
13
22
|
```typescript
|
|
14
23
|
import { scrapeVidsrc } from '@definisi/vidsrc-scraper';
|
|
15
24
|
|
|
16
|
-
//
|
|
17
|
-
const movie = await scrapeVidsrc('27205');
|
|
18
|
-
|
|
19
|
-
// TV Show
|
|
20
|
-
const tv = await scrapeVidsrc('1396', 'tv', '1', '1');
|
|
21
|
-
|
|
22
|
-
// With options
|
|
25
|
+
// Scrape a movie by TMDB ID
|
|
23
26
|
const result = await scrapeVidsrc('27205', 'movie', null, null, {
|
|
24
|
-
timeout:
|
|
25
|
-
|
|
27
|
+
timeout: 120000,
|
|
28
|
+
headless: false, // Must be false for Turnstile
|
|
26
29
|
});
|
|
27
|
-
```
|
|
28
30
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
31
|
+
if (result.success) {
|
|
32
|
+
console.log('HLS URL:', result.hlsUrl);
|
|
33
|
+
console.log('Subtitles:', result.subtitles);
|
|
34
|
+
}
|
|
32
35
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
| options | ScrapeOptions | {} | Optional config |
|
|
36
|
+
// Scrape a TV episode
|
|
37
|
+
const tvResult = await scrapeVidsrc('1399', 'tv', '1', '1', {
|
|
38
|
+
timeout: 120000,
|
|
39
|
+
headless: false,
|
|
40
|
+
});
|
|
41
|
+
```
|
|
40
42
|
|
|
41
|
-
|
|
43
|
+
## Options
|
|
42
44
|
|
|
43
45
|
| Option | Type | Default | Description |
|
|
44
46
|
|--------|------|---------|-------------|
|
|
45
|
-
| timeout | number |
|
|
46
|
-
| cacheTtl | number | 900 | Cache
|
|
47
|
+
| timeout | number | 30000 | Timeout in milliseconds for each HTTP request (the usage examples override it with 120000) |
|
|
48
|
+
| cacheTtl | number | 900 | Cache time-to-live in seconds |
|
|
49
|
+
| headless | boolean | false | Run browser in headless mode (not recommended) |
|
|
47
50
|
|
|
48
|
-
|
|
51
|
+
## Result Type
|
|
49
52
|
|
|
50
53
|
```typescript
|
|
51
|
-
{
|
|
54
|
+
interface ScrapeResult {
|
|
52
55
|
tmdbId: string;
|
|
53
56
|
type: 'movie' | 'tv';
|
|
54
57
|
season: string | null;
|
|
@@ -60,20 +63,27 @@ const result = await scrapeVidsrc('27205', 'movie', null, null, {
|
|
|
60
63
|
}
|
|
61
64
|
```
|
|
62
65
|
|
|
63
|
-
|
|
66
|
+
## Playback
|
|
67
|
+
|
|
68
|
+
The HLS URLs require proper headers. Example with VLC:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
vlc "HLS_URL" --http-user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Limitations
|
|
64
75
|
|
|
65
|
-
|
|
76
|
+
- Requires Playwright and Firefox browser
|
|
77
|
+
- Requires visible browser window (headless mode blocked by Turnstile)
|
|
78
|
+
- May require manual CAPTCHA completion
|
|
79
|
+
- Results are cached to reduce CAPTCHA occurrences
|
|
80
|
+
- VidSrc may change their protection at any time
|
|
66
81
|
|
|
67
82
|
## Requirements
|
|
68
83
|
|
|
69
|
-
- Node.js 18+
|
|
84
|
+
- Node.js 18+
|
|
85
|
+
- Playwright
|
|
70
86
|
|
|
71
|
-
##
|
|
87
|
+
## License
|
|
72
88
|
|
|
73
|
-
|
|
74
|
-
# VLC
|
|
75
|
-
vlc "HLS_URL" --http-user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
|
|
76
|
-
|
|
77
|
-
# ffplay
|
|
78
|
-
ffplay -user_agent "Mozilla/5.0" "HLS_URL"
|
|
79
|
-
```
|
|
89
|
+
MIT
|
package/dist/scraper.js
CHANGED
|
@@ -1,10 +1,23 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import https from 'https';
|
|
3
|
+
const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0';
|
|
1
4
|
const DEFAULT_OPTIONS = {
|
|
2
5
|
timeout: 30000,
|
|
3
6
|
cacheTtl: 900,
|
|
4
7
|
};
|
|
5
|
-
const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0';
|
|
6
|
-
const CDN_DOMAINS = ['cloudnestra.com', 'quibblezoomfable.com'];
|
|
7
8
|
const cache = new Map();
|
|
9
|
+
function createClient(timeout) {
|
|
10
|
+
return axios.create({
|
|
11
|
+
httpsAgent: new https.Agent({ rejectUnauthorized: false }),
|
|
12
|
+
headers: {
|
|
13
|
+
'User-Agent': UA,
|
|
14
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
15
|
+
'Accept-Language': 'en-US,en;q=0.5',
|
|
16
|
+
'Accept-Encoding': 'identity',
|
|
17
|
+
},
|
|
18
|
+
timeout,
|
|
19
|
+
});
|
|
20
|
+
}
|
|
8
21
|
function getCacheKey(tmdbId, type, season, episode) {
|
|
9
22
|
return `${type}-${tmdbId}-${season || ''}-${episode || ''}`;
|
|
10
23
|
}
|
|
@@ -20,84 +33,74 @@ function setCache(key, data) {
|
|
|
20
33
|
cache.set(key, { data, timestamp: Date.now() });
|
|
21
34
|
}
|
|
22
35
|
function buildEmbedUrl(tmdbId, type, season, episode) {
|
|
23
|
-
const base = 'https://
|
|
36
|
+
const base = 'https://vidsrc-embed.ru/embed';
|
|
24
37
|
if (type === 'tv') {
|
|
25
|
-
return `${base}/tv
|
|
26
|
-
}
|
|
27
|
-
return `${base}/movie?tmdb=${tmdbId}`;
|
|
28
|
-
}
|
|
29
|
-
async function fetchWithHeaders(url, referer) {
|
|
30
|
-
const headers = {
|
|
31
|
-
'User-Agent': UA,
|
|
32
|
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
33
|
-
'Accept-Language': 'en-US,en;q=0.5',
|
|
34
|
-
};
|
|
35
|
-
if (referer) {
|
|
36
|
-
headers['Referer'] = referer;
|
|
37
|
-
}
|
|
38
|
-
const res = await fetch(url, { headers });
|
|
39
|
-
if (!res.ok)
|
|
40
|
-
throw new Error(`HTTP ${res.status}`);
|
|
41
|
-
return res.text();
|
|
42
|
-
}
|
|
43
|
-
async function resolveM3u8Domain(templateUrl) {
|
|
44
|
-
for (const domain of CDN_DOMAINS) {
|
|
45
|
-
const testUrl = templateUrl.replace(/\{v\d+\}/, domain);
|
|
46
|
-
try {
|
|
47
|
-
const res = await fetch(testUrl, {
|
|
48
|
-
method: 'HEAD',
|
|
49
|
-
headers: { 'User-Agent': UA },
|
|
50
|
-
});
|
|
51
|
-
if (res.ok)
|
|
52
|
-
return testUrl;
|
|
53
|
-
}
|
|
54
|
-
catch { }
|
|
38
|
+
return `${base}/tv/${tmdbId}/${season}/${episode}`;
|
|
55
39
|
}
|
|
56
|
-
return
|
|
40
|
+
return `${base}/movie/${tmdbId}`;
|
|
57
41
|
}
|
|
58
|
-
function
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
return subtitles;
|
|
42
|
+
function resolveM3u8Url(url) {
|
|
43
|
+
// Replace domain placeholders {v1} through {v5} with cloudnestra.com
|
|
44
|
+
return url
|
|
45
|
+
.split(' or ')[0]
|
|
46
|
+
.trim()
|
|
47
|
+
.replace(/\{v[1-5]\}/g, 'cloudnestra.com');
|
|
67
48
|
}
|
|
68
49
|
export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episode = null, options = {}) {
|
|
69
50
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
70
51
|
const cacheKey = getCacheKey(tmdbId, type, season, episode);
|
|
71
52
|
const cached = getFromCache(cacheKey, opts.cacheTtl);
|
|
72
|
-
if (cached)
|
|
53
|
+
if (cached) {
|
|
73
54
|
return cached;
|
|
55
|
+
}
|
|
56
|
+
const client = createClient(opts.timeout);
|
|
74
57
|
let hlsUrl = null;
|
|
75
|
-
|
|
58
|
+
const subtitles = [];
|
|
76
59
|
try {
|
|
60
|
+
// Step 1: Get embed page
|
|
77
61
|
const embedUrl = buildEmbedUrl(tmdbId, type, season, episode);
|
|
78
|
-
const
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
62
|
+
const embedRes = await client.get(embedUrl);
|
|
63
|
+
// Step 2: Extract cloudnestra RCP iframe URL
|
|
64
|
+
const rcpMatch = embedRes.data.match(/src=["']((?:https?:)?\/\/[^"']*cloudnestra\.com\/rcp\/[^"']+)["']/i);
|
|
65
|
+
if (!rcpMatch) {
|
|
66
|
+
throw new Error('No RCP iframe found in embed page');
|
|
67
|
+
}
|
|
82
68
|
let rcpUrl = rcpMatch[1];
|
|
83
69
|
if (rcpUrl.startsWith('//'))
|
|
84
70
|
rcpUrl = 'https:' + rcpUrl;
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
const
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
71
|
+
// Step 3: Get RCP page
|
|
72
|
+
const rcpRes = await client.get(rcpUrl, {
|
|
73
|
+
headers: { Referer: embedUrl },
|
|
74
|
+
});
|
|
75
|
+
// Step 4: Extract prorcp hash from loadIframe function
|
|
76
|
+
const prorcpMatch = rcpRes.data.match(/\/prorcp\/([a-zA-Z0-9=+/]+)/);
|
|
77
|
+
if (!prorcpMatch) {
|
|
78
|
+
throw new Error('No prorcp hash found in RCP page');
|
|
79
|
+
}
|
|
80
|
+
const prorcpHash = prorcpMatch[1];
|
|
81
|
+
const rcpHost = new URL(rcpUrl).origin;
|
|
82
|
+
// Step 5: Get prorcp page
|
|
83
|
+
const prorcpUrl = `${rcpHost}/prorcp/${prorcpHash}`;
|
|
84
|
+
const prorcpRes = await client.get(prorcpUrl, {
|
|
85
|
+
headers: { Referer: rcpUrl },
|
|
86
|
+
});
|
|
87
|
+
// Step 6: Extract M3U8 file URL
|
|
88
|
+
const fileMatch = prorcpRes.data.match(/file:\s*["']([^"']+)["']/);
|
|
89
|
+
if (!fileMatch) {
|
|
90
|
+
throw new Error('No file URL found in prorcp page');
|
|
91
|
+
}
|
|
92
|
+
// Step 7: Resolve domain placeholders
|
|
93
|
+
hlsUrl = resolveM3u8Url(fileMatch[1]);
|
|
94
|
+
// Extract subtitles if present
|
|
95
|
+
const subMatches = prorcpRes.data.matchAll(/["'](https?:\/\/[^"']+\.(?:vtt|srt))["']/gi);
|
|
96
|
+
for (const match of subMatches) {
|
|
97
|
+
if (!subtitles.includes(match[1])) {
|
|
98
|
+
subtitles.push(match[1]);
|
|
99
|
+
}
|
|
96
100
|
}
|
|
97
|
-
subtitles = extractSubtitles(prorcpHtml);
|
|
98
101
|
}
|
|
99
102
|
catch (error) {
|
|
100
|
-
console.error(`[
|
|
103
|
+
console.error(`[vidsrc] ${error.message}`);
|
|
101
104
|
}
|
|
102
105
|
const result = {
|
|
103
106
|
tmdbId,
|
package/dist/types.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@definisi/vidsrc-scraper",
|
|
3
|
-
"version": "1.1.0",
|
|
4
|
-
"description": "Extract HLS streaming URLs from VidSrc
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "Extract HLS streaming URLs from VidSrc using axios",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"tmdb"
|
|
27
27
|
],
|
|
28
28
|
"license": "MIT",
|
|
29
|
-
"
|
|
30
|
-
"
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"axios": "^1.6.0"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^22.0.0",
|