@definisi/vidsrc-scraper 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -33
- package/dist/scraper.js +53 -38
- package/dist/types.d.ts +8 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
# @definisi/vidsrc-scraper
|
|
2
2
|
|
|
3
|
-
Extract HLS streaming URLs from VidSrc using
|
|
3
|
+
Extract HLS streaming URLs from VidSrc using Playwright.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## ⚠️ Important Notice
|
|
6
|
+
|
|
7
|
+
VidSrc currently uses Cloudflare Turnstile CAPTCHA protection. This means:
|
|
8
|
+
|
|
9
|
+
1. **Headless mode doesn't work** - The scraper must run with a visible browser (`headless: false`)
|
|
10
|
+
2. **Manual CAPTCHA solving may be required** - You may need to complete the Turnstile challenge manually
|
|
11
|
+
3. **Automation is limited** - Fully automated scraping is currently not possible due to bot detection
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
6
14
|
|
|
7
15
|
```bash
|
|
8
|
-
npm install @definisi/vidsrc-scraper
|
|
16
|
+
npm install @definisi/vidsrc-scraper playwright
|
|
9
17
|
npx playwright install firefox
|
|
10
18
|
```
|
|
11
19
|
|
|
@@ -14,44 +22,36 @@ npx playwright install firefox
|
|
|
14
22
|
```typescript
|
|
15
23
|
import { scrapeVidsrc } from '@definisi/vidsrc-scraper';
|
|
16
24
|
|
|
17
|
-
//
|
|
18
|
-
const movie = await scrapeVidsrc('27205');
|
|
19
|
-
|
|
20
|
-
// TV Show
|
|
21
|
-
const tv = await scrapeVidsrc('1396', 'tv', '1', '1');
|
|
22
|
-
|
|
23
|
-
// With options
|
|
25
|
+
// Scrape a movie by TMDB ID
|
|
24
26
|
const result = await scrapeVidsrc('27205', 'movie', null, null, {
|
|
25
|
-
timeout:
|
|
26
|
-
headless:
|
|
27
|
-
cacheTtl: 600,
|
|
27
|
+
timeout: 120000,
|
|
28
|
+
headless: false, // Must be false for Turnstile
|
|
28
29
|
});
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
## API
|
|
32
30
|
|
|
33
|
-
|
|
31
|
+
if (result.success) {
|
|
32
|
+
console.log('HLS URL:', result.hlsUrl);
|
|
33
|
+
console.log('Subtitles:', result.subtitles);
|
|
34
|
+
}
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
| options | ScrapeOptions | {} | Optional config |
|
|
36
|
+
// Scrape a TV episode
|
|
37
|
+
const tvResult = await scrapeVidsrc('1399', 'tv', '1', '1', {
|
|
38
|
+
timeout: 120000,
|
|
39
|
+
headless: false,
|
|
40
|
+
});
|
|
41
|
+
```
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
## Options
|
|
44
44
|
|
|
45
45
|
| Option | Type | Default | Description |
|
|
46
46
|
|--------|------|---------|-------------|
|
|
47
|
-
| timeout | number |
|
|
48
|
-
|
|
|
49
|
-
|
|
|
47
|
+
| timeout | number | 120000 | Total timeout in milliseconds |
|
|
48
|
+
| cacheTtl | number | 900 | Cache time-to-live in seconds |
|
|
49
|
+
| headless | boolean | false | Run browser in headless mode (not recommended) |
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
## Result Type
|
|
52
52
|
|
|
53
53
|
```typescript
|
|
54
|
-
{
|
|
54
|
+
interface ScrapeResult {
|
|
55
55
|
tmdbId: string;
|
|
56
56
|
type: 'movie' | 'tv';
|
|
57
57
|
season: string | null;
|
|
@@ -63,11 +63,27 @@ const result = await scrapeVidsrc('27205', 'movie', null, null, {
|
|
|
63
63
|
}
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
## Playback
|
|
67
|
+
|
|
68
|
+
The HLS URLs require proper request headers, such as the Firefox User-Agent the scraper itself sends. Example with VLC:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
vlc "HLS_URL" --http-user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Limitations
|
|
67
75
|
|
|
68
|
-
|
|
76
|
+
- Requires Playwright and Firefox browser
|
|
77
|
+
- Requires visible browser window (headless mode blocked by Turnstile)
|
|
78
|
+
- May require manual CAPTCHA completion
|
|
79
|
+
- Results are cached to reduce CAPTCHA occurrences
|
|
80
|
+
- VidSrc may change their protection at any time
|
|
69
81
|
|
|
70
82
|
## Requirements
|
|
71
83
|
|
|
72
84
|
- Node.js 18+
|
|
73
|
-
- Playwright
|
|
85
|
+
- Playwright
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
|
|
89
|
+
MIT
|
package/dist/scraper.js
CHANGED
|
@@ -20,25 +20,28 @@ function setCache(key, data) {
|
|
|
20
20
|
cache.set(key, { data, timestamp: Date.now() });
|
|
21
21
|
}
|
|
22
22
|
function buildEmbedUrl(tmdbId, type, season, episode) {
|
|
23
|
-
const base = 'https://
|
|
23
|
+
const base = 'https://vidsrc.xyz/embed';
|
|
24
24
|
if (type === 'tv') {
|
|
25
|
-
return `${base}/tv
|
|
25
|
+
return `${base}/tv/${tmdbId}/${season}/${episode}`;
|
|
26
26
|
}
|
|
27
|
-
return `${base}/movie
|
|
27
|
+
return `${base}/movie/${tmdbId}`;
|
|
28
28
|
}
|
|
29
29
|
function isValidM3u8Url(url) {
|
|
30
30
|
return url.includes('.m3u8') && !url.includes('{v') && !url.includes('{s');
|
|
31
31
|
}
|
|
32
|
-
async function
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
async function clickPlayButton(frame) {
|
|
33
|
+
const selectors = ['#pl_but', '#pl_but_background', '.play-btn'];
|
|
34
|
+
for (const sel of selectors) {
|
|
35
|
+
try {
|
|
36
|
+
const btn = await frame.$(sel);
|
|
37
|
+
if (btn) {
|
|
38
|
+
await btn.click();
|
|
39
|
+
return true;
|
|
40
|
+
}
|
|
40
41
|
}
|
|
41
|
-
|
|
42
|
+
catch { }
|
|
43
|
+
}
|
|
44
|
+
return false;
|
|
42
45
|
}
|
|
43
46
|
export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episode = null, options = {}) {
|
|
44
47
|
const opts = { ...DEFAULT_OPTIONS, ...options };
|
|
@@ -50,40 +53,52 @@ export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episod
|
|
|
50
53
|
const browser = await firefox.launch({ headless: opts.headless });
|
|
51
54
|
const context = await browser.newContext({
|
|
52
55
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
|
|
53
|
-
viewport: { width:
|
|
56
|
+
viewport: { width: 1280, height: 720 },
|
|
54
57
|
});
|
|
55
58
|
const page = await context.newPage();
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
let hlsUrl = null;
|
|
60
|
+
const subtitles = [];
|
|
61
|
+
// Capture network responses
|
|
62
|
+
context.on('response', (response) => {
|
|
63
|
+
const url = response.url();
|
|
64
|
+
if (isValidM3u8Url(url) && !hlsUrl) {
|
|
65
|
+
hlsUrl = url;
|
|
66
|
+
}
|
|
67
|
+
if ((url.includes('.vtt') || url.includes('.srt')) && !subtitles.includes(url)) {
|
|
68
|
+
subtitles.push(url);
|
|
69
|
+
}
|
|
70
|
+
});
|
|
58
71
|
try {
|
|
59
72
|
const embedUrl = buildEmbedUrl(tmdbId, type, season, episode);
|
|
60
|
-
await page.goto(embedUrl, { waitUntil: '
|
|
73
|
+
await page.goto(embedUrl, { waitUntil: 'commit', timeout: opts.timeout });
|
|
61
74
|
await page.waitForTimeout(3000);
|
|
62
|
-
for
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
}
|
|
76
|
-
await page.waitForTimeout(5000);
|
|
75
|
+
// Poll for cloudnestra frame and play button (handles both movies and Turnstile-protected TV)
|
|
76
|
+
const maxWait = opts.timeout - 5000;
|
|
77
|
+
const startWait = Date.now();
|
|
78
|
+
while (Date.now() - startWait < maxWait && !hlsUrl) {
|
|
79
|
+
for (const frame of page.frames()) {
|
|
80
|
+
const frameUrl = frame.url();
|
|
81
|
+
if (frameUrl.includes('cloudnestra') || frameUrl.includes('/rcp/')) {
|
|
82
|
+
const clicked = await clickPlayButton(frame);
|
|
83
|
+
if (clicked) {
|
|
84
|
+
// Wait for HLS after click
|
|
85
|
+
const clickTime = Date.now();
|
|
86
|
+
while (!hlsUrl && Date.now() - clickTime < 15000) {
|
|
87
|
+
await page.waitForTimeout(500);
|
|
88
|
+
}
|
|
77
89
|
}
|
|
78
|
-
|
|
90
|
+
break;
|
|
79
91
|
}
|
|
80
|
-
|
|
92
|
+
}
|
|
93
|
+
if (!hlsUrl) {
|
|
94
|
+
await page.waitForTimeout(1000);
|
|
81
95
|
}
|
|
82
96
|
}
|
|
83
|
-
await page.waitForTimeout(3000);
|
|
84
97
|
}
|
|
85
98
|
catch (error) {
|
|
86
|
-
|
|
99
|
+
if (!hlsUrl) {
|
|
100
|
+
console.error(`[vidsrc] ${error.message}`);
|
|
101
|
+
}
|
|
87
102
|
}
|
|
88
103
|
finally {
|
|
89
104
|
await browser.close();
|
|
@@ -93,9 +108,9 @@ export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episod
|
|
|
93
108
|
type,
|
|
94
109
|
season,
|
|
95
110
|
episode,
|
|
96
|
-
hlsUrl
|
|
97
|
-
subtitles
|
|
98
|
-
success: !!
|
|
111
|
+
hlsUrl,
|
|
112
|
+
subtitles,
|
|
113
|
+
success: !!hlsUrl,
|
|
99
114
|
timestamp: new Date().toISOString(),
|
|
100
115
|
};
|
|
101
116
|
if (result.success) {
|
package/dist/types.d.ts
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
export interface ScrapeOptions {
|
|
2
|
+
/** Total timeout in ms (default: 60000 — NOTE(review): the README documents 120000; confirm against DEFAULT_OPTIONS) */
|
|
2
3
|
timeout?: number;
|
|
3
|
-
|
|
4
|
+
/** Cache TTL in seconds (default: 900) */
|
|
4
5
|
cacheTtl?: number;
|
|
6
|
+
/**
|
|
7
|
+
* Run browser in headless mode (default: true — NOTE(review): the README documents false; confirm against DEFAULT_OPTIONS)
|
|
8
|
+
* NOTE: VidSrc now uses Cloudflare Turnstile protection.
|
|
9
|
+
* Set to false to allow manual CAPTCHA solving in visible browser.
|
|
10
|
+
*/
|
|
11
|
+
headless?: boolean;
|
|
5
12
|
}
|
|
6
13
|
export interface ScrapeResult {
|
|
7
14
|
tmdbId: string;
|