@definisi/vidsrc-scraper 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,11 +1,19 @@
1
1
  # @definisi/vidsrc-scraper
2
2
 
3
- Extract HLS streaming URLs from VidSrc using TMDB ID.
3
+ Extract HLS streaming URLs from VidSrc using Playwright.
4
4
 
5
- ## Install
5
+ ## ⚠️ Important Notice
6
+
7
+ VidSrc currently uses Cloudflare Turnstile CAPTCHA protection. This means:
8
+
9
+ 1. **Headless mode doesn't work** - The scraper must run with a visible browser (`headless: false`)
10
+ 2. **Manual CAPTCHA solving may be required** - You may need to complete the Turnstile challenge manually
11
+ 3. **Automation is limited** - Fully automated scraping is currently not possible due to bot detection
12
+
13
+ ## Installation
6
14
 
7
15
  ```bash
8
- npm install @definisi/vidsrc-scraper
16
+ npm install @definisi/vidsrc-scraper playwright
9
17
  npx playwright install firefox
10
18
  ```
11
19
 
@@ -14,44 +22,36 @@ npx playwright install firefox
14
22
  ```typescript
15
23
  import { scrapeVidsrc } from '@definisi/vidsrc-scraper';
16
24
 
17
- // Movie
18
- const movie = await scrapeVidsrc('27205');
19
-
20
- // TV Show
21
- const tv = await scrapeVidsrc('1396', 'tv', '1', '1');
22
-
23
- // With options
25
+ // Scrape a movie by TMDB ID
24
26
  const result = await scrapeVidsrc('27205', 'movie', null, null, {
25
- timeout: 30000,
26
- headless: true,
27
- cacheTtl: 600,
27
+ timeout: 120000,
28
+ headless: false, // Must be false for Turnstile
28
29
  });
29
- ```
30
-
31
- ## API
32
30
 
33
- ### scrapeVidsrc(tmdbId, type?, season?, episode?, options?)
31
+ if (result.success) {
32
+ console.log('HLS URL:', result.hlsUrl);
33
+ console.log('Subtitles:', result.subtitles);
34
+ }
34
35
 
35
- | Parameter | Type | Default | Description |
36
- |-----------|------|---------|-------------|
37
- | tmdbId | string | - | TMDB ID |
38
- | type | 'movie' \| 'tv' | 'movie' | Content type |
39
- | season | string \| null | null | Season number (TV only) |
40
- | episode | string \| null | null | Episode number (TV only) |
41
- | options | ScrapeOptions | {} | Optional config |
36
+ // Scrape a TV episode
37
+ const tvResult = await scrapeVidsrc('1399', 'tv', '1', '1', {
38
+ timeout: 120000,
39
+ headless: false,
40
+ });
41
+ ```
42
42
 
43
- ### ScrapeOptions
43
+ ## Options
44
44
 
45
45
  | Option | Type | Default | Description |
46
46
  |--------|------|---------|-------------|
47
- | timeout | number | 60000 | Page load timeout (ms) |
48
- | headless | boolean | true | Run browser headless |
49
- | cacheTtl | number | 900 | Cache TTL (seconds) |
47
+ | timeout | number | 120000 | Total timeout in milliseconds |
48
+ | cacheTtl | number | 900 | Cache time-to-live in seconds |
49
+ | headless | boolean | false | Run browser in headless mode (not recommended) |
50
50
 
51
- ### ScrapeResult
51
+ ## Result Type
52
52
 
53
53
  ```typescript
54
- {
54
+ interface ScrapeResult {
55
55
  tmdbId: string;
56
56
  type: 'movie' | 'tv';
57
57
  season: string | null;
@@ -63,11 +63,27 @@ const result = await scrapeVidsrc('27205', 'movie', null, null, {
63
63
  }
64
64
  ```
65
65
 
66
- ### clearCache()
66
+ ## Playback
67
+
68
+ The HLS URLs require proper request headers, such as the browser User-Agent the scraper uses. Example with VLC:
69
+
70
+ ```bash
71
+ vlc "HLS_URL" --http-user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
72
+ ```
73
+
74
+ ## Limitations
67
75
 
68
- Clear the in-memory cache.
76
+ - Requires Playwright and the Firefox browser
77
+ - Requires a visible browser window (headless mode is blocked by Turnstile)
78
+ - May require manual CAPTCHA completion
79
+ - Results are cached to reduce CAPTCHA occurrences
80
+ - VidSrc may change their protection at any time
69
81
 
70
82
  ## Requirements
71
83
 
72
84
  - Node.js 18+
73
- - Playwright Firefox
85
+ - Playwright
86
+
87
+ ## License
88
+
89
+ MIT
package/dist/scraper.js CHANGED
@@ -20,25 +20,28 @@ function setCache(key, data) {
20
20
  cache.set(key, { data, timestamp: Date.now() });
21
21
  }
22
22
  function buildEmbedUrl(tmdbId, type, season, episode) {
23
- const base = 'https://vidsrcme.vidsrc.icu/embed';
23
+ const base = 'https://vidsrc.xyz/embed';
24
24
  if (type === 'tv') {
25
- return `${base}/tv?tmdb=${tmdbId}&season=${season}&episode=${episode}&autoplay=1&ds_lang=en`;
25
+ return `${base}/tv/${tmdbId}/${season}/${episode}`;
26
26
  }
27
- return `${base}/movie?tmdb=${tmdbId}&autoplay=1&ds_lang=en`;
27
+ return `${base}/movie/${tmdbId}`;
28
28
  }
29
29
  function isValidM3u8Url(url) {
30
30
  return url.includes('.m3u8') && !url.includes('{v') && !url.includes('{s');
31
31
  }
32
- async function setupResponseHandler(context, result) {
33
- context.on('response', async (response) => {
34
- const url = response.url();
35
- if (isValidM3u8Url(url) && !result.hlsUrl) {
36
- result.hlsUrl = url;
37
- }
38
- if (url.includes('.vtt') || url.includes('.srt')) {
39
- result.subtitles.push(url);
32
+ async function clickPlayButton(frame) {
33
+ const selectors = ['#pl_but', '#pl_but_background', '.play-btn'];
34
+ for (const sel of selectors) {
35
+ try {
36
+ const btn = await frame.$(sel);
37
+ if (btn) {
38
+ await btn.click();
39
+ return true;
40
+ }
40
41
  }
41
- });
42
+ catch { }
43
+ }
44
+ return false;
42
45
  }
43
46
  export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episode = null, options = {}) {
44
47
  const opts = { ...DEFAULT_OPTIONS, ...options };
@@ -50,40 +53,52 @@ export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episod
50
53
  const browser = await firefox.launch({ headless: opts.headless });
51
54
  const context = await browser.newContext({
52
55
  userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
53
- viewport: { width: 1920, height: 1080 },
56
+ viewport: { width: 1280, height: 720 },
54
57
  });
55
58
  const page = await context.newPage();
56
- const responseData = { hlsUrl: null, subtitles: [] };
57
- await setupResponseHandler(context, responseData);
59
+ let hlsUrl = null;
60
+ const subtitles = [];
61
+ // Capture network responses
62
+ context.on('response', (response) => {
63
+ const url = response.url();
64
+ if (isValidM3u8Url(url) && !hlsUrl) {
65
+ hlsUrl = url;
66
+ }
67
+ if ((url.includes('.vtt') || url.includes('.srt')) && !subtitles.includes(url)) {
68
+ subtitles.push(url);
69
+ }
70
+ });
58
71
  try {
59
72
  const embedUrl = buildEmbedUrl(tmdbId, type, season, episode);
60
- await page.goto(embedUrl, { waitUntil: 'networkidle', timeout: opts.timeout });
73
+ await page.goto(embedUrl, { waitUntil: 'commit', timeout: opts.timeout });
61
74
  await page.waitForTimeout(3000);
62
- for (const frame of page.frames()) {
63
- const frameUrl = frame.url();
64
- if (frameUrl.includes('cloudnestra.com/rcp/') || frameUrl.includes('/rcp/')) {
65
- try {
66
- await frame.click('#pl_but, #pl_but_background, .play-btn', { timeout: 5000 });
67
- await page.waitForTimeout(5000);
68
- }
69
- catch {
70
- try {
71
- await frame.evaluate(() => {
72
- if (typeof window.loadIframe === 'function') {
73
- window.loadIframe(1);
74
- }
75
- });
76
- await page.waitForTimeout(5000);
75
+ // Poll for cloudnestra frame and play button (handles both movies and Turnstile-protected TV)
76
+ const maxWait = opts.timeout - 5000;
77
+ const startWait = Date.now();
78
+ while (Date.now() - startWait < maxWait && !hlsUrl) {
79
+ for (const frame of page.frames()) {
80
+ const frameUrl = frame.url();
81
+ if (frameUrl.includes('cloudnestra') || frameUrl.includes('/rcp/')) {
82
+ const clicked = await clickPlayButton(frame);
83
+ if (clicked) {
84
+ // Wait for HLS after click
85
+ const clickTime = Date.now();
86
+ while (!hlsUrl && Date.now() - clickTime < 15000) {
87
+ await page.waitForTimeout(500);
88
+ }
77
89
  }
78
- catch { }
90
+ break;
79
91
  }
80
- break;
92
+ }
93
+ if (!hlsUrl) {
94
+ await page.waitForTimeout(1000);
81
95
  }
82
96
  }
83
- await page.waitForTimeout(3000);
84
97
  }
85
98
  catch (error) {
86
- console.error(`[ERROR] ${error.message}`);
99
+ if (!hlsUrl) {
100
+ console.error(`[vidsrc] ${error.message}`);
101
+ }
87
102
  }
88
103
  finally {
89
104
  await browser.close();
@@ -93,9 +108,9 @@ export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episod
93
108
  type,
94
109
  season,
95
110
  episode,
96
- hlsUrl: responseData.hlsUrl,
97
- subtitles: responseData.subtitles,
98
- success: !!responseData.hlsUrl,
111
+ hlsUrl,
112
+ subtitles,
113
+ success: !!hlsUrl,
99
114
  timestamp: new Date().toISOString(),
100
115
  };
101
116
  if (result.success) {
package/dist/types.d.ts CHANGED
@@ -1,7 +1,14 @@
1
1
  export interface ScrapeOptions {
2
+ /** Total timeout in ms (default: 60000; NOTE: the README options table lists 120000 — confirm against DEFAULT_OPTIONS) */
2
3
  timeout?: number;
3
- headless?: boolean;
4
+ /** Cache TTL in seconds (default: 900) */
4
5
  cacheTtl?: number;
6
+ /**
7
+ * Run browser in headless mode (default: true; NOTE: the README options table lists false — confirm against DEFAULT_OPTIONS)
8
+ * NOTE: VidSrc now uses Cloudflare Turnstile protection.
9
+ * Set to false to allow manual CAPTCHA solving in visible browser.
10
+ */
11
+ headless?: boolean;
5
12
  }
6
13
  export interface ScrapeResult {
7
14
  tmdbId: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@definisi/vidsrc-scraper",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "Extract HLS streaming URLs from VidSrc",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",