@definisi/vidsrc-scraper 1.1.0 → 2.0.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,11 +1,20 @@
1
1
  # @definisi/vidsrc-scraper
2
2
 
3
- Extract HLS streaming URLs from VidSrc using TMDB ID. Zero dependencies, works on any platform.
3
+ Extract HLS streaming URLs from VidSrc using Playwright.
4
4
 
5
- ## Install
5
+ ## ⚠️ Important Notice
6
+
7
+ VidSrc currently uses Cloudflare Turnstile CAPTCHA protection. This means:
8
+
9
+ 1. **Headless mode doesn't work** - The scraper must run with a visible browser (`headless: false`)
10
+ 2. **Manual CAPTCHA solving may be required** - You may need to complete the Turnstile challenge manually
11
+ 3. **Automation is limited** - Fully automated scraping is currently not possible due to bot detection
12
+
13
+ ## Installation
6
14
 
7
15
  ```bash
8
- npm install @definisi/vidsrc-scraper
16
+ npm install @definisi/vidsrc-scraper playwright
17
+ npx playwright install firefox
9
18
  ```
10
19
 
11
20
  ## Usage
@@ -13,42 +22,36 @@ npm install @definisi/vidsrc-scraper
13
22
  ```typescript
14
23
  import { scrapeVidsrc } from '@definisi/vidsrc-scraper';
15
24
 
16
- // Movie
17
- const movie = await scrapeVidsrc('27205');
18
-
19
- // TV Show
20
- const tv = await scrapeVidsrc('1396', 'tv', '1', '1');
21
-
22
- // With options
25
+ // Scrape a movie by TMDB ID
23
26
  const result = await scrapeVidsrc('27205', 'movie', null, null, {
24
- timeout: 15000,
25
- cacheTtl: 600,
27
+ timeout: 120000,
28
+ headless: false, // Must be false for Turnstile
26
29
  });
27
- ```
28
30
 
29
- ## API
30
-
31
- ### scrapeVidsrc(tmdbId, type?, season?, episode?, options?)
31
+ if (result.success) {
32
+ console.log('HLS URL:', result.hlsUrl);
33
+ console.log('Subtitles:', result.subtitles);
34
+ }
32
35
 
33
- | Parameter | Type | Default | Description |
34
- |-----------|------|---------|-------------|
35
- | tmdbId | string | - | TMDB ID |
36
- | type | 'movie' \| 'tv' | 'movie' | Content type |
37
- | season | string \| null | null | Season number (TV only) |
38
- | episode | string \| null | null | Episode number (TV only) |
39
- | options | ScrapeOptions | {} | Optional config |
36
+ // Scrape a TV episode
37
+ const tvResult = await scrapeVidsrc('1399', 'tv', '1', '1', {
38
+ timeout: 120000,
39
+ headless: false,
40
+ });
41
+ ```
40
42
 
41
- ### ScrapeOptions
43
+ ## Options
42
44
 
43
45
  | Option | Type | Default | Description |
44
46
  |--------|------|---------|-------------|
45
- | timeout | number | 30000 | Request timeout (ms) |
46
- | cacheTtl | number | 900 | Cache TTL (seconds) |
47
+ | timeout | number | 120000 | Total timeout in milliseconds |
48
+ | cacheTtl | number | 900 | Cache time-to-live in seconds |
49
+ | headless | boolean | false | Run browser in headless mode (not recommended) |
47
50
 
48
- ### ScrapeResult
51
+ ## Result Type
49
52
 
50
53
  ```typescript
51
- {
54
+ interface ScrapeResult {
52
55
  tmdbId: string;
53
56
  type: 'movie' | 'tv';
54
57
  season: string | null;
@@ -60,20 +63,27 @@ const result = await scrapeVidsrc('27205', 'movie', null, null, {
60
63
  }
61
64
  ```
62
65
 
63
- ### clearCache()
66
+ ## Playback
67
+
68
+ The HLS URLs require proper headers. Example with VLC:
69
+
70
+ ```bash
71
+ vlc "HLS_URL" --http-user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
72
+ ```
73
+
74
+ ## Limitations
64
75
 
65
- Clear the in-memory cache.
76
+ - Requires Playwright and Firefox browser
77
+ - Requires visible browser window (headless mode blocked by Turnstile)
78
+ - May require manual CAPTCHA completion
79
+ - Results are cached to reduce CAPTCHA occurrences
80
+ - VidSrc may change their protection at any time
66
81
 
67
82
  ## Requirements
68
83
 
69
- - Node.js 18+ (uses native fetch)
84
+ - Node.js 18+
85
+ - Playwright
70
86
 
71
- ## Playback
87
+ ## License
72
88
 
73
- ```bash
74
- # VLC
75
- vlc "HLS_URL" --http-user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"
76
-
77
- # ffplay
78
- ffplay -user_agent "Mozilla/5.0" "HLS_URL"
79
- ```
89
+ MIT
package/dist/scraper.js CHANGED
@@ -1,10 +1,23 @@
1
+ import axios from 'axios';
2
+ import https from 'https';
3
+ const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0';
1
4
  const DEFAULT_OPTIONS = {
2
5
  timeout: 30000,
3
6
  cacheTtl: 900,
4
7
  };
5
- const UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0';
6
- const CDN_DOMAINS = ['cloudnestra.com', 'quibblezoomfable.com'];
7
8
  const cache = new Map();
9
+ function createClient(timeout) {
10
+ return axios.create({
11
+ httpsAgent: new https.Agent({ rejectUnauthorized: false }),
12
+ headers: {
13
+ 'User-Agent': UA,
14
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
15
+ 'Accept-Language': 'en-US,en;q=0.5',
16
+ 'Accept-Encoding': 'identity',
17
+ },
18
+ timeout,
19
+ });
20
+ }
8
21
  function getCacheKey(tmdbId, type, season, episode) {
9
22
  return `${type}-${tmdbId}-${season || ''}-${episode || ''}`;
10
23
  }
@@ -20,84 +33,74 @@ function setCache(key, data) {
20
33
  cache.set(key, { data, timestamp: Date.now() });
21
34
  }
22
35
  function buildEmbedUrl(tmdbId, type, season, episode) {
23
- const base = 'https://vidsrcme.vidsrc.icu/embed';
36
+ const base = 'https://vidsrc-embed.ru/embed';
24
37
  if (type === 'tv') {
25
- return `${base}/tv?tmdb=${tmdbId}&season=${season}&episode=${episode}`;
26
- }
27
- return `${base}/movie?tmdb=${tmdbId}`;
28
- }
29
- async function fetchWithHeaders(url, referer) {
30
- const headers = {
31
- 'User-Agent': UA,
32
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
33
- 'Accept-Language': 'en-US,en;q=0.5',
34
- };
35
- if (referer) {
36
- headers['Referer'] = referer;
37
- }
38
- const res = await fetch(url, { headers });
39
- if (!res.ok)
40
- throw new Error(`HTTP ${res.status}`);
41
- return res.text();
42
- }
43
- async function resolveM3u8Domain(templateUrl) {
44
- for (const domain of CDN_DOMAINS) {
45
- const testUrl = templateUrl.replace(/\{v\d+\}/, domain);
46
- try {
47
- const res = await fetch(testUrl, {
48
- method: 'HEAD',
49
- headers: { 'User-Agent': UA },
50
- });
51
- if (res.ok)
52
- return testUrl;
53
- }
54
- catch { }
38
+ return `${base}/tv/${tmdbId}/${season}/${episode}`;
55
39
  }
56
- return null;
40
+ return `${base}/movie/${tmdbId}`;
57
41
  }
58
- function extractSubtitles(html) {
59
- const subtitles = [];
60
- const subMatches = html.matchAll(/https?:\/\/[^"'\s]+\.(?:vtt|srt)[^"'\s]*/g);
61
- for (const match of subMatches) {
62
- if (!subtitles.includes(match[0])) {
63
- subtitles.push(match[0]);
64
- }
65
- }
66
- return subtitles;
42
+ function resolveM3u8Url(url) {
43
+ // Replace domain placeholders {v1} through {v5} with cloudnestra.com
44
+ return url
45
+ .split(' or ')[0]
46
+ .trim()
47
+ .replace(/\{v[1-5]\}/g, 'cloudnestra.com');
67
48
  }
68
49
  export async function scrapeVidsrc(tmdbId, type = 'movie', season = null, episode = null, options = {}) {
69
50
  const opts = { ...DEFAULT_OPTIONS, ...options };
70
51
  const cacheKey = getCacheKey(tmdbId, type, season, episode);
71
52
  const cached = getFromCache(cacheKey, opts.cacheTtl);
72
- if (cached)
53
+ if (cached) {
73
54
  return cached;
55
+ }
56
+ const client = createClient(opts.timeout);
74
57
  let hlsUrl = null;
75
- let subtitles = [];
58
+ const subtitles = [];
76
59
  try {
60
+ // Step 1: Get embed page
77
61
  const embedUrl = buildEmbedUrl(tmdbId, type, season, episode);
78
- const embedHtml = await fetchWithHeaders(embedUrl);
79
- const rcpMatch = embedHtml.match(/src="([^"]*\/rcp\/[^"]*)"/);
80
- if (!rcpMatch)
81
- throw new Error('RCP iframe not found');
62
+ const embedRes = await client.get(embedUrl);
63
+ // Step 2: Extract cloudnestra RCP iframe URL
64
+ const rcpMatch = embedRes.data.match(/src=["']((?:https?:)?\/\/[^"']*cloudnestra\.com\/rcp\/[^"']+)["']/i);
65
+ if (!rcpMatch) {
66
+ throw new Error('No RCP iframe found in embed page');
67
+ }
82
68
  let rcpUrl = rcpMatch[1];
83
69
  if (rcpUrl.startsWith('//'))
84
70
  rcpUrl = 'https:' + rcpUrl;
85
- const rcpHtml = await fetchWithHeaders(rcpUrl, embedUrl);
86
- const prorcpMatch = rcpHtml.match(/\/prorcp\/([a-zA-Z0-9=]+)/);
87
- if (!prorcpMatch)
88
- throw new Error('Prorcp hash not found');
89
- const prorcpUrl = `https://cloudnestra.com/prorcp/${prorcpMatch[1]}`;
90
- const prorcpHtml = await fetchWithHeaders(prorcpUrl, rcpUrl);
91
- const fileMatch = prorcpHtml.match(/file:\s*["']([^"']+)["']/);
92
- if (fileMatch) {
93
- const fileUrls = fileMatch[1].split(' or ');
94
- const templateUrl = fileUrls[0].trim();
95
- hlsUrl = await resolveM3u8Domain(templateUrl);
71
+ // Step 3: Get RCP page
72
+ const rcpRes = await client.get(rcpUrl, {
73
+ headers: { Referer: embedUrl },
74
+ });
75
+ // Step 4: Extract prorcp hash from loadIframe function
76
+ const prorcpMatch = rcpRes.data.match(/\/prorcp\/([a-zA-Z0-9=+/]+)/);
77
+ if (!prorcpMatch) {
78
+ throw new Error('No prorcp hash found in RCP page');
79
+ }
80
+ const prorcpHash = prorcpMatch[1];
81
+ const rcpHost = new URL(rcpUrl).origin;
82
+ // Step 5: Get prorcp page
83
+ const prorcpUrl = `${rcpHost}/prorcp/${prorcpHash}`;
84
+ const prorcpRes = await client.get(prorcpUrl, {
85
+ headers: { Referer: rcpUrl },
86
+ });
87
+ // Step 6: Extract M3U8 file URL
88
+ const fileMatch = prorcpRes.data.match(/file:\s*["']([^"']+)["']/);
89
+ if (!fileMatch) {
90
+ throw new Error('No file URL found in prorcp page');
91
+ }
92
+ // Step 7: Resolve domain placeholders
93
+ hlsUrl = resolveM3u8Url(fileMatch[1]);
94
+ // Extract subtitles if present
95
+ const subMatches = prorcpRes.data.matchAll(/["'](https?:\/\/[^"']+\.(?:vtt|srt))["']/gi);
96
+ for (const match of subMatches) {
97
+ if (!subtitles.includes(match[1])) {
98
+ subtitles.push(match[1]);
99
+ }
96
100
  }
97
- subtitles = extractSubtitles(prorcpHtml);
98
101
  }
99
102
  catch (error) {
100
- console.error(`[ERROR] ${error.message}`);
103
+ console.error(`[vidsrc] ${error.message}`);
101
104
  }
102
105
  const result = {
103
106
  tmdbId,
package/dist/types.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  export interface ScrapeOptions {
2
+ /** Request timeout in ms (default: 30000) */
2
3
  timeout?: number;
4
+ /** Cache TTL in seconds (default: 900) */
3
5
  cacheTtl?: number;
4
6
  }
5
7
  export interface ScrapeResult {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@definisi/vidsrc-scraper",
3
- "version": "1.1.0",
4
- "description": "Extract HLS streaming URLs from VidSrc (no browser required)",
3
+ "version": "2.0.0",
4
+ "description": "Extract HLS streaming URLs from VidSrc using axios",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "types": "./dist/index.d.ts",
@@ -26,8 +26,8 @@
26
26
  "tmdb"
27
27
  ],
28
28
  "license": "MIT",
29
- "engines": {
30
- "node": ">=18.0.0"
29
+ "dependencies": {
30
+ "axios": "^1.6.0"
31
31
  },
32
32
  "devDependencies": {
33
33
  "@types/node": "^22.0.0",