ph-scraper-api 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +153 -1
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -1 +1,153 @@
|
|
|
1
|
-
|
|
1
|
+
const axios = require('axios');
|
|
2
|
+
const cheerio = require('cheerio');
|
|
3
|
+
const { exec } = require('child_process');
|
|
4
|
+
const { promisify } = require('util');
|
|
5
|
+
const { mkdtemp, rm, readFile } = require('fs').promises;
|
|
6
|
+
const { join } = require('path');
|
|
7
|
+
const { tmpdir } = require('os');
|
|
8
|
+
|
|
9
|
+
// Promise-returning wrapper around child_process.exec, so a shell command can be awaited.
const execAsync = promisify(exec);
|
|
10
|
+
// User-Agent string sent as a header on the HTTP page requests and handed to ffmpeg via -user_agent.
const UA = 'Mozilla/5.0 (Linux; Android 11; Redmi Note 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36';
|
|
11
|
+
|
|
12
|
+
/**
 * Scraper client: searches the site's video listing, resolves the HLS stream
 * URLs of a single video page, and can fetch a clip into memory via ffmpeg.
 */
class PornHub {
  constructor() {
    // Origin used to build search/watch URLs and to absolutize relative links.
    this.baseUrl = 'https://www.pornhub.com';
  }

  /**
   * Convert an ISO-8601 time duration (e.g. "PT1H2M3S", "PT5M30S", "PT45S")
   * into a clock-style string ("1:02:03", "5:30", "0:45").
   *
   * Hours, minutes and seconds are each optional; fractional seconds are
   * truncated. Values are not normalised (75 minutes stays "75:00").
   *
   * @param {string|null|undefined} iso - Duration string, typically from JSON-LD.
   * @returns {string|null} Formatted duration; `null` for empty input; the
   *   input itself when it is not a recognisable duration.
   */
  _parseDuration(iso) {
    if (!iso) return null;
    if (typeof iso !== 'string') return iso;

    const match = /PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)(?:\.\d+)?S)?/.exec(iso);
    if (!match) return iso;

    const parts = match.slice(1);
    // A bare "PT" with no component is not a usable duration.
    if (parts.every((part) => part === undefined)) return iso;

    const [hours, minutes, seconds] = parts.map((part) =>
      part === undefined ? 0 : Number.parseInt(part, 10)
    );
    const paddedSeconds = String(seconds).padStart(2, '0');

    if (hours > 0) {
      return `${hours}:${String(minutes).padStart(2, '0')}:${paddedSeconds}`;
    }
    return `${minutes}:${paddedSeconds}`;
  }

  /**
   * Locate the first `mediaDefinitions` array literal inside a script's text
   * and parse it as JSON.
   *
   * The closing bracket is found by tracking bracket depth while skipping
   * over double-quoted string literals, so brackets that appear inside a URL
   * or other string value do not end the scan early.
   *
   * @param {string} s - Inline script source.
   * @returns {Array|null} Parsed array, or `null` when the marker, a balanced
   *   array, or valid JSON cannot be found.
   */
  _extractMediaDefinitions(s) {
    const markerAt = s.indexOf('mediaDefinitions');
    if (markerAt === -1) return null;

    const arrStart = s.indexOf('[', markerAt);
    if (arrStart === -1) return null;

    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = arrStart; i < s.length; i++) {
      const ch = s[i];

      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === '[') {
        depth++;
      } else if (ch === ']') {
        depth--;
        if (depth === 0) {
          try {
            // JSON.parse already decodes the "\/" escape, so no pre-processing is needed.
            return JSON.parse(s.slice(arrStart, i + 1));
          } catch {
            return null;
          }
        }
      }
    }

    // Ran off the end without closing the array.
    return null;
  }

  /**
   * Search videos and scrape the result list.
   *
   * @param {string} query - Search terms (URL-encoded before the request).
   * @param {number} [limit=10] - Maximum number of results to return.
   * @returns {Promise<Array<{title: string, url: string, thumb: string,
   *   preview: string, duration: string, vkey: string, views: string}>>}
   * @throws {Error} "Search failed: …" when the request or parsing fails; the
   *   underlying error is attached as `cause` where the runtime supports it.
   */
  async search(query, limit = 10) {
    try {
      const { data } = await axios.get(`${this.baseUrl}/video/search?search=${encodeURIComponent(query)}`, {
        headers: {
          'User-Agent': UA,
          'Accept-Language': 'en-US,en;q=0.9'
        },
        timeout: 12000
      });

      const $ = cheerio.load(data);
      const results = [];

      $('li[data-video-vkey]').each((_, el) => {
        // Returning false stops the cheerio iteration once the limit is reached.
        if (results.length >= limit) return false;

        const item = $(el);
        const anchor = item.find('a.imageLink').first();
        const img = item.find('img.videoThumb').first();

        const href = anchor.attr('href') || '';
        const title = item.find('.title a').first().text().trim();
        const thumb = img.attr('src') || '';
        const preview = anchor.attr('data-webm') || ''; // video preview clip (WebM)
        const duration = item.find('.duration').first().text().trim();
        const vkey = item.attr('data-video-vkey') || '';

        // Skip list entries that lack a title or a link.
        if (!title || !href) return;

        results.push({
          title,
          url: href.startsWith('http') ? href : `${this.baseUrl}${href}`,
          thumb,
          preview,
          duration,
          vkey,
          views: item.find('.views var').text().trim()
        });
      });

      return results;
    } catch (e) {
      throw new Error(`Search failed: ${e.message}`, { cause: e });
    }
  }

  /**
   * Fetch a video page and collect its metadata and HLS stream URLs.
   *
   * @param {string} urlOrVkey - A watch URL containing `viewkey=`, or a bare view key.
   * @returns {Promise<{title: string, thumb: string|null, duration: string|null,
   *   hls: Object<string, string>}>} `hls` maps "<quality>p" to a playlist URL,
   *   inserted from highest to lowest quality; it is empty when the page has
   *   media definitions but none in HLS format.
   * @throws {TypeError} When `urlOrVkey` is not a non-empty string.
   * @throws {Error} "No video qualities found." when no media definitions can be parsed.
   */
  async download(urlOrVkey) {
    if (typeof urlOrVkey !== 'string' || urlOrVkey === '') {
      throw new TypeError('urlOrVkey must be a non-empty string');
    }

    const url = urlOrVkey.includes('viewkey=')
      ? urlOrVkey
      : `${this.baseUrl}/view_video.php?viewkey=${encodeURIComponent(urlOrVkey)}`;
    const { data } = await axios.get(url, {
      headers: { 'User-Agent': UA },
      timeout: 12000
    });

    const $ = cheerio.load(data);
    const scripts = $('script').map((_, el) => $(el).html()).get();

    let mediaDefinitions = null;
    for (const s of scripts) {
      if (!s || !s.includes('mediaDefinitions')) continue;
      mediaDefinitions = this._extractMediaDefinitions(s);
      if (mediaDefinitions) break;
    }

    if (!mediaDefinitions) throw new Error('No video qualities found.');

    // Keep only HLS entries that carry a URL and a quality, best quality first.
    const hlss = mediaDefinitions
      .filter((d) => d.format === 'hls' && d.videoUrl && d.quality)
      .sort((a, b) => Number.parseInt(b.quality, 10) - Number.parseInt(a.quality, 10));

    const jsonLd = $('script[type="application/ld+json"]').first().html();
    const metadata = { title: null, thumb: null, duration: null, hls: {} };

    if (jsonLd) {
      try {
        const parsed = JSON.parse(jsonLd);
        metadata.title = parsed.name || null;
        metadata.thumb = parsed.thumbnailUrl || null;
        metadata.duration = this._parseDuration(parsed.duration);
      } catch {
        // Best effort: malformed JSON-LD just leaves the metadata fields null.
      }
    }

    if (!metadata.title) metadata.title = $('h1.title span').text().trim() || "No Title";

    hlss.forEach((d) => {
      metadata.hls[`${d.quality}p`] = d.videoUrl;
    });

    return metadata;
  }

  /**
   * Download a video through ffmpeg into a temporary file and return it as a Buffer.
   *
   * ffmpeg is invoked by name (it must be resolvable on PATH) with `-t 300`
   * and `-c copy`, and the whole call is limited to 120 seconds. The
   * temporary directory is always removed afterwards.
   *
   * ffmpeg is started with an argument array (no shell), because the stream
   * URL comes from scraped page content and must never be interpreted as
   * shell syntax.
   *
   * @param {string} url - Watch URL or view key, forwarded to `download`.
   * @param {string|number} [quality='720'] - Preferred quality; falls back to
   *   the first (highest) available stream when that quality is missing.
   * @returns {Promise<{title: string, buffer: Buffer}>}
   * @throws {Error} When no usable HTTP(S) HLS stream exists, or ffmpeg fails or times out.
   */
  async downloadBuffer(url, quality = '720') {
    const info = await this.download(url);
    const hlsUrl = info.hls[`${quality}p`] || Object.values(info.hls)[0];

    if (!hlsUrl) throw new Error('No HLS stream available for this video.');
    if (typeof hlsUrl !== 'string' || !/^https?:\/\//i.test(hlsUrl)) {
      // Guards against ffmpeg being pointed at local files or other protocols.
      throw new Error('Refusing to download from a non-HTTP(S) stream URL.');
    }

    // Local require: execFile is not among the module-level imports.
    const { execFile } = require('child_process');
    const execFileAsync = promisify(execFile);

    const tmpDir = await mkdtemp(join(tmpdir(), 'phdl-'));
    const outPath = join(tmpDir, 'video.mp4');

    const ffmpegArgs = [
      '-v', 'quiet',
      '-y',
      '-user_agent', UA,
      '-headers', `Referer: ${this.baseUrl}/\r\n`,
      '-i', hlsUrl,
      '-t', '300',
      '-c', 'copy',
      '-bsf:a', 'aac_adtstoasc',
      outPath
    ];

    try {
      await execFileAsync('ffmpeg', ffmpegArgs, { timeout: 120000 });
      const buffer = await readFile(outPath);
      return { title: info.title, buffer };
    } finally {
      await rm(tmpDir, { recursive: true, force: true });
    }
  }
}
|
|
151
|
+
|
|
152
|
+
// Export a single ready-made PornHub instance (not the class itself).
module.exports = new PornHub();
|
|
153
|
+
|