@soyaxell09/zenbot-scraper 1.0.13 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -90
- package/package.json +1 -2
- package/src/index.js +6 -6
- package/src/nsfw/index.js +2 -3
- package/src/nsfw/rule34.js +0 -39
- package/src/scrapers/index.js +3 -3
- package/src/scrapers/threads.js +55 -0
- package/src/scrapers/youtube.js +160 -114
- package/src/search/giphy.js +67 -12
- package/src/search/index.js +8 -8
- package/src/search/pinterest.js +182 -46
- package/src/search/spotify.js +1 -1
- package/src/scrapers/instagram.js +0 -106
- package/src/scrapers/youtubev2.js +0 -131
package/src/scrapers/youtube.js
CHANGED
|
@@ -1,137 +1,183 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
* GitHub: https://github.com/AxelDev09
|
|
4
|
-
* Instagram: @axeldev09
|
|
5
|
-
* Deja los créditos we 🗣️
|
|
6
|
-
*/
|
|
1
|
+
// Parchado y modificado por AxelDev09
|
|
2
|
+
// scraper creado por FG-ERROR
|
|
7
3
|
|
|
8
4
|
import axios from 'axios'
|
|
9
|
-
import ytdl from '@distube/ytdl-core'
|
|
10
5
|
|
|
11
|
-
const
|
|
12
|
-
let _config = null
|
|
6
|
+
/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - Time to wait before the promise settles.
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms)
})
|
|
13
7
|
|
|
14
|
-
|
|
15
|
-
if (
|
|
16
|
-
const
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
'Accept': 'text/html,application/xhtml+xml',
|
|
21
|
-
},
|
|
22
|
-
timeout: 15000,
|
|
23
|
-
})
|
|
24
|
-
const html = res.data
|
|
25
|
-
const key = html.match(/"INNERTUBE_API_KEY"\s*:\s*"([^"]+)"/)?.[1] || 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM394'
|
|
26
|
-
const visitorData = html.match(/"visitorData"\s*:\s*"([^"]+)"/)?.[1] || ''
|
|
27
|
-
const clientVersion = html.match(/"clientVersion"\s*:\s*"([^"]+)"/)?.[1] || '2.20240101.00.00'
|
|
28
|
-
_config = { key, visitorData, clientVersion }
|
|
29
|
-
return _config
|
|
8
|
+
/**
 * Converts a human-readable size such as "12.5 MB" into a byte count.
 *
 * Units are binary multiples (1 KB = 1024 B) and are matched case-insensitively.
 *
 * @param {string|number} size - Text containing a number followed by B, KB, MB, GB or TB.
 * @returns {number} Rounded byte count, or 0 when the input is empty or cannot be parsed.
 */
function parseFileSize(size) {
  if (!size) return 0
  const units = { B: 1, KB: 1024, MB: 1024 ** 2, GB: 1024 ** 3, TB: 1024 ** 4 }
  const match = size.toString().trim().match(/([\d.]+)\s*(B|KB|MB|GB|TB)/i)
  if (!match) return 0
  // `[\d.]+` also accepts digit-less text such as "." which parseFloat turns into NaN;
  // report that as "unknown size" (0) instead of leaking NaN to callers.
  const value = Number.parseFloat(match[1])
  if (!Number.isFinite(value)) return 0
  return Math.round(value * (units[match[2].toUpperCase()] || 1))
}
|
|
31
15
|
|
|
32
|
-
function
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
const m = url.match(p)
|
|
39
|
-
if (m) return m[1]
|
|
40
|
-
}
|
|
41
|
-
return null
|
|
16
|
+
/**
 * Formats a byte count as a short human-readable string ("1.5 KB", "3 MB").
 *
 * Uses binary multiples up to TB and at most one decimal; a trailing ".0" is dropped.
 *
 * @param {number|string} bytes - Byte count; numeric strings are accepted.
 * @returns {string} Formatted size, or '0 B' for zero, empty or non-finite input.
 */
function formatFileSize(bytes) {
  // Coerce first: a numeric string below 1024 would otherwise skip the loop
  // and reach `.toFixed` still as a string, which throws.
  const total = Number(bytes)
  if (!total || !Number.isFinite(total)) return '0 B'
  const units = ['B', 'KB', 'MB', 'GB', 'TB']
  let value = total
  let i = 0
  while (value >= 1024 && i < units.length - 1) {
    value /= 1024
    i++
  }
  return `${value.toFixed(1).replace(/\.0$/, '')} ${units[i]}`
}
|
|
43
23
|
|
|
44
|
-
export async function
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
.filter(f => f.url)
|
|
52
|
-
.map(f => ({
|
|
53
|
-
itag: f.itag,
|
|
54
|
-
url: f.url,
|
|
55
|
-
mimeType: f.mimeType || '',
|
|
56
|
-
quality: f.qualityLabel || f.quality || '',
|
|
57
|
-
bitrate: f.bitrate || f.averageBitrate || 0,
|
|
58
|
-
width: f.width || 0,
|
|
59
|
-
height: f.height || 0,
|
|
60
|
-
fps: f.fps || 0,
|
|
61
|
-
hasVideo: !!f.hasVideo,
|
|
62
|
-
hasAudio: !!f.hasAudio,
|
|
63
|
-
container: f.container || '',
|
|
64
|
-
codecs: f.codecs || '',
|
|
65
|
-
}))
|
|
24
|
+
/**
 * Looks up the size of a remote resource with a HEAD request.
 *
 * Best effort: any request failure is reported as '0 B' rather than thrown.
 *
 * @param {string} url - Resource to probe.
 * @returns {Promise<string>} Size formatted by formatFileSize, or '0 B' when unknown.
 */
export async function getFileSize(url) {
  try {
    const res = await axios.head(url, { timeout: 10000 })
    // Explicit radix so the header is always read as a decimal number.
    const bytes = Number.parseInt(res.headers['content-length'] || '0', 10)
    return formatFileSize(bytes)
  } catch {
    return '0 B'
  }
}
|
|
66
31
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
}
|
|
32
|
+
/**
 * Rewrites YouTube watch/shorts/embed links to the short `https://youtu.be/<id>` form.
 *
 * Anything that is not a recognizable YouTube link, or that carries no video id,
 * is returned unchanged.
 *
 * @param {string} url - Link supplied by the caller.
 * @returns {string} Short link when an id could be extracted, otherwise the input.
 */
function normalizeYT(url) {
  try {
    const u = new URL(url)
    // Match the domain itself or a subdomain, not any host merely containing the text.
    const isHost = (domain) => u.hostname === domain || u.hostname.endsWith(`.${domain}`)
    if (isHost('youtu.be')) return url
    if (!isHost('youtube.com')) return url

    if (u.pathname.includes('/watch')) {
      // Without `v` there is no id; building the link would produce "youtu.be/null".
      const id = u.searchParams.get('v')
      return id ? `https://youtu.be/${id}` : url
    }
    for (const prefix of ['/shorts/', '/embed/']) {
      if (!u.pathname.includes(prefix)) continue
      // Keep only the first segment so a trailing slash or extra path is not part of the id.
      const id = u.pathname.split(prefix)[1].split('/')[0]
      return id ? `https://youtu.be/${id}` : url
    }
    return url
  } catch {
    // Not a parseable URL: hand it back untouched.
    return url
  }
}
|
|
80
44
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
45
|
+
/**
 * Polls a conversion-status URL until it reports a finished file.
 *
 * A response counts as finished when `percent` is 'Completed' and `fileUrl`
 * is set to something other than the 'In Processing...' placeholder.
 *
 * @param {string} mediaUrl - Status endpoint to poll.
 * @param {number} [attempts=15] - Maximum number of requests.
 * @param {number} [intervalMs=4000] - Pause between requests, in milliseconds.
 * @returns {Promise<string>} The download URL reported by the endpoint.
 * @throws {Error} When no finished file is reported within the allowed attempts;
 *   the last request error, if any, is attached as `cause`.
 */
async function waitForDownload(mediaUrl, attempts = 15, intervalMs = 4000) {
  let lastError
  for (let i = 0; i < attempts; i++) {
    try {
      const { data } = await axios.get(mediaUrl, { timeout: 15000 })
      if (data?.percent === 'Completed' && data?.fileUrl && data.fileUrl !== 'In Processing...') {
        return data.fileUrl
      }
    } catch (err) {
      // A failed poll is retried, but remember why so the final error is diagnosable.
      lastError = err
    }
    // No point sleeping after the last attempt; fail right away.
    if (i < attempts - 1) await delay(intervalMs)
  }
  throw new Error(
    'No se pudo generar el enlace de descarga',
    lastError ? { cause: lastError } : undefined
  )
}
|
|
84
56
|
|
|
85
|
-
|
|
86
|
-
const
|
|
57
|
+
/**
 * Requests video metadata from the ytdown.to proxy and flattens its media list.
 *
 * @param {string} url - Video link, sent form-encoded as the `url` field.
 * @returns {Promise<{api: object, qualities: object[]}>} The raw `api` payload plus one
 *   entry per media item: `id` (1-based), `type`, `quality`, `label`, `size`, `sizeB`,
 *   `mediaUrl` and `duration`.
 * @throws {Error} When the response has no `api` object or reports status 'ERROR'.
 */
async function fetchYtdownto(url) {
  const { data } = await axios.post(
    'https://app.ytdown.to/proxy.php',
    new URLSearchParams({ url }).toString(),
    {
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      timeout: 20000,
    }
  )
  const api = data?.api
  if (!api) throw new Error('No se pudo obtener información del video')
  if (api.status === 'ERROR') throw new Error(api.message)

  // Tolerate a missing or malformed list, and drop empty slots up front so the
  // mapping below never dereferences null.
  const items = Array.isArray(api.mediaItems) ? api.mediaItems.filter(Boolean) : []

  const qualities = items.map((v, i) => {
    // Prefer a resolution/bitrate token found in the media URL ("720p", "128k");
    // otherwise use the quality text reported by the service.
    const match = v.mediaUrl?.match(/(\d+)p|(\d+)k/)
    const res = match ? match[0] : v.mediaQuality
    return {
      id: i + 1,
      type: v.type,
      quality: res,
      label: `${v.mediaExtension?.toUpperCase() ?? ''} - ${v.mediaQuality}`,
      size: v.mediaFileSize,
      sizeB: parseFileSize(v.mediaFileSize),
      mediaUrl: v.mediaUrl,
      duration: v.mediaDuration,
    }
  })

  return { api, qualities }
}
|
|
87
|
+
|
|
88
|
+
/**
 * Resolves a direct download link for a YouTube video or its audio track.
 *
 * Audio requests prefer MP3 entries and fall back to the highest numeric quality
 * when the requested one is missing; video requests fall back to the first
 * available video entry.
 *
 * @param {string} url - YouTube link in any form accepted by normalizeYT.
 * @param {string} [type='video'] - 'mp3' or 'audio' selects audio; anything else selects video.
 * @param {string|number} [quality='360p'] - Desired quality label, e.g. '720p' or '128k'.
 * @returns {Promise<object>} `title`, `uploader`, `views`, `thumb`, `type`, `quality`,
 *   `size`, `sizeB`, `duration` and the final download `url`.
 * @throws {Error} When no entry of the requested type is available, or when the
 *   download link cannot be generated.
 */
export async function ytDownload(url, type = 'video', quality = '360p') {
  const shortUrl = normalizeYT(url)
  const { api, qualities } = await fetchYtdownto(shortUrl)

  const isAudio = type === 'mp3' || type === 'audio'
  // Callers may pass a number (360) or null; coerce so the comparison cannot throw.
  const targetQ = String(quality ?? '360p').toLowerCase()

  const matchesType = isAudio
    ? (v) => v.type === 'audio' || v.quality?.includes('k')
    : (v) => v.type === 'video' || v.quality?.includes('p')
  // Entries without a status URL can never be downloaded; skipping them avoids
  // polling an undefined URL until the retry budget runs out.
  const filtered = qualities.filter((v) => v.mediaUrl && matchesType(v))

  if (!filtered.length) {
    const disponibles = qualities.map((v) => v.quality).filter(Boolean).join(', ')
    throw new Error(`Calidad ${quality} no disponible. Disponibles: ${disponibles}`)
  }

  const isExact = (v) => v.quality?.toLowerCase() === targetQ
  let selected
  if (isAudio) {
    const mp3s = filtered.filter((v) => v.label?.toLowerCase().includes('mp3'))
    const pool = mp3s.length ? mp3s : filtered
    // Sort a copy, highest numeric quality first, so the candidate list is not reordered.
    const byQualityDesc = (a, b) =>
      (Number.parseInt(b.quality, 10) || 0) - (Number.parseInt(a.quality, 10) || 0)
    selected = pool.find(isExact) || [...pool].sort(byQualityDesc)[0]
  } else {
    selected = filtered.find(isExact) || filtered[0]
  }

  const dlUrl = await waitForDownload(selected.mediaUrl)

  return {
    title: api.title,
    uploader: api.userInfo?.name || '',
    views: api.mediaStats?.viewsCount || '',
    thumb: api.imagePreviewUrl || '',
    type: isAudio ? 'audio' : 'video',
    quality: selected.quality,
    size: selected.size,
    sizeB: selected.sizeB,
    duration: selected.duration,
    url: dlUrl,
  }
}
|
|
134
143
|
|
|
144
|
+
/**
 * Fetches metadata and the list of available qualities for a YouTube video.
 *
 * @param {string} url - YouTube link; it is normalized before the lookup.
 * @returns {Promise<object>} `id` (11-character id taken from the normalized link, or ''),
 *   `title`, `uploader`, `views`, `thumb`, `duration` (from the first quality entry, or '')
 *   and the `qualities` array.
 */
export async function ytInfo(url) {
  const shortUrl = normalizeYT(url)
  const { api, qualities } = await fetchYtdownto(shortUrl)

  const firstEntry = qualities[0]
  const idMatch = shortUrl.match(/youtu\.be\/([a-zA-Z0-9_-]{11})/)

  const info = {
    id: idMatch?.[1] || '',
    title: api.title,
    uploader: api.userInfo?.name || '',
    views: api.mediaStats?.viewsCount || '',
    thumb: api.imagePreviewUrl || '',
    duration: firstEntry?.duration || '',
    qualities,
  }
  return info
}
|
|
159
|
+
|
|
160
|
+
const INNERTUBE_URL = 'https://www.youtube.com/youtubei/v1'
|
|
161
|
+
let _config = null
|
|
162
|
+
|
|
163
|
+
/**
 * Loads the InnerTube client settings from the YouTube home page.
 *
 * The result is stored in the module-level `_config`, so the page is requested
 * only until one call succeeds. Each value falls back to a built-in default
 * when it is not found in the page.
 *
 * @returns {Promise<{key: string, visitorData: string, clientVersion: string}>}
 */
async function getConfig() {
  if (_config) return _config

  const requestOptions = {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36',
      'Accept-Language': 'en-US,en;q=0.9',
      'Accept': 'text/html,application/xhtml+xml',
    },
    timeout: 15000,
  }
  const { data: html } = await axios.get('https://www.youtube.com/', requestOptions)

  // First capture group of the pattern, or the fallback when absent.
  const pick = (pattern, fallback) => html.match(pattern)?.[1] || fallback

  _config = {
    key: pick(/"INNERTUBE_API_KEY"\s*:\s*"([^"]+)"/, 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM394'),
    visitorData: pick(/"visitorData"\s*:\s*"([^"]+)"/, ''),
    clientVersion: pick(/"clientVersion"\s*:\s*"([^"]+)"/, '2.20240101.00.00'),
  }
  return _config
}
|
|
180
|
+
|
|
135
181
|
export async function ytSearch(query, limit = 5) {
|
|
136
182
|
const cfg = await getConfig()
|
|
137
183
|
const res = await axios.post(
|
|
@@ -149,11 +195,11 @@ export async function ytSearch(query, limit = 5) {
|
|
|
149
195
|
},
|
|
150
196
|
{
|
|
151
197
|
headers: {
|
|
152
|
-
'User-Agent':
|
|
153
|
-
'X-YouTube-Client-Name':
|
|
198
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36',
|
|
199
|
+
'X-YouTube-Client-Name': '1',
|
|
154
200
|
'X-YouTube-Client-Version': cfg.clientVersion,
|
|
155
|
-
'Content-Type':
|
|
156
|
-
'X-Goog-Visitor-Id':
|
|
201
|
+
'Content-Type': 'application/json',
|
|
202
|
+
'X-Goog-Visitor-Id': cfg.visitorData,
|
|
157
203
|
},
|
|
158
204
|
timeout: 15000,
|
|
159
205
|
}
|
package/src/search/giphy.js
CHANGED
|
@@ -13,24 +13,79 @@ const HEADERS = {
|
|
|
13
13
|
|
|
14
14
|
const TENOR_KEY = 'AIzaSyAyimkuYQYF_FXVALexPuGQctUWRURdCYQ'
|
|
15
15
|
|
|
16
|
+
/**
 * Maps one Tenor result object to the flat shape returned by this module.
 *
 * @param {object} g - Raw Tenor result (reads `id`, `title`, `content_description`,
 *   `itemurl` and `media_formats`).
 * @returns {{id: *, title: string, url: string, gif: string, preview: string,
 *   mp4: string, width: number, height: number}} Missing values become '' or 0.
 */
function mapItem(g) {
  const formats = g.media_formats
  // Read the dimensions from the same format that supplies the gif URL, so a
  // result served through `mediumgif` no longer reports 0x0.
  let gifFormat = formats?.gif
  if (!gifFormat?.url && formats?.mediumgif?.url) gifFormat = formats.mediumgif

  return {
    id: g.id,
    title: g.title || g.content_description || '',
    url: g.itemurl || '',
    gif: gifFormat?.url || '',
    preview: formats?.nanogif?.url || formats?.tinygif?.url || '',
    mp4: formats?.mp4?.url || formats?.loopedmp4?.url || '',
    width: gifFormat?.dims?.[0] || 0,
    height: gifFormat?.dims?.[1] || 0,
  }
}
|
|
28
|
+
|
|
29
|
+
// Module-level store of per-query paging state; gifSearch keys entries as `gif:<query>`.
const _gifCache = new Map()
|
|
30
|
+
|
|
16
31
|
/**
 * Searches Tenor for GIFs, or lists featured ones.
 *
 * @param {string} query - Search text; ignored when `type` is 'trending'.
 * @param {number} [limit=5] - Maximum number of results requested.
 * @param {string} [type='search'] - 'trending' uses the featured endpoint; any other
 *   value runs a search.
 * @returns {Promise<object[]>} Results mapped through mapItem.
 * @throws {Error} When Tenor returns no results.
 */
export async function giphy(query, limit = 5, type = 'search') {
  const isTrending = type === 'trending'
  const endpoint = `https://tenor.googleapis.com/v2/${isTrending ? 'featured' : 'search'}`

  // Let axios encode the query string. Both modes request mp4 as well, because
  // mapItem reads the mp4 format and would otherwise always report it as ''.
  const params = { key: TENOR_KEY, limit, media_filter: 'gif,mp4' }
  if (!isTrending) params.q = query

  const res = await axios.get(endpoint, { params, headers: HEADERS, timeout: 15000 })
  const data = res.data?.results || []
  if (!data.length) throw new Error('Sin resultados en Tenor')
  return data.map(mapItem)
}
|
|
25
41
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
42
|
+
/**
 * Returns the cached paging state for a query, creating it on first use.
 *
 * No request is made here; the entry starts empty. A new entry is scheduled
 * for removal 20 minutes after it is created.
 *
 * @param {string} query - Search text used to build the cache key.
 * @returns {Promise<{items: object[], next: (string|null), done: boolean}>} The shared,
 *   mutable cache entry.
 */
export async function gifSearch(query) {
  const cacheKey = `gif:${query}`
  let cached = _gifCache.get(cacheKey)
  if (!cached) {
    cached = { items: [], next: null, done: false }
    _gifCache.set(cacheKey, cached)
    const evictTimer = setTimeout(() => _gifCache.delete(cacheKey), 1000 * 60 * 20)
    // Cache eviction must not keep a finished script alive for 20 minutes.
    evictTimer.unref?.()
  }
  return cached
}
|
|
52
|
+
|
|
53
|
+
/**
 * Returns the next unseen GIF for a query, fetching another page from Tenor
 * only when the buffered results are used up.
 *
 * @param {string} query - Search text.
 * @returns {Promise<object>} The next item in mapItem form; it is removed from the buffer.
 * @throws {Error} When no results are left for the query.
 */
export async function gifNext(query) {
  const cached = await gifSearch(query)

  // Refill only when the buffer is empty. Fetching on every call would request
  // 20 items to hand out one, growing the buffer and wasting requests.
  if (!cached.items.length && !cached.done) {
    const params = {
      q: query,
      key: TENOR_KEY,
      limit: 20,
      media_filter: 'gif,mp4',
    }
    if (cached.next) params.pos = cached.next

    const res = await axios.get('https://tenor.googleapis.com/v2/search', { params, headers: HEADERS, timeout: 15000 })
    const data = res.data?.results || []
    cached.next = res.data?.next || null
    // No continuation token or an empty page means the result set is exhausted.
    if (!cached.next || !data.length) cached.done = true
    cached.items.push(...data.map(mapItem))
  }

  if (!cached.items.length) throw new Error('Sin resultados')

  return cached.items.shift()
}
|
|
79
|
+
|
|
80
|
+
/**
 * Downloads the next GIF for a query and returns its bytes.
 *
 * The mp4 rendition is used when the item has one, otherwise the gif.
 *
 * @param {string} query - Search text passed to gifNext.
 * @returns {Promise<{buffer: Buffer, mimetype: string, title: string, url: string}>}
 * @throws {Error} When the item has neither an mp4 nor a gif URL.
 */
export async function giphyBuffer(query) {
  const { mp4, gif, title } = await gifNext(query)
  const hasVideo = Boolean(mp4)
  const url = hasVideo ? mp4 : gif
  if (!url) throw new Error('Sin URL de descarga')

  const { data } = await axios.get(url, {
    responseType: 'arraybuffer',
    headers: HEADERS,
    timeout: 20000,
  })

  return {
    buffer: Buffer.from(data),
    mimetype: hasVideo ? 'video/mp4' : 'image/gif',
    title,
    url,
  }
}
|
package/src/search/index.js
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
* Deja los créditos we 🗣️
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
export { ytSearch }
|
|
9
|
-
export { googleSearch }
|
|
10
|
-
export { spotify }
|
|
11
|
-
export { giphy }
|
|
12
|
-
export { pinsearch, pinimg, pinvid }
|
|
13
|
-
export { stickerSearch }
|
|
14
|
-
export { animeImage }
|
|
15
|
-
export { wallpaperSearch }
|
|
8
|
+
export { ytSearch } from './youtube.js'
|
|
9
|
+
export { googleSearch } from './google.js'
|
|
10
|
+
export { spotify } from './spotify.js'
|
|
11
|
+
export { giphy, gifNext, giphyBuffer } from './giphy.js'
|
|
12
|
+
export { pinsearch, pinimg, pinvid } from './pinterest.js'
|
|
13
|
+
export { stickerSearch } from './stickersearch.js'
|
|
14
|
+
export { animeImage } from './anime.js'
|
|
15
|
+
export { wallpaperSearch } from './wallpaper.js'
|