@soyaxell09/zenbot-scraper 1.0.12 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,15 +64,21 @@ export async function pinvid(input, limit = 5) {
64
64
  return vids.map((v, i) => ({ index: i + 1, ...v }))
65
65
  }
66
66
 
67
- export async function pinsearch(query, limit = 10) {
68
- const res = await axios.get(
69
- `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}`,
70
- { headers: HEADERS, timeout: 15000 }
71
- )
72
- const $ = cheerio.load(res.data)
73
- const seen = new Set()
74
- const imgs = []
75
-
67
+ function extractFromHtml(html) {
68
+ const $ = cheerio.load(html)
69
+ const pinIds = []
70
+ const seenIds = new Set()
71
+ $('a[href*="/pin/"]').each((_, el) => {
72
+ const m = ($(el).attr('href') || '').match(/\/pin\/(\d+)/)
73
+ if (m && !seenIds.has(m[1])) { seenIds.add(m[1]); pinIds.push(m[1]) }
74
+ })
75
+ if (!pinIds.length) {
76
+ for (const m of html.matchAll(/"id"\s*:\s*"(\d{15,})"/g)) {
77
+ if (!seenIds.has(m[1])) { seenIds.add(m[1]); pinIds.push(m[1]) }
78
+ }
79
+ }
80
+ const imgList = []
81
+ const seenImgs = new Set()
76
82
  $('img').each((_, el) => {
77
83
  const src = $(el).attr('src') || ''
78
84
  const srcset = $(el).attr('srcset') || ''
@@ -80,22 +86,127 @@ export async function pinsearch(query, limit = 10) {
80
86
  for (const s of sources) {
81
87
  if (!isValidPin(s)) continue
82
88
  const high = toOriginal(s)
83
- if (!seen.has(high)) { seen.add(high); imgs.push(high) }
89
+ if (!seenImgs.has(high)) { seenImgs.add(high); imgList.push(high) }
84
90
  }
85
91
  })
86
-
87
92
  $('[style]').each((_, el) => {
88
- const style = $(el).attr('style') || ''
89
- const m = style.match(/url\(['"]?(https:\/\/i\.pinimg\.com[^'")\s]+)['"]?\)/)
93
+ const m = ($(el).attr('style') || '').match(/url\(['"]?(https:\/\/i\.pinimg\.com[^'")\.s]+)['"]?\)/)
90
94
  if (m && isValidPin(m[1])) {
91
95
  const high = toOriginal(m[1])
92
- if (!seen.has(high)) { seen.add(high); imgs.push(high) }
96
+ if (!seenImgs.has(high)) { seenImgs.add(high); imgList.push(high) }
93
97
  }
94
98
  })
99
+ return { pinIds, imgList }
100
+ }
101
+
102
// In-memory cache of paginated search state, keyed by `search:<query>`.
// Each entry is { items, page, bookmark, done } and is evicted after 30 minutes.
const _pinSearchCache = new Map()

/**
 * Pick the image URL for a pin object returned by the search resource endpoint.
 *
 * Prefers the direct `orig` URL, then the `736x` URL, and only falls back to a
 * URL built from `image_signature` when neither is present.
 *
 * @param {object} p - Raw pin object from the API response.
 * @returns {string|null} Image URL, or null when the pin carries no image info.
 */
function pinImageFromApi(p) {
  const direct = p.images?.orig?.url || p.images?.['736x']?.url
  if (direct) return direct
  if (!p.image_signature) return null
  // NOTE(review): the signature is split as aa/bb/cc/<rest>; confirm this matches
  // the CDN path layout (the file name may need to be the full signature).
  const path = p.image_signature.replace(/(..)(..)(..)(.+)/, '$1/$2/$3/$4')
  return `https://i.pinimg.com/originals/${path}.jpg`
}

/**
 * Search Pinterest pins for `query` and return up to `limit` results.
 *
 * Page 0 is scraped from the HTML search page; further pages are requested from
 * the SearchResource endpoint using the bookmark found on the previous page.
 * Results accumulate in a per-query cache, so a later call with a larger
 * `limit` continues where the previous call stopped.
 *
 * @param {string} query - Search text.
 * @param {number} [limit=50] - Maximum number of items to return.
 * @returns {Promise<Array<{index: number, image: string, url: string, pinId: (string|null)}>>}
 * @throws {Error} 'Sin resultados en Pinterest' when nothing could be collected
 *   (the underlying request error, if any, is attached as `cause`).
 */
export async function pinsearch(query, limit = 50) {
  const cacheKey = `search:${query}`
  let cached = _pinSearchCache.get(cacheKey)

  if (!cached) {
    cached = { items: [], page: 0, bookmark: null, done: false }
    _pinSearchCache.set(cacheKey, cached)
    const timer = setTimeout(() => _pinSearchCache.delete(cacheKey), 1000 * 60 * 30)
    // Do not let the eviction timer keep the process alive for 30 minutes.
    timer.unref?.()
  }

  const seenIds = new Set(cached.items.map(i => i.pinId).filter(Boolean))
  const searchUrl = `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}`
  let lastError = null

  while (cached.items.length < limit && !cached.done) {
    try {
      if (cached.page === 0) {
        const res = await axios.get(searchUrl, { headers: HEADERS, timeout: 15000 })
        const html = res.data
        const bmMatch = html.match(/"bookmark"\s*:\s*"([^"]+)"/)
        cached.bookmark = bmMatch ? bmMatch[1] : null

        // Pin ids and images are paired by position, which is only a heuristic:
        // the two lists are extracted independently from the page.
        const { pinIds, imgList } = extractFromHtml(html)
        for (let i = 0; i < imgList.length; i++) {
          const pid = pinIds[i]
          if (pid && seenIds.has(pid)) continue
          if (pid) seenIds.add(pid)
          cached.items.push({
            index: cached.items.length + 1,
            image: imgList[i],
            url: pid ? `https://www.pinterest.com/pin/${pid}/` : imgList[i],
            pinId: pid || null
          })
        }
        cached.page++
      } else if (cached.bookmark) {
        const res = await axios.get('https://www.pinterest.com/resource/SearchResource/get/', {
          params: {
            source_url: `/search/pins/?q=${encodeURIComponent(query)}`,
            data: JSON.stringify({
              options: { query, scope: 'pins', bookmarks: [cached.bookmark], page_size: 25 },
              context: {}
            }),
            _: Date.now()
          },
          headers: {
            ...HEADERS,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Accept': 'application/json',
            'Referer': searchUrl,
          },
          timeout: 15000
        })
        const response = res.data?.resource_response
        const nextBookmark = response?.bookmark || null
        // Stop when there is no next page, or when the server hands back the
        // same bookmark again (otherwise the loop could spin forever).
        if (!nextBookmark || nextBookmark === cached.bookmark) cached.done = true
        cached.bookmark = nextBookmark

        // NOTE(review): the payload is read as an array; `data.results` is also
        // accepted in case the endpoint wraps the list — confirm the real shape.
        const payload = response?.data
        const pins = Array.isArray(payload)
          ? payload
          : Array.isArray(payload?.results) ? payload.results : []

        for (const p of pins) {
          if (!p?.id || seenIds.has(p.id)) continue
          seenIds.add(p.id)
          const img = pinImageFromApi(p)
          if (!img) continue
          cached.items.push({
            // Computed at push time so every item gets a distinct, sequential index.
            index: cached.items.length + 1,
            image: img,
            url: `https://www.pinterest.com/pin/${p.id}/`,
            pinId: p.id
          })
        }
        cached.page++
      } else {
        cached.done = true
      }
    } catch (err) {
      // Stop this call, but leave the cache entry resumable: a transient
      // network failure must not mark the query as exhausted.
      lastError = err
      break
    }
  }

  if (!cached.items.length) {
    throw new Error('Sin resultados en Pinterest', lastError ? { cause: lastError } : undefined)
  }
  return cached.items.slice(0, limit)
}
187
+
188
/**
 * Return the text of the balanced `{...}` object that starts at `start`.
 *
 * Braces inside double-quoted string literals (including escaped quotes) are
 * ignored, so values such as "a } b" do not terminate the object early.
 *
 * @param {string} txt - Text to scan.
 * @param {number} start - Index of the opening `{`.
 * @returns {string|null} The object text, or null if it never closes.
 */
function sliceBalancedObject(txt, start) {
  let depth = 0
  let inString = false
  let escaped = false
  for (let i = start; i < txt.length; i++) {
    const ch = txt[i]
    if (inString) {
      if (escaped) escaped = false
      else if (ch === '\\') escaped = true
      else if (ch === '"') inString = false
      continue
    }
    if (ch === '"') inString = true
    else if (ch === '{') depth++
    else if (ch === '}') {
      depth--
      if (depth === 0) return txt.slice(start, i + 1)
    }
  }
  return null
}

/**
 * Extract the pin record embedded in a pin page's inline scripts.
 *
 * Looks for the first <script> whose text contains a `"v3GetPinQuery` key,
 * parses the JSON object that encloses that key, and returns the `data`
 * member of the object's first entry.
 *
 * @param {string} html - Raw HTML of the pin page.
 * @returns {object|null} The pin data, or null when it is missing, cannot be
 *   parsed, or the entry is typed `PinNotFound`.
 */
function extractPinData(html) {
  const $ = cheerio.load(html)
  let found = null
  $('script').each((_, el) => {
    const txt = $(el).html() || ''
    const idx = txt.indexOf('"v3GetPinQuery')
    if (idx === -1) return
    // The enclosing object is assumed to open at the nearest `{` before the key.
    const jsonStart = txt.lastIndexOf('{', idx)
    if (jsonStart === -1) return
    const candidate = sliceBalancedObject(txt, jsonStart)
    if (!candidate) return
    try {
      found = JSON.parse(candidate)
    } catch {
      // Best effort: this script did not hold valid JSON, try the next one.
      return
    }
    // Returning false stops the iteration once a payload has been parsed.
    return false
  })
  const key = found ? Object.keys(found)[0] : null
  const entry = key ? found[key] : null
  const pin = entry?.data
  if (!pin || entry.__typename === 'PinNotFound') return null
  return pin
}
100
211
 
101
212
  export async function pinimg(input, limit = 5) {
@@ -108,30 +219,60 @@ export async function pinimg(input, limit = 5) {
108
219
  const r = await axios.get(input, { headers: HEADERS, maxRedirects: 5, timeout: 10000 })
109
220
  resolvedUrl = r.request?.res?.responseUrl || r.config?.url || input
110
221
  }
111
- const url = resolvedUrl
112
222
 
113
- const res = await axios.get(url, { headers: HEADERS, timeout: 15000 })
223
+ const res = await axios.get(resolvedUrl, {
224
+ headers: { ...HEADERS, 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36' },
225
+ timeout: 15000
226
+ })
114
227
  const html = res.data
115
- const $ = cheerio.load(html)
116
- const seen = new Set()
117
- const imgs = []
228
+ const pin = extractPinData(html)
118
229
 
119
- const jsonMatch = html.match(/"orig"\s*:\s*\{"url"\s*:\s*"([^"]+)"/)
120
- if (jsonMatch) {
121
- const u = jsonMatch[1].replace(/\\u002F/g, '/').replace(/\\\//g, '/')
122
- if (u.includes('i.pinimg.com')) { seen.add(u); imgs.push(u) }
123
- }
230
+ const cleanUrl = resolvedUrl.split('?')[0].split('/sent/')[0]
231
+ const id = cleanUrl.match(/\/pin\/(\d+)/)?.[1] || ''
124
232
 
125
- const PIN_IMG_RE = /"url"\s*:\s*"(https:\\?\/\\?\/i\.pinimg\.com[^"]+)"/g
126
- const pinMatches = [...html.matchAll(PIN_IMG_RE)]
127
- for (const m of pinMatches) {
128
- const u = m[1].replace(/\\u002F/g, '/').replace(/\\\//g, '/').replace(/\\/g, '')
129
- if (isValidPin(u)) {
130
- const high = toOriginal(u)
131
- if (!seen.has(high)) { seen.add(high); imgs.push(high) }
233
+ if (pin) {
234
+ const creator = pin.nativeCreator || pin.closeupAttribution || pin.originPinner || {}
235
+ const tags = (pin.pinJoin?.seoBreadcrumbs || []).map(b => b.name).filter(Boolean)
236
+ const saves = pin.aggregatedPinData?.aggregatedStats?.saves ?? null
237
+ const board = pin.board?.url ? `https://www.pinterest.com${pin.board.url}` : null
238
+
239
+ return {
240
+ id,
241
+ title: pin.title || pin.seoTitle || '',
242
+ description: pin.description?.trim() || pin.seoAltText || pin.gridDescription?.trim() || '',
243
+ altText: pin.seoAltText || '',
244
+ image: pin.images_orig?.url || pin.imageLargeUrl || pin.images_736x?.url || '',
245
+ images: {
246
+ orig: pin.images_orig?.url || pin.imageLargeUrl || '',
247
+ '736': pin.images_736x?.url || '',
248
+ '474': pin.images_474x?.url || '',
249
+ '236': pin.images_236x?.url || '',
250
+ '136': pin.images_136x136?.url || '',
251
+ },
252
+ width: pin.images_474x?.width || pin.images_736x?.width || 0,
253
+ height: pin.images_474x?.height || pin.images_736x?.height || 0,
254
+ dominantColor: pin.dominantColor || '',
255
+ saves: saves,
256
+ repins: pin.repinCount ?? 0,
257
+ createdAt: pin.createdAt || '',
258
+ tags,
259
+ domain: pin.domain || '',
260
+ link: pin.link || '',
261
+ board,
262
+ creator: {
263
+ username: creator.username || '',
264
+ fullName: creator.fullName || creator.full_name || '',
265
+ },
266
+ pinner: {
267
+ username: pin.pinner?.username || '',
268
+ },
269
+ url: cleanUrl,
132
270
  }
133
271
  }
134
272
 
273
+ const $ = cheerio.load(html)
274
+ const seen = new Set()
275
+ const imgs = []
135
276
  $('img').each((_, el) => {
136
277
  const src = $(el).attr('src') || ''
137
278
  const srcset = $(el).attr('srcset') || ''
@@ -142,21 +283,16 @@ export async function pinimg(input, limit = 5) {
142
283
  if (!seen.has(high)) { seen.add(high); imgs.push(high) }
143
284
  }
144
285
  })
145
-
146
- const ogImg = $('meta[property="og:image"]').attr('content') || ''
147
- const ogTitle = $('meta[property="og:title"]').attr('content') || ''
148
- const ogDesc = $('meta[property="og:description"]').attr('content') || ''
149
- if (ogImg && !seen.has(ogImg)) imgs.push(ogImg)
150
-
151
286
  const unique = [...new Set(imgs)].filter(Boolean)
152
287
  if (!unique.length) throw new Error('No se pudo extraer la imagen del pin')
153
288
 
154
289
  return {
155
- id: resolvedUrl.match(/\/pin\/(\d+)/)?.[1] || '',
156
- title: ogTitle,
157
- description: ogDesc,
158
- image: unique[0],
159
- images: unique,
160
- url: resolvedUrl,
290
+ id,
291
+ title: '', description: '', altText: '', image: unique[0],
292
+ images: { orig: unique[0], '736': '', '474': '', '236': unique[0], '136': '' },
293
+ width: 0, height: 0, dominantColor: '', saves: null, repins: 0,
294
+ createdAt: '', tags: [], domain: '', link: '', board: null,
295
+ creator: { username: '', fullName: '' }, pinner: { username: '' },
296
+ url: cleanUrl,
161
297
  }
162
298
  }
@@ -31,7 +31,7 @@ async function searchTracks(query, limit = 5) {
31
31
  }))
32
32
  }
33
33
 
34
- async function searchAlbums(query, limit = 5) {
34
+ async function searchAlbums(query, limit = 12) {
35
35
  const res = await axios.get(
36
36
  `https://api.deezer.com/search/album?q=${encodeURIComponent(query)}&limit=${limit}`,
37
37
  { headers: HEADERS, timeout: 15000 }
@@ -1,98 +0,0 @@
1
- /*
2
- * © Created by AxelDev09 🔥
3
- * GitHub: https://github.com/AxelDev09
4
- * Instagram: @axeldev09
5
- * Deja los créditos we 🗣️
6
- */
7
-
8
- import axios from 'axios';
9
- import * as cheerio from 'cheerio';
10
-
11
- const UA = 'Mozilla/5.0 (Linux; Android 11; Redmi Note 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36';
12
- const AJAX_URL = 'https://igsnapinsta.com/wp-admin/admin-ajax.php';
13
- const BASE_URL = 'https://igsnapinsta.com';
14
-
15
- function decodeUrl(encodedUrl) {
16
- try { return Buffer.from(encodedUrl, 'base64').toString('utf-8'); }
17
- catch { return encodedUrl; }
18
- }
19
-
20
- function parseItems(html) {
21
- const $ = cheerio.load(html);
22
- const items = [];
23
- const seen = new Set();
24
-
25
- function add(type, url) {
26
- const clean = url.replace(/&amp;/g, '&').trim();
27
- if (!clean || seen.has(clean)) return;
28
- seen.add(clean);
29
- items.push({ type, url: clean });
30
- }
31
-
32
- $('source[src]').each((_, el) => {
33
- const src = $(el).attr('src');
34
- if (src) add('video', src);
35
- });
36
-
37
- $('a[href]').each((_, el) => {
38
- const href = $(el).attr('href') || '';
39
- if (!href.includes('kdnsd/v1/download')) return;
40
- const b64 = href.split('url=')[1] || '';
41
- const decoded = decodeUrl(decodeURIComponent(b64));
42
- const type = decoded.includes('.mp4') ? 'video' : 'image';
43
- add(type, href.replace(/&amp;/g, '&'));
44
- });
45
-
46
- $('img[src]').each((_, el) => {
47
- const src = $(el).attr('src') || '';
48
- if (src.includes('kdnsd/v1/download') || src.includes('cdninstagram') || src.includes('fbcdn'))
49
- add('image', src);
50
- });
51
-
52
- return items;
53
- }
54
-
55
- function detectType(url) {
56
- if (url.includes('/reel/')) return 'reel';
57
- if (url.includes('/p/')) return 'post';
58
- if (url.includes('/stories/')) return 'story';
59
- if (url.includes('/tv/')) return 'video';
60
- const path = new URL(url).pathname.replace(/\/$/, '');
61
- if (path.split('/').length === 2) return 'profile';
62
- return 'post';
63
- }
64
-
65
- export async function igDownload(url) {
66
- if (!url.includes('instagram.com'))
67
- throw new Error('URL inválida. Debe ser un link de Instagram.');
68
-
69
- const { data } = await axios.post(
70
- AJAX_URL,
71
- new URLSearchParams({ action: 'kdnsd_get_instagram_video', url }),
72
- {
73
- headers: {
74
- 'User-Agent': UA,
75
- 'Content-Type': 'application/x-www-form-urlencoded',
76
- 'Referer': `${BASE_URL}/es/`,
77
- 'Origin': BASE_URL,
78
- 'X-Requested-With': 'XMLHttpRequest',
79
- },
80
- timeout: 20000
81
- }
82
- );
83
-
84
- if (!data?.success || !data?.data?.html) {
85
- const html = data?.data?.html || '';
86
- if (html.includes('private') || html.includes('privado') || data?.data?.message?.includes('private'))
87
- throw new Error('Perfil privado. Solo se puede descargar contenido de perfiles públicos.');
88
- throw new Error('No se pudo obtener el contenido. Verificá que el perfil/post sea público.');
89
- }
90
-
91
- const type = detectType(url);
92
- const items = parseItems(data.data.html);
93
-
94
- if (!items.length)
95
- throw new Error('No se encontró contenido descargable.');
96
-
97
- return { type, items };
98
- }
@@ -1,131 +0,0 @@
1
- // Créditos a FG-error
2
- import axios from 'axios'
3
-
4
- const delay = ms => new Promise(r => setTimeout(r, ms))
5
-
6
- function parseFileSize(size) {
7
- if (!size) return 0
8
- const units = { B: 1, KB: 1024, MB: 1024 ** 2, GB: 1024 ** 3, TB: 1024 ** 4 }
9
- const match = size.toString().trim().match(/([\d.]+)\s*(B|KB|MB|GB|TB)/i)
10
- if (!match) return 0
11
- return Math.round(parseFloat(match[1]) * (units[match[2].toUpperCase()] || 1))
12
- }
13
-
14
- function formatFileSize(bytes) {
15
- if (!bytes || isNaN(bytes)) return '0 B'
16
- const units = ['B', 'KB', 'MB', 'GB', 'TB']
17
- let i = 0
18
- while (bytes >= 1024 && i < units.length - 1) { bytes /= 1024; i++ }
19
- return `${bytes.toFixed(1).replace(/\.0$/, '')} ${units[i]}`
20
- }
21
-
22
- export async function getFileSizeV2(url) {
23
- try {
24
- const res = await axios.head(url, { timeout: 10000 })
25
- const bytes = parseInt(res.headers['content-length'] || 0)
26
- return formatFileSize(bytes)
27
- } catch { return '0 B' }
28
- }
29
-
30
- function normalizeYT(url) {
31
- try {
32
- const u = new URL(url)
33
- if (u.hostname.includes('youtu.be')) return url
34
- if (u.hostname.includes('youtube.com')) {
35
- if (u.pathname.includes('/watch')) return `https://youtu.be/${u.searchParams.get('v')}`
36
- if (u.pathname.includes('/shorts/')) return `https://youtu.be/${u.pathname.split('/shorts/')[1]}`
37
- if (u.pathname.includes('/embed/')) return `https://youtu.be/${u.pathname.split('/embed/')[1]}`
38
- }
39
- return url
40
- } catch { return url }
41
- }
42
-
43
- async function waitForDownload(mediaUrl) {
44
- for (let i = 0; i < 15; i++) {
45
- try {
46
- const { data } = await axios.get(mediaUrl, { timeout: 15000 })
47
- if (data?.percent === 'Completed' && data?.fileUrl && data.fileUrl !== 'In Processing...')
48
- return data.fileUrl
49
- } catch {}
50
- await delay(4000)
51
- }
52
- throw new Error('No se pudo generar el enlace de descarga')
53
- }
54
-
55
- async function fetchYtdownto(url) {
56
- const { data } = await axios.post(
57
- 'https://app.ytdown.to/proxy.php',
58
- new URLSearchParams({ url }).toString(),
59
- {
60
- headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
61
- timeout: 20000,
62
- }
63
- )
64
- const api = data?.api
65
- if (!api) throw new Error('No se pudo obtener información del video')
66
- if (api.status === 'ERROR') throw new Error(api.message)
67
-
68
- const qualities = (api.mediaItems || []).map((v, i) => {
69
- const match = v?.mediaUrl?.match(/(\d+)p|(\d+)k/)
70
- const res = match ? match[0] : v.mediaQuality
71
- return {
72
- id: i + 1,
73
- type: v.type,
74
- quality: res,
75
- label: `${v.mediaExtension?.toUpperCase()} - ${v.mediaQuality}`,
76
- size: v.mediaFileSize,
77
- sizeB: parseFileSize(v.mediaFileSize),
78
- mediaUrl: v.mediaUrl,
79
- duration: v.mediaDuration,
80
- }
81
- })
82
-
83
- return { api, qualities }
84
- }
85
-
86
- export async function ytDownloadV2(url, type = 'video', quality = '360p') {
87
- url = normalizeYT(url)
88
-
89
- const { api, qualities } = await fetchYtdownto(url)
90
-
91
- const isAudio = type === 'mp3' || type === 'audio'
92
- const targetQ = quality.toLowerCase()
93
-
94
- const filtered = isAudio
95
- ? qualities.filter(v => v.type === 'audio' || v.quality?.includes('k'))
96
- : qualities.filter(v => v.type === 'video' || v.quality?.includes('p'))
97
-
98
- const selected = filtered.find(v => v.quality?.toLowerCase() === targetQ) || filtered[0]
99
-
100
- if (!selected) {
101
- const disponibles = qualities.map(v => v.quality).filter(Boolean).join(', ')
102
- throw new Error(`Calidad ${quality} no disponible. Disponibles: ${disponibles}`)
103
- }
104
-
105
- const dlUrl = await waitForDownload(selected.mediaUrl)
106
-
107
- return {
108
- title: api.title,
109
- uploader: api.userInfo?.name || '',
110
- views: api.mediaStats?.viewsCount || '',
111
- thumb: api.imagePreviewUrl || '',
112
- type: isAudio ? 'audio' : 'video',
113
- quality: selected.quality,
114
- size: selected.size,
115
- sizeB: selected.sizeB,
116
- duration: selected.duration,
117
- url: dlUrl,
118
- }
119
- }
120
-
121
- export async function ytInfoV2(url) {
122
- url = normalizeYT(url)
123
- const { api, qualities } = await fetchYtdownto(url)
124
- return {
125
- title: api.title,
126
- uploader: api.userInfo?.name || '',
127
- views: api.mediaStats?.viewsCount || '',
128
- thumb: api.imagePreviewUrl || '',
129
- qualities,
130
- }
131
- }