@soyaxell09/zenbot-scraper 1.0.13 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,15 +64,21 @@ export async function pinvid(input, limit = 5) {
64
64
  return vids.map((v, i) => ({ index: i + 1, ...v }))
65
65
  }
66
66
 
67
- export async function pinsearch(query, limit = 10) {
68
- const res = await axios.get(
69
- `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}`,
70
- { headers: HEADERS, timeout: 15000 }
71
- )
72
- const $ = cheerio.load(res.data)
73
- const seen = new Set()
74
- const imgs = []
75
-
67
+ function extractFromHtml(html) {
68
+ const $ = cheerio.load(html)
69
+ const pinIds = []
70
+ const seenIds = new Set()
71
+ $('a[href*="/pin/"]').each((_, el) => {
72
+ const m = ($(el).attr('href') || '').match(/\/pin\/(\d+)/)
73
+ if (m && !seenIds.has(m[1])) { seenIds.add(m[1]); pinIds.push(m[1]) }
74
+ })
75
+ if (!pinIds.length) {
76
+ for (const m of html.matchAll(/"id"\s*:\s*"(\d{15,})"/g)) {
77
+ if (!seenIds.has(m[1])) { seenIds.add(m[1]); pinIds.push(m[1]) }
78
+ }
79
+ }
80
+ const imgList = []
81
+ const seenImgs = new Set()
76
82
  $('img').each((_, el) => {
77
83
  const src = $(el).attr('src') || ''
78
84
  const srcset = $(el).attr('srcset') || ''
@@ -80,22 +86,127 @@ export async function pinsearch(query, limit = 10) {
80
86
  for (const s of sources) {
81
87
  if (!isValidPin(s)) continue
82
88
  const high = toOriginal(s)
83
- if (!seen.has(high)) { seen.add(high); imgs.push(high) }
89
+ if (!seenImgs.has(high)) { seenImgs.add(high); imgList.push(high) }
84
90
  }
85
91
  })
86
-
87
92
  $('[style]').each((_, el) => {
88
- const style = $(el).attr('style') || ''
89
- const m = style.match(/url\(['"]?(https:\/\/i\.pinimg\.com[^'")\s]+)['"]?\)/)
93
+ const m = ($(el).attr('style') || '').match(/url\(['"]?(https:\/\/i\.pinimg\.com[^'")\.s]+)['"]?\)/)
90
94
  if (m && isValidPin(m[1])) {
91
95
  const high = toOriginal(m[1])
92
- if (!seen.has(high)) { seen.add(high); imgs.push(high) }
96
+ if (!seenImgs.has(high)) { seenImgs.add(high); imgList.push(high) }
93
97
  }
94
98
  })
99
+ return { pinIds, imgList }
100
+ }
101
+
102
// Per-query search progress, keyed by `search:<query>`. Each entry holds the
// items collected so far plus the pagination state needed to resume.
const _pinSearchCache = new Map()

// How long a query's cached progress is kept before it is evicted.
const PIN_SEARCH_CACHE_TTL_MS = 1000 * 60 * 30

/**
 * Chooses the image URL for one pin object from the SearchResource response.
 *
 * Preference order: `images.orig.url`, then `images['736x'].url`, then a URL
 * derived from `image_signature`. Returns null when none of them is present.
 *
 * @param {object} p - Pin object as returned by the API.
 * @returns {string|null}
 */
function pickPinImage(p) {
  const direct = p.images?.orig?.url || p.images?.['736x']?.url
  if (direct) return direct
  if (!p.image_signature) return null
  // NOTE(review): this yields `ab/cd/ef/<rest of signature>.jpg`; confirm that
  // the CDN does not expect the full signature as the file name.
  const path = String(p.image_signature).replace(/(..)(..)(..)(.+)/, '$1/$2/$3/$4')
  return `https://i.pinimg.com/originals/${path}.jpg`
}

/**
 * Searches Pinterest for pins matching `query`.
 *
 * The first page is scraped from the HTML search page; further pages are
 * fetched from the `SearchResource` endpoint using the bookmark found in the
 * previous response. Progress is cached per query for 30 minutes, so a later
 * call with a larger `limit` continues where the previous one stopped.
 *
 * @param {string} query - Search text.
 * @param {number} [limit=50] - Maximum number of items to return.
 * @returns {Promise<Array<{index: number, image: string, url: string, pinId: string|null}>>}
 *   Up to `limit` items, with `index` numbered from 1 in collection order.
 * @throws {Error} 'Sin resultados en Pinterest' when nothing could be collected;
 *   if a request failed, that failure is attached as `cause`.
 */
export async function pinsearch(query, limit = 50) {
  const cacheKey = `search:${query}`
  let cached = _pinSearchCache.get(cacheKey)

  if (!cached) {
    cached = { items: [], page: 0, bookmark: null, done: false }
    _pinSearchCache.set(cacheKey, cached)
    const evictTimer = setTimeout(() => _pinSearchCache.delete(cacheKey), PIN_SEARCH_CACHE_TTL_MS)
    // Do not let the eviction timer alone keep the process alive.
    evictTimer.unref?.()
  }

  const seenIds = new Set(cached.items.map(i => i.pinId).filter(Boolean))
  // The index is assigned at push time so every item gets a distinct number.
  const addItem = (item) => {
    cached.items.push({ index: cached.items.length + 1, ...item })
  }
  let lastError = null

  while (cached.items.length < limit && !cached.done) {
    try {
      if (cached.page === 0) {
        const res = await axios.get(
          `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}`,
          { headers: HEADERS, timeout: 15000 }
        )
        const html = res.data
        const bmMatch = html.match(/"bookmark"\s*:\s*"([^"]+)"/)
        cached.bookmark = bmMatch ? bmMatch[1] : null

        // Pin ids and images are paired by position in the scraped page.
        const { pinIds, imgList } = extractFromHtml(html)
        for (let i = 0; i < imgList.length; i++) {
          const pid = pinIds[i]
          if (pid && seenIds.has(pid)) continue
          if (pid) seenIds.add(pid)
          addItem({
            image: imgList[i],
            url: pid ? `https://www.pinterest.com/pin/${pid}/` : imgList[i],
            pinId: pid || null
          })
        }
      } else if (cached.bookmark) {
        const res = await axios.get('https://www.pinterest.com/resource/SearchResource/get/', {
          params: {
            source_url: `/search/pins/?q=${encodeURIComponent(query)}`,
            data: JSON.stringify({
              options: { query, scope: 'pins', bookmarks: [cached.bookmark], page_size: 25 },
              context: {}
            }),
            _: Date.now()
          },
          headers: {
            ...HEADERS,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Accept': 'application/json',
            'Referer': `https://www.pinterest.com/search/pins/?q=${encodeURIComponent(query)}`,
          },
          timeout: 15000
        })
        const apiData = res.data
        cached.bookmark = apiData?.resource_response?.bookmark || null
        if (!cached.bookmark) cached.done = true

        // NOTE(review): the payload is read as an array of pins; a
        // `{ results: [...] }` wrapper is also accepted in case the endpoint
        // nests the list — confirm against a live response.
        const payload = apiData?.resource_response?.data
        const pins = Array.isArray(payload)
          ? payload
          : (Array.isArray(payload?.results) ? payload.results : [])

        for (const p of pins) {
          if (!p.id || seenIds.has(p.id)) continue
          seenIds.add(p.id)
          const img = pickPinImage(p)
          if (img) {
            addItem({ image: img, url: `https://www.pinterest.com/pin/${p.id}/`, pinId: p.id })
          }
        }
      } else {
        // No bookmark means there is no further page to request.
        cached.done = true
        break
      }
      cached.page++
    } catch (err) {
      // Best effort: return what was collected so far. The query is not
      // marked as done, so a later call can retry the failed page.
      lastError = err
      break
    }
  }

  if (!cached.items.length) {
    throw new Error('Sin resultados en Pinterest', lastError ? { cause: lastError } : undefined)
  }
  return cached.items.slice(0, limit)
}
187
+
188
/**
 * Finds the `}` that closes the object opening at `start`.
 *
 * Braces inside double-quoted string literals (including after backslash
 * escapes) are ignored, so text values containing `{` or `}` do not upset the
 * depth count.
 *
 * @param {string} text - Text to scan.
 * @param {number} start - Index of the opening `{`.
 * @returns {number} Index of the matching `}`, or -1 if the object never closes.
 */
function findObjectEnd(text, start) {
  let depth = 0
  let inString = false
  for (let i = start; i < text.length; i++) {
    const ch = text[i]
    if (inString) {
      if (ch === '\\') i++ // skip the escaped character
      else if (ch === '"') inString = false
      continue
    }
    if (ch === '"') inString = true
    else if (ch === '{') depth++
    else if (ch === '}') {
      depth--
      if (depth === 0) return i
    }
  }
  return -1
}

/**
 * Extracts the pin record embedded in a pin page's inline scripts.
 *
 * Looks through each `<script>` for the text `"v3GetPinQuery`, takes the
 * object whose opening brace precedes it, parses it as JSON and returns the
 * `data` property of its first key. The first script that parses wins.
 *
 * @param {string} html - Page markup.
 * @returns {object|null} The pin data, or null when no script yields a
 *   parseable object, the entry has no `data`, or the entry's `__typename`
 *   is 'PinNotFound'.
 */
function extractPinData(html) {
  const $ = cheerio.load(html)
  let found = null
  $('script').each((_, el) => {
    const txt = $(el).html() || ''
    const idx = txt.indexOf('"v3GetPinQuery')
    if (idx === -1) return
    const jsonStart = txt.lastIndexOf('{', idx)
    if (jsonStart === -1) return
    const end = findObjectEnd(txt, jsonStart)
    if (end === -1) return
    try {
      found = JSON.parse(txt.slice(jsonStart, end + 1))
    } catch {
      // Not valid JSON in this script; keep looking in the remaining ones.
      return
    }
    return false // stop iterating once a payload has been parsed
  })
  const key = found ? Object.keys(found)[0] : null
  const pin = key ? found[key]?.data : null
  if (!pin || found[key]?.__typename === 'PinNotFound') return null
  return pin
}
100
211
 
101
212
  export async function pinimg(input, limit = 5) {
@@ -108,30 +219,60 @@ export async function pinimg(input, limit = 5) {
108
219
  const r = await axios.get(input, { headers: HEADERS, maxRedirects: 5, timeout: 10000 })
109
220
  resolvedUrl = r.request?.res?.responseUrl || r.config?.url || input
110
221
  }
111
- const url = resolvedUrl
112
222
 
113
- const res = await axios.get(url, { headers: HEADERS, timeout: 15000 })
223
+ const res = await axios.get(resolvedUrl, {
224
+ headers: { ...HEADERS, 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36' },
225
+ timeout: 15000
226
+ })
114
227
  const html = res.data
115
- const $ = cheerio.load(html)
116
- const seen = new Set()
117
- const imgs = []
228
+ const pin = extractPinData(html)
118
229
 
119
- const jsonMatch = html.match(/"orig"\s*:\s*\{"url"\s*:\s*"([^"]+)"/)
120
- if (jsonMatch) {
121
- const u = jsonMatch[1].replace(/\\u002F/g, '/').replace(/\\\//g, '/')
122
- if (u.includes('i.pinimg.com')) { seen.add(u); imgs.push(u) }
123
- }
230
+ const cleanUrl = resolvedUrl.split('?')[0].split('/sent/')[0]
231
+ const id = cleanUrl.match(/\/pin\/(\d+)/)?.[1] || ''
124
232
 
125
- const PIN_IMG_RE = /"url"\s*:\s*"(https:\\?\/\\?\/i\.pinimg\.com[^"]+)"/g
126
- const pinMatches = [...html.matchAll(PIN_IMG_RE)]
127
- for (const m of pinMatches) {
128
- const u = m[1].replace(/\\u002F/g, '/').replace(/\\\//g, '/').replace(/\\/g, '')
129
- if (isValidPin(u)) {
130
- const high = toOriginal(u)
131
- if (!seen.has(high)) { seen.add(high); imgs.push(high) }
233
+ if (pin) {
234
+ const creator = pin.nativeCreator || pin.closeupAttribution || pin.originPinner || {}
235
+ const tags = (pin.pinJoin?.seoBreadcrumbs || []).map(b => b.name).filter(Boolean)
236
+ const saves = pin.aggregatedPinData?.aggregatedStats?.saves ?? null
237
+ const board = pin.board?.url ? `https://www.pinterest.com${pin.board.url}` : null
238
+
239
+ return {
240
+ id,
241
+ title: pin.title || pin.seoTitle || '',
242
+ description: pin.description?.trim() || pin.seoAltText || pin.gridDescription?.trim() || '',
243
+ altText: pin.seoAltText || '',
244
+ image: pin.images_orig?.url || pin.imageLargeUrl || pin.images_736x?.url || '',
245
+ images: {
246
+ orig: pin.images_orig?.url || pin.imageLargeUrl || '',
247
+ '736': pin.images_736x?.url || '',
248
+ '474': pin.images_474x?.url || '',
249
+ '236': pin.images_236x?.url || '',
250
+ '136': pin.images_136x136?.url || '',
251
+ },
252
+ width: pin.images_474x?.width || pin.images_736x?.width || 0,
253
+ height: pin.images_474x?.height || pin.images_736x?.height || 0,
254
+ dominantColor: pin.dominantColor || '',
255
+ saves: saves,
256
+ repins: pin.repinCount ?? 0,
257
+ createdAt: pin.createdAt || '',
258
+ tags,
259
+ domain: pin.domain || '',
260
+ link: pin.link || '',
261
+ board,
262
+ creator: {
263
+ username: creator.username || '',
264
+ fullName: creator.fullName || creator.full_name || '',
265
+ },
266
+ pinner: {
267
+ username: pin.pinner?.username || '',
268
+ },
269
+ url: cleanUrl,
132
270
  }
133
271
  }
134
272
 
273
+ const $ = cheerio.load(html)
274
+ const seen = new Set()
275
+ const imgs = []
135
276
  $('img').each((_, el) => {
136
277
  const src = $(el).attr('src') || ''
137
278
  const srcset = $(el).attr('srcset') || ''
@@ -142,21 +283,16 @@ export async function pinimg(input, limit = 5) {
142
283
  if (!seen.has(high)) { seen.add(high); imgs.push(high) }
143
284
  }
144
285
  })
145
-
146
- const ogImg = $('meta[property="og:image"]').attr('content') || ''
147
- const ogTitle = $('meta[property="og:title"]').attr('content') || ''
148
- const ogDesc = $('meta[property="og:description"]').attr('content') || ''
149
- if (ogImg && !seen.has(ogImg)) imgs.push(ogImg)
150
-
151
286
  const unique = [...new Set(imgs)].filter(Boolean)
152
287
  if (!unique.length) throw new Error('No se pudo extraer la imagen del pin')
153
288
 
154
289
  return {
155
- id: resolvedUrl.match(/\/pin\/(\d+)/)?.[1] || '',
156
- title: ogTitle,
157
- description: ogDesc,
158
- image: unique[0],
159
- images: unique,
160
- url: resolvedUrl,
290
+ id,
291
+ title: '', description: '', altText: '', image: unique[0],
292
+ images: { orig: unique[0], '736': '', '474': '', '236': unique[0], '136': '' },
293
+ width: 0, height: 0, dominantColor: '', saves: null, repins: 0,
294
+ createdAt: '', tags: [], domain: '', link: '', board: null,
295
+ creator: { username: '', fullName: '' }, pinner: { username: '' },
296
+ url: cleanUrl,
161
297
  }
162
298
  }
@@ -31,7 +31,7 @@ async function searchTracks(query, limit = 5) {
31
31
  }))
32
32
  }
33
33
 
34
- async function searchAlbums(query, limit = 5) {
34
+ async function searchAlbums(query, limit = 12) {
35
35
  const res = await axios.get(
36
36
  `https://api.deezer.com/search/album?q=${encodeURIComponent(query)}&limit=${limit}`,
37
37
  { headers: HEADERS, timeout: 15000 }
@@ -1,131 +0,0 @@
1
- // Créditos a FG-error
2
- import axios from 'axios'
3
-
4
- const delay = ms => new Promise(r => setTimeout(r, ms))
5
-
6
- function parseFileSize(size) {
7
- if (!size) return 0
8
- const units = { B: 1, KB: 1024, MB: 1024 ** 2, GB: 1024 ** 3, TB: 1024 ** 4 }
9
- const match = size.toString().trim().match(/([\d.]+)\s*(B|KB|MB|GB|TB)/i)
10
- if (!match) return 0
11
- return Math.round(parseFloat(match[1]) * (units[match[2].toUpperCase()] || 1))
12
- }
13
-
14
- function formatFileSize(bytes) {
15
- if (!bytes || isNaN(bytes)) return '0 B'
16
- const units = ['B', 'KB', 'MB', 'GB', 'TB']
17
- let i = 0
18
- while (bytes >= 1024 && i < units.length - 1) { bytes /= 1024; i++ }
19
- return `${bytes.toFixed(1).replace(/\.0$/, '')} ${units[i]}`
20
- }
21
-
22
- export async function getFileSizeV2(url) {
23
- try {
24
- const res = await axios.head(url, { timeout: 10000 })
25
- const bytes = parseInt(res.headers['content-length'] || 0)
26
- return formatFileSize(bytes)
27
- } catch { return '0 B' }
28
- }
29
-
30
- function normalizeYT(url) {
31
- try {
32
- const u = new URL(url)
33
- if (u.hostname.includes('youtu.be')) return url
34
- if (u.hostname.includes('youtube.com')) {
35
- if (u.pathname.includes('/watch')) return `https://youtu.be/${u.searchParams.get('v')}`
36
- if (u.pathname.includes('/shorts/')) return `https://youtu.be/${u.pathname.split('/shorts/')[1]}`
37
- if (u.pathname.includes('/embed/')) return `https://youtu.be/${u.pathname.split('/embed/')[1]}`
38
- }
39
- return url
40
- } catch { return url }
41
- }
42
-
43
- async function waitForDownload(mediaUrl) {
44
- for (let i = 0; i < 15; i++) {
45
- try {
46
- const { data } = await axios.get(mediaUrl, { timeout: 15000 })
47
- if (data?.percent === 'Completed' && data?.fileUrl && data.fileUrl !== 'In Processing...')
48
- return data.fileUrl
49
- } catch {}
50
- await delay(4000)
51
- }
52
- throw new Error('No se pudo generar el enlace de descarga')
53
- }
54
-
55
- async function fetchYtdownto(url) {
56
- const { data } = await axios.post(
57
- 'https://app.ytdown.to/proxy.php',
58
- new URLSearchParams({ url }).toString(),
59
- {
60
- headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
61
- timeout: 20000,
62
- }
63
- )
64
- const api = data?.api
65
- if (!api) throw new Error('No se pudo obtener información del video')
66
- if (api.status === 'ERROR') throw new Error(api.message)
67
-
68
- const qualities = (api.mediaItems || []).map((v, i) => {
69
- const match = v?.mediaUrl?.match(/(\d+)p|(\d+)k/)
70
- const res = match ? match[0] : v.mediaQuality
71
- return {
72
- id: i + 1,
73
- type: v.type,
74
- quality: res,
75
- label: `${v.mediaExtension?.toUpperCase()} - ${v.mediaQuality}`,
76
- size: v.mediaFileSize,
77
- sizeB: parseFileSize(v.mediaFileSize),
78
- mediaUrl: v.mediaUrl,
79
- duration: v.mediaDuration,
80
- }
81
- })
82
-
83
- return { api, qualities }
84
- }
85
-
86
- export async function ytDownloadV2(url, type = 'video', quality = '360p') {
87
- url = normalizeYT(url)
88
-
89
- const { api, qualities } = await fetchYtdownto(url)
90
-
91
- const isAudio = type === 'mp3' || type === 'audio'
92
- const targetQ = quality.toLowerCase()
93
-
94
- const filtered = isAudio
95
- ? qualities.filter(v => v.type === 'audio' || v.quality?.includes('k'))
96
- : qualities.filter(v => v.type === 'video' || v.quality?.includes('p'))
97
-
98
- const selected = filtered.find(v => v.quality?.toLowerCase() === targetQ) || filtered[0]
99
-
100
- if (!selected) {
101
- const disponibles = qualities.map(v => v.quality).filter(Boolean).join(', ')
102
- throw new Error(`Calidad ${quality} no disponible. Disponibles: ${disponibles}`)
103
- }
104
-
105
- const dlUrl = await waitForDownload(selected.mediaUrl)
106
-
107
- return {
108
- title: api.title,
109
- uploader: api.userInfo?.name || '',
110
- views: api.mediaStats?.viewsCount || '',
111
- thumb: api.imagePreviewUrl || '',
112
- type: isAudio ? 'audio' : 'video',
113
- quality: selected.quality,
114
- size: selected.size,
115
- sizeB: selected.sizeB,
116
- duration: selected.duration,
117
- url: dlUrl,
118
- }
119
- }
120
-
121
- export async function ytInfoV2(url) {
122
- url = normalizeYT(url)
123
- const { api, qualities } = await fetchYtdownto(url)
124
- return {
125
- title: api.title,
126
- uploader: api.userInfo?.name || '',
127
- views: api.mediaStats?.viewsCount || '',
128
- thumb: api.imagePreviewUrl || '',
129
- qualities,
130
- }
131
- }