@soyaxell09/zenbot-scraper 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/scrapers/instagram.js +189 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soyaxell09/zenbot-scraper",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Scrapers de descarga y búsqueda para bots de WhatsApp — YouTube, TikTok, Instagram, Facebook, Twitter, Pinterest, MediaFire, GitHub, APK, Google Drive, XNXX, PornHub, XVideos, XHamster, Rule34, Screenshot y más.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* © Created by AxelDev09 🔥
|
|
3
|
+
* GitHub: https://github.com/AxelDev09
|
|
4
|
+
* Instagram: @axeldev09
|
|
5
|
+
* Deja los créditos we 🗣️
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import axios from 'axios';
|
|
9
|
+
import * as cheerio from 'cheerio';
|
|
10
|
+
|
|
11
|
+
// Mobile Chrome user-agent string sent as the 'User-Agent' header by igDownload and igStories.
const UA = 'Mozilla/5.0 (Linux; Android 11; Redmi Note 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36';
|
|
12
|
+
// Endpoint that igDownload POSTs the form-encoded download request to.
const AJAX_URL = 'https://igsnapinsta.com/wp-admin/admin-ajax.php';
|
|
13
|
+
// Site origin used to build the 'Referer' and 'Origin' headers of the igDownload request.
const BASE_URL = 'https://igsnapinsta.com';
|
|
14
|
+
|
|
15
|
+
/**
 * Decode a base64 string into UTF-8 text.
 *
 * @param {string} encodedUrl - Base64-encoded value.
 * @returns {*} The decoded text, or `encodedUrl` itself when decoding throws
 *   (for example when the argument is not a string).
 */
function decodeUrl(encodedUrl) {
  let result;
  try {
    const bytes = Buffer.from(encodedUrl, 'base64');
    result = bytes.toString('utf-8');
  } catch {
    // Best effort: hand the input back untouched.
    result = encodedUrl;
  }
  return result;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Extract downloadable media entries from an HTML fragment.
 *
 * Direct media is collected first: every `<source src>` is taken as a video
 * and every `<img src>` pointing at a `cdninstagram`/`fbcdn` host is taken as
 * an image, skipping any URL that contains `kdnsd`. Only when nothing was
 * found that way are `kdnsd/v1/download` proxy links (anchors and images)
 * used instead.
 *
 * @param {string} html - HTML markup to scan.
 * @returns {{type: 'video'|'image', url: string}[]} Unique media entries in
 *   document order (per selector pass).
 */
function parseItems(html) {
  const $ = cheerio.load(html);
  const items = [];
  const seen = new Set();

  // Register a URL once. Any `&amp;` entity still present in the attribute
  // value is decoded so the stored link is directly usable.
  function add(type, url) {
    const clean = url.replace(/&amp;/g, '&').trim();
    if (!clean || seen.has(clean)) return;
    seen.add(clean);
    items.push({ type, url: clean });
  }

  $('source[src]').each((_, el) => {
    const src = $(el).attr('src');
    if (src && !src.includes('kdnsd')) add('video', src);
  });

  $('img[src]').each((_, el) => {
    const src = $(el).attr('src') || '';
    const isCdnAsset = src.includes('cdninstagram') || src.includes('fbcdn');
    if (isCdnAsset && !src.includes('kdnsd')) add('image', src);
  });

  if (!items.length) {
    $('a[href]').each((_, el) => {
      const href = $(el).attr('href') || '';
      if (!href.includes('kdnsd/v1/download')) return;

      // The `url=` parameter carries the base64-encoded target; cut it at the
      // next parameter so trailing query data is not fed to the decoder.
      const b64 = (href.split('url=')[1] || '').split('&')[0];

      // A malformed percent-escape must not abort the whole parse.
      let payload = b64;
      try {
        payload = decodeURIComponent(b64);
      } catch {
        // Keep the raw value; it is only used to guess the media type.
      }

      const decoded = decodeUrl(payload);
      const type = decoded.includes('.mp4') ? 'video' : 'image';
      add(type, href);
    });

    $('img[src]').each((_, el) => {
      const src = $(el).attr('src') || '';
      if (src.includes('kdnsd/v1/download')) add('image', src);
    });
  }

  return items;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Classify an Instagram link by its path.
 *
 * Known path markers are checked first (`/reel/`, `/p/`, `/stories/`, `/tv/`).
 * Otherwise a path with exactly one segment is treated as a profile and
 * anything else as a post.
 *
 * @param {string} url - Instagram link; the scheme may be omitted
 *   (e.g. `instagram.com/user/`).
 * @returns {'reel'|'post'|'story'|'video'|'profile'} Detected content type.
 */
function detectType(url) {
  if (url.includes('/reel/')) return 'reel';
  if (url.includes('/p/')) return 'post';
  if (url.includes('/stories/')) return 'story';
  if (url.includes('/tv/')) return 'video';

  let pathname;
  try {
    pathname = new URL(url).pathname;
  } catch {
    // `new URL` rejects scheme-less links; retry with a scheme before giving up.
    try {
      pathname = new URL(`https://${url}`).pathname;
    } catch {
      return 'post';
    }
  }

  // "/user" splits into ['', 'user'], i.e. exactly one path segment.
  const path = pathname.replace(/\/$/, '');
  return path.split('/').length === 2 ? 'profile' : 'post';
}
|
|
71
|
+
|
|
72
|
+
/**
 * Resolve the downloadable media behind an Instagram link.
 *
 * The link is stripped of its query string and fragment, given a trailing
 * slash, and POSTed to AJAX_URL; the HTML in the response is then parsed
 * for media entries.
 *
 * @param {string} url - Link containing `instagram.com`.
 * @returns {Promise<{type: string, items: Array<{type: string, url: string}>}>}
 *   The detected link type and the media entries found.
 * @throws {Error} When the link is not an Instagram link, the response has no
 *   usable HTML (with a dedicated message when it mentions a private
 *   profile), or no media could be extracted. Request errors propagate as-is.
 */
export async function igDownload(url) {
  const isInstagramLink = url.includes('instagram.com');
  if (!isInstagramLink) {
    throw new Error('URL inválida. Debe ser un link de Instagram.');
  }

  // Clean the URL: drop query parameters / fragment and ensure a trailing slash.
  const [withoutQuery] = url.split('?');
  const [bare] = withoutQuery.split('#');
  const target = bare.endsWith('/') ? bare : bare + '/';

  const response = await axios.post(
    AJAX_URL,
    new URLSearchParams({ action: 'kdnsd_get_instagram_video', url: target }),
    {
      headers: {
        'User-Agent': UA,
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': `${BASE_URL}/es/`,
        'Origin': BASE_URL,
        'X-Requested-With': 'XMLHttpRequest',
      },
      timeout: 20000,
    },
  );
  const { data } = response;

  if (!data?.success || !data?.data?.html) {
    const html = data?.data?.html || '';
    const mentionsPrivate =
      html.includes('private') ||
      html.includes('privado') ||
      data?.data?.message?.includes('private');
    if (mentionsPrivate) {
      throw new Error('Perfil privado. Solo se puede descargar contenido de perfiles públicos.');
    }
    throw new Error('No se pudo obtener el contenido. Verificá que el perfil/post sea público.');
  }

  const type = detectType(target);
  const items = parseItems(data.data.html);
  if (items.length === 0) {
    throw new Error('No se encontró contenido descargable.');
  }

  return { type, items };
}
|
|
112
|
+
|
|
113
|
+
/**
 * Download a reel or post, rejecting any other kind of link up front.
 *
 * @param {string} url - Link that contains `instagram.com` and either
 *   `/reel/` or `/p/`.
 * @returns {Promise<object>} Whatever igDownload resolves to for this link.
 * @throws {Error} When the link is not an Instagram reel/post link.
 */
export async function igReelDownload(url) {
  const onInstagram = url.includes('instagram.com');
  const isReelOrPost = url.includes('/reel/') || url.includes('/p/');

  if (!(onInstagram && isReelOrPost)) {
    throw new Error('URL inválida. Debe ser un link de reel o post de Instagram.');
  }

  return igDownload(url);
}
|
|
118
|
+
|
|
119
|
+
/**
 * Fetch public profile information for an Instagram user.
 *
 * The profile page is requested with several link-preview crawler user agents
 * in turn; the first response that exposes Open Graph title/description
 * metadata is parsed and returned.
 *
 * @param {string} input - Username, `@username`, or profile URL (query string
 *   and trailing slashes are ignored).
 * @returns {Promise<{username: string, fullName: string, bio: string,
 *   followers: string, following: string, posts: string, isPrivate: boolean,
 *   isVerified: boolean, avatar: string, url: string}>} Profile summary;
 *   counters are `'?'` when they could not be parsed from the description.
 * @throws {Error} `'Usuario inválido.'` when no valid username can be derived
 *   from `input`; `'No se pudo obtener información del perfil.'` when every
 *   attempt failed (the last underlying error, if any, is attached as `cause`).
 */
export async function igStalk(input) {
  // Remove the query/fragment BEFORE trimming slashes so that
  // ".../user/?hl=en" yields "user" rather than "user/".
  const username = String(input ?? '')
    .trim()
    .split(/[?#]/)[0]
    .replace(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//i, '')
    .replace(/^@/, '')
    .replace(/\/+$/, '')
    .trim();

  // The value is interpolated into a request path, so only accept the
  // characters Instagram allows in usernames (letters, digits, '.', '_').
  if (!/^[A-Za-z0-9._]{1,30}$/.test(username)) throw new Error('Usuario inválido.');

  const UAs = [
    'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
    'Twitterbot/1.0',
    'WhatsApp/2.23.1 A',
  ];

  let lastError;
  for (const ua of UAs) {
    try {
      const { data: html } = await axios.get(`https://www.instagram.com/${username}/`, {
        headers: { 'User-Agent': ua, 'Accept-Language': 'en-US,en;q=0.9' },
        timeout: 12000,
      });
      const $ = cheerio.load(html);
      const meta = $('meta[property="og:description"]').attr('content') || '';
      const name = $('meta[property="og:title"]').attr('content') || '';
      const avatar = $('meta[property="og:image"]').attr('content') || '';

      // No Open Graph data for this user agent: try the next one.
      if (!name && !meta) continue;

      // Pull the three counters out of the og:description text.
      const m = meta.match(/([\d,.KkMm]+)\s*Followers[,\s]+([\d,.KkMm]+)\s*Following[,\s]+([\d,.KkMm]+)\s*Posts?/i);
      // Whatever follows the counters and a dash is treated as the bio.
      const bioRaw = meta.replace(/[\d,.KkMm]+\s*Followers[^-–—]*[-–—]\s*/i, '').trim();
      const bioClean = bioRaw.replace(/^See Instagram photos and videos from .+/i, '').trim();
      // Drop the "(@handle)" part and the "• Instagram ..." suffix from the title.
      const fullNameClean = name
        .replace(/\s*\(@?[^)]*\)\s*/g, '')
        .replace(/\s*[•·]\s*Instagram.*/i, '')
        .trim();
      const isVerified =
        html.includes('"is_verified":true') ||
        html.includes('"verified":true') ||
        html.includes('aria-label="Verified"');
      const isPrivate = html.includes('"is_private":true');

      return {
        username,
        fullName: fullNameClean || username,
        bio: bioClean || '',
        followers: m ? m[1] : '?',
        following: m ? m[2] : '?',
        posts: m ? m[3] : '?',
        isPrivate,
        isVerified,
        avatar,
        url: `https://www.instagram.com/${username}/`,
      };
    } catch (err) {
      // Best effort: remember why this attempt failed and try the next agent.
      lastError = err;
    }
  }

  throw new Error(
    'No se pudo obtener información del perfil.',
    lastError ? { cause: lastError } : undefined,
  );
}
|
|
166
|
+
|
|
167
|
+
/**
 * Collect story media for an Instagram user from third-party viewer sites.
 *
 * Each viewer site is queried in turn; the first page that yields at least
 * one matching `<img>`, `<source>` or `<video>` URL wins.
 *
 * @param {string} input - Username, `@username`, or profile URL (query string
 *   and trailing slashes are ignored).
 * @returns {Promise<{username: string, items: Array<{type: 'video'|'image', url: string}>}>}
 *   The normalised username and the media entries found.
 * @throws {Error} `'Usuario inválido.'` when no valid username can be derived
 *   from `input`; `'No se encontraron stories o el perfil es privado.'` when no
 *   site returned media (the last underlying error, if any, is attached as
 *   `cause`).
 */
export async function igStories(input) {
  // Remove the query/fragment BEFORE trimming slashes so that
  // ".../user/?hl=en" yields "user" rather than "user/".
  const username = String(input ?? '')
    .trim()
    .split(/[?#]/)[0]
    .replace(/^(?:https?:\/\/)?(?:www\.)?instagram\.com\//i, '')
    .replace(/^@/, '')
    .replace(/\/+$/, '')
    .trim();

  // The value is interpolated into a request path, so only accept the
  // characters Instagram allows in usernames (letters, digits, '.', '_').
  if (!/^[A-Za-z0-9._]{1,30}$/.test(username)) throw new Error('Usuario inválido.');

  let lastError;
  for (const base of ['https://storiesig.info', 'https://imginn.com']) {
    try {
      const { data: html } = await axios.get(`${base}/stories/${username}/`, {
        headers: { 'User-Agent': UA },
        timeout: 12000,
      });
      const $ = cheerio.load(html);
      const items = [];

      $('img[src], source[src], video[src]').each((_, el) => {
        const src = $(el).attr('src') || '';
        // Keep only media served from Instagram/Facebook CDNs or the viewer site itself.
        if (src && (src.includes('cdninstagram') || src.includes('fbcdn') || src.includes(base))) {
          const type = $(el).is('source, video') ? 'video' : 'image';
          items.push({ type, url: src });
        }
      });

      if (items.length) return { username, items };
    } catch (err) {
      // Best effort: remember why this site failed and try the next one.
      lastError = err;
    }
  }

  throw new Error(
    'No se encontraron stories o el perfil es privado.',
    lastError ? { cause: lastError } : undefined,
  );
}
|