@lmna22/aio-downloader 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +416 -0
- package/package.json +51 -0
- package/src/download.js +53 -0
- package/src/index.js +62 -0
- package/src/lib/instagram.js +395 -0
- package/src/lib/pinterest.js +313 -0
- package/src/lib/pixiv.js +425 -0
- package/src/lib/tiktok.js +120 -0
- package/src/lib/twitter.js +239 -0
- package/src/lib/youtube.js +257 -0
- package/src/utils.js +93 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
const axios = require('axios');
|
|
2
|
+
const cheerio = require('cheerio');
|
|
3
|
+
const qs = require('qs');
|
|
4
|
+
|
|
5
|
+
/**
 * Headers attached to the Instagram GraphQL POST made by getPostGraphqlData.
 * All values are fixed strings; `X-FB-LSD` carries the same token as the
 * `lsd` form field built in encodeGraphqlRequestData.
 */
const HEADERS = {
  'Accept': '*/*',
  'Accept-Language': 'en-US,en;q=0.5',
  'Content-Type': 'application/x-www-form-urlencoded',
  'X-FB-Friendly-Name': 'PolarisPostActionLoadPostQueryQuery',
  'X-CSRFToken': 'RVDUooU5MYsBbS1CNN3CzVAuEP8oHB52',
  'X-IG-App-ID': '1217981644879628',
  'X-FB-LSD': 'AVqbxe3J_YA',
  'X-ASBD-ID': '129477',
  'Sec-Fetch-Dest': 'empty',
  'Sec-Fetch-Mode': 'cors',
  'Sec-Fetch-Site': 'same-origin',
  'User-Agent': 'Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-G973U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/14.2 Chrome/87.0.4280.141 Mobile Safari/537.36',
};
|
|
19
|
+
|
|
20
|
+
/**
 * Extracts the shortcode from an Instagram post, TV or reel URL.
 *
 * The pattern is intentionally unanchored, so the scheme and `www.` prefix
 * are optional and other subdomains are accepted as well.
 *
 * @param {string} url - Candidate Instagram URL.
 * @returns {string|null} The shortcode, or null when `url` is not a string
 *   or has no `/p/`, `/tv/`, `/reel/` or `/reels/` path segment.
 */
function getInstagramPostId(url) {
  // Guard first: calling .match on a non-string would throw a TypeError.
  if (typeof url !== 'string') return null;

  const shortcodePattern =
    /(?:https?:\/\/)?(?:www\.)?instagram\.com\/(?:p|tv|reels?)\/([^/?#&]+)/;
  const match = url.match(shortcodePattern);
  return match ? match[1] : null;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Builds the form-encoded body for the Instagram post GraphQL request.
 *
 * Every field except `variables.shortcode` is a fixed string; field order is
 * kept stable because `qs.stringify` emits keys in insertion order.
 *
 * @param {string} shortcode - Post shortcode to look up.
 * @returns {string} URL-encoded form body.
 */
function encodeGraphqlRequestData(shortcode) {
  // `variables` travels as a JSON string inside a single form field.
  const variables = JSON.stringify({
    shortcode,
    fetch_comment_count: null,
    fetch_related_profile_media_count: null,
    parent_comment_count: null,
    child_comment_count: null,
    fetch_like_count: null,
    fetch_tagged_user_count: null,
    fetch_preview_comment_count: null,
    has_threaded_comments: false,
    hoisted_comment_id: null,
    hoisted_reply_id: null,
  });

  const formFields = {
    av: '0',
    __d: 'www',
    __user: '0',
    __a: '1',
    __req: '3',
    __hs: '19624.HYP:instagram_web_pkg.2.1..0.0',
    dpr: '3',
    __ccg: 'UNKNOWN',
    __rev: '1008824440',
    __s: 'xf44ne:zhh75g:xr51e7',
    __hsi: '7282217488877343271',
    __dyn: '7xeUmwlEnwn8K2WnFw9-2i5U4e0yoW3q32360CEbo1nEhw2nVE4W0om78b87C0yE5ufz81s8hwGwQwoEcE7O2l0Fwqo31w9a9x-0z8-U2zxe2GewGwso88cobEaU2eUlwhEe87q7-0iK2S3qazo7u1xwIw8O321LwTwKG1pg661pwr86C1mwraCg',
    __csr: 'gZ3yFmJkillQvV6ybimnG8AmhqujGbLADgjyEOWz49z9XDlAXBJpC7Wy-vQTSvUGWGh5u8KibG44dBiigrgjDxGjU0150Q0848azk48N09C02IR0go4SaR70r8owyg9pU0V23hwiA0LQczA48S0f-x-27o05NG0fkw',
    __comet_req: '7',
    lsd: 'AVqbxe3J_YA',
    jazoest: '2957',
    __spin_r: '1008824440',
    __spin_b: 'trunk',
    __spin_t: '1695523385',
    fb_api_caller_class: 'RelayModern',
    fb_api_req_friendly_name: 'PolarisPostActionLoadPostQueryQuery',
    variables,
    server_timestamps: 'true',
    doc_id: '10015901848480474',
  };

  return qs.stringify(formFields);
}
|
|
68
|
+
|
|
69
|
+
/**
 * POSTs the GraphQL post query to Instagram and resolves with the response
 * payload.
 *
 * @param {string} postId - Post shortcode.
 * @param {*} proxy - Passed through as axios' `httpsAgent` option.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
async function getPostGraphqlData(postId, proxy) {
  const formBody = encodeGraphqlRequestData(postId);
  const requestOptions = { headers: HEADERS, httpsAgent: proxy };
  const { data } = await axios.post('https://www.instagram.com/api/graphql', formBody, requestOptions);
  return data;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Converts a GraphQL media node into the downloader's result object.
 *
 * Metadata fields (caption, username, counters) are optional: when the node
 * lacks one of them the field is reported as null instead of throwing, so
 * the media URLs that were found are not lost.
 *
 * @param {object} mediaData - Media node (`xdt_shortcode_media`-style object).
 * @returns {{creator: string, status: boolean, data: object}} Result whose
 *   `data.url` is an array with one entry per media item; each entry is the
 *   item's `video_url` when present, otherwise its `display_url`.
 */
function extractPostInfo(mediaData) {
  const mediaUrlOf = (node) => node?.video_url || node?.display_url;

  // A sidecar (carousel) post lists its items as child edges.
  const childEdges = mediaData.edge_sidecar_to_children?.edges;
  const url = Array.isArray(childEdges)
    ? childEdges.map((edge) => mediaUrlOf(edge?.node))
    : [mediaUrlOf(mediaData)];

  return {
    creator: '@fort.kun',
    status: true,
    data: {
      url,
      // An empty caption is reported as null, matching the original behaviour.
      caption: mediaData.edge_media_to_caption?.edges?.[0]?.node?.text || null,
      username: mediaData.owner?.username ?? null,
      like: mediaData.edge_media_preview_like?.count ?? null,
      comment: mediaData.edge_media_to_comment?.count ?? null,
      isVideo: mediaData.is_video ?? null,
    },
  };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Fetches a post through Instagram's GraphQL endpoint.
 *
 * Never rejects: every failure, including "the response contained no media
 * node", is reported as a `{ creator, status: false, message }` object so
 * callers handle one result shape.
 *
 * @param {string} url - Instagram post/TV/reel URL.
 * @param {*} [proxy=null] - Forwarded to getPostGraphqlData.
 * @returns {Promise<object>} Result from extractPostInfo on success, or a
 *   failure object with `status: false`.
 */
async function directScrape(url, proxy = null) {
  try {
    const postId = getInstagramPostId(url);
    if (!postId) {
      throw new Error('Invalid Instagram URL');
    }

    const payload = await getPostGraphqlData(postId, proxy);
    const mediaData = payload?.data?.xdt_shortcode_media;
    if (!mediaData) {
      // Previously returned bare null here, unlike every other failure path.
      throw new Error('Post data not found in GraphQL response');
    }

    return extractPostInfo(mediaData);
  } catch (error) {
    return {
      creator: '@fort.kun',
      status: false,
      message: error.message,
    };
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Headers used by userGraphql when it requests the post's HTML page.
 * Values are fixed strings and are sent as written.
 */
const varHeaders = {
  accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  'accept-language': 'en-US,en;q=0.9',
  'cache-control': 'no-cache',
  'sec-ch-prefers-color-scheme': 'light',
  'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"Windows"',
  'sec-fetch-dest': 'document',
  'sec-fetch-mode': 'navigate',
  'sec-fetch-site': 'same-origin',
  'sec-fetch-user': '?1',
  'upgrade-insecure-requests': '1',
  'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/124.0.0.0',
};
|
|
137
|
+
|
|
138
|
+
/**
 * Headers used by userGraphql for the timeline GraphQL GET request.
 * Declared `const` because the binding is never reassigned in this file.
 */
const grapHeaders = {
  'Accept': 'application/json',
  'Accept-Language': 'en-US,en;q=0.9',
  'Cache-Control': 'no-cache',
  'Dnt': '1',
  'Pragma': 'no-cache',
  'Referer': '',
  'Sec-Fetch-Dest': 'empty',
  'Sec-Fetch-Mode': 'cors',
  'Sec-Fetch-Site': 'same-origin',
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
  'X-Csrftoken': 'EuZcvVSeiRAC60CJQRrRC6',
  'X-Ig-App-Id': '936619743392459',
  'X-Ig-Www-Claim': '0',
  'X-Requested-With': 'XMLHttpRequest',
};
|
|
154
|
+
|
|
155
|
+
/**
 * Resolves a post by loading its HTML page, reading the owner id and media id
 * from it, then searching the owner's timeline (GraphQL) for that media id.
 *
 * Never rejects: failures are reported as `{ creator, status: false, message }`.
 *
 * @param {string} url - Instagram post URL to load.
 * @returns {Promise<object>} Success object with `data.url` (array of media
 *   URLs) plus caption/username/counters, or a failure object.
 */
async function userGraphql(url) {
  const failure = (message) => ({ creator: '@fort.kun', status: false, message });

  try {
    const { data: pageBody } = await axios.get(url, { headers: varHeaders });
    const html = typeof pageBody === 'string' ? pageBody : '';

    // Explicit checks replace `.match(...)[1]`, which failed with an opaque
    // "Cannot read properties of null" message when the markers were absent.
    const userId = html.match(/<meta\s+property="instapp:owner_user_id"\s+content="(\d+)"/)?.[1];
    const mediaId = html.match(/instagram:\/\/media\?id=(\d+)/)?.[1];
    if (!userId || !mediaId) {
      return failure('Owner or media id not found in page');
    }

    // Produces the same query string the hand-encoded template did.
    const variables = encodeURIComponent(
      JSON.stringify({ id: userId, include_clips_attribution_info: false, first: 1000 })
    );
    const graphUrl = `https://www.instagram.com/graphql/query/?doc_id=7571407972945935&variables=${variables}`;

    const { data: graph } = await axios.get(graphUrl, { headers: grapHeaders });

    const edges = graph?.data?.user?.edge_owner_to_timeline_media?.edges ?? [];
    const matchingEdge = edges.find((edge) => edge.node.id === mediaId);
    if (!matchingEdge) {
      return failure('Video not found');
    }

    const node = matchingEdge.node;
    const listUrl = node.edge_sidecar_to_children
      ? node.edge_sidecar_to_children.edges.map((edge) => edge.node.video_url || edge.node.display_url)
      : [node.video_url || node.display_url];

    const captionEdges = node.edge_media_to_caption.edges;

    return {
      creator: '@fort.kun',
      status: true,
      data: {
        url: listUrl,
        caption: captionEdges.length > 0 ? captionEdges[0].node.text : null,
        username: node.owner.username,
        like: node.edge_media_preview_like.count,
        comment: node.edge_media_to_comment.count,
        isVideo: node.is_video,
      },
    };
  } catch (error) {
    return failure(error.message);
  }
}
|
|
214
|
+
|
|
215
|
+
/**
 * Resolves download links through the saveig.app search endpoint.
 *
 * The endpoint's JSON response carries an HTML fragment in `data`; download
 * links are read from `.download-items__btn > a` anchors inside each
 * `.download-items` element.
 *
 * Never rejects. Reports `status: false` when no link could be extracted, so
 * callers can fall back to another method instead of receiving an empty
 * "success".
 *
 * @param {string} query - Instagram URL to look up.
 * @returns {Promise<object>} Success object whose `data.url` is a non-empty
 *   array of links (all metadata fields are null), or a failure object.
 */
async function saveIG(query) {
  try {
    const requestData = {
      q: query,
      t: 'media',
      lang: 'en',
    };

    const requestHeaders = {
      Accept: '*/*',
      'Accept-Encoding': 'gzip, deflate, br',
      'Accept-Language': 'en-US,en;q=0.9',
      'Content-Type': 'application/x-www-form-urlencoded',
      Origin: 'https://saveig.app',
      Referer: 'https://saveig.app/en',
      'Sec-Ch-Ua': '"Not/A)Brand";v="99", "Microsoft Edge";v="115", "Chromium";v="115"',
      'Sec-Ch-Ua-Mobile': '?0',
      'Sec-Ch-Ua-Platform': '"Windows"',
      'Sec-Fetch-Dest': 'empty',
      'Sec-Fetch-Mode': 'cors',
      'Sec-Fetch-Site': 'same-origin',
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.183',
      'X-Requested-With': 'XMLHttpRequest',
    };

    const axiosInstance = axios.create({ headers: requestHeaders });
    const response = await axiosInstance.post(
      'https://saveig.app/api/ajaxSearch',
      qs.stringify(requestData)
    );

    const htmlContent = response.data?.data;
    if (typeof htmlContent !== 'string' || htmlContent.length === 0) {
      throw new Error('No media found');
    }

    const $ = cheerio.load(htmlContent);
    const downloadLinks = [];
    $('.download-items').each((index, element) => {
      const href = $(element).find('.download-items__btn > a').attr('href');
      // Items without an anchor used to push `undefined` into the list.
      if (href) downloadLinks.push(href);
    });

    if (downloadLinks.length === 0) {
      throw new Error('No media found');
    }

    return {
      creator: '@fort.kun',
      status: true,
      data: {
        url: downloadLinks,
        caption: null,
        username: null,
        like: null,
        comment: null,
        isVideo: null,
      },
    };
  } catch (error) {
    return {
      creator: '@fort.kun',
      status: false,
      message: error.message,
    };
  }
}
|
|
278
|
+
|
|
279
|
+
/**
 * Pulls media links out of a dlpanda result page.
 *
 * Two patterns are recognised, in this order:
 *   1. `downVideo('<url>' ... .mp3|.mp4` calls
 *   2. `window.open("<url>"` calls
 *
 * Protocol-relative links (`//host/...`) are completed with `https:`.
 * Absolute links are returned untouched; the previous code prefixed every
 * mp4 link unconditionally, which corrupted already-absolute URLs.
 *
 * @param {string} script - Page source to scan; non-strings are treated as
 *   having no links.
 * @returns {string[]} Links in match order (downVideo matches first).
 */
function extractData(script) {
  const source = typeof script === 'string' ? script : '';
  const toAbsolute = (link) => (link.startsWith('//') ? `https:${link}` : link);

  const links = [];
  for (const [, link] of source.matchAll(/downVideo\('([^']+)'.*?\.mp[34]/g)) {
    links.push(toAbsolute(link));
  }
  for (const [, link] of source.matchAll(/window\.open\("([^"]+)"/g)) {
    links.push(toAbsolute(link));
  }
  return links;
}
|
|
304
|
+
|
|
305
|
+
/**
 * Resolves download links by scraping dlpanda.com's Instagram page.
 *
 * Never rejects: failures are reported as `{ creator, status: false, message }`.
 *
 * @param {string} url - Instagram URL to resolve.
 * @returns {Promise<object>} Success object whose `data.url` is the list
 *   returned by extractData (metadata fields are null), or a failure object.
 */
async function dlpanda(url) {
  try {
    // Encode the target: a raw Instagram URL containing `&` or `#` would
    // otherwise be cut short when placed in the query string.
    const requestUrl = `https://dlpanda.com/instagram?url=${encodeURIComponent(url)}`;
    const { data: pageHtml } = await axios.get(requestUrl);

    const downloadLinks = extractData(pageHtml);
    if (downloadLinks.length === 0) {
      return {
        creator: '@fort.kun',
        status: false,
        message: 'No video found',
      };
    }

    return {
      creator: '@fort.kun',
      status: true,
      data: {
        url: downloadLinks,
        caption: null,
        username: null,
        like: null,
        comment: null,
        isVideo: null,
      },
    };
  } catch (error) {
    return {
      creator: '@fort.kun',
      status: false,
      message: error.message,
    };
  }
}
|
|
340
|
+
|
|
341
|
+
/**
 * Resolves an Instagram URL to downloadable media by trying each scraping
 * method in order and returning the first usable result.
 *
 * Order: directScrape, userGraphql, saveIG, dlpanda. A result is usable only
 * when it reports `status: true` AND carries at least one media URL; a
 * "successful" result with no URLs falls through to the next method.
 *
 * @param {string} text - Instagram URL.
 * @param {*} [Func] - Unused; kept so existing call sites stay valid.
 * @returns {Promise<object>} The first usable result, or
 *   `{ creator, status: false, message: 'All methods failed' }`.
 */
async function instagramDownloader(text, Func) {
  const scrapers = [directScrape, userGraphql, saveIG, dlpanda];

  for (const scrape of scrapers) {
    let result = null;
    try {
      result = await scrape(text);
    } catch (error) {
      // Deliberate best-effort: a failing method just yields to the next one.
      result = null;
    }

    const urls = result?.data?.url;
    const hasMedia = Array.isArray(urls) && urls.some(Boolean);
    if (result?.status && hasMedia) {
      return result;
    }
  }

  return {
    creator: '@fort.kun',
    status: false,
    message: 'All methods failed',
  };
}
|
|
394
|
+
|
|
395
|
+
module.exports = instagramDownloader;
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
const axios = require("axios");
|
|
2
|
+
const cheerio = require("cheerio");
|
|
3
|
+
const { DEFAULT_UA, cleanText, safeJsonParse, uniqBy, isValidUrl } = require("../utils");
|
|
4
|
+
|
|
5
|
+
/**
 * Tells whether `text` is an absolute URL on a Pinterest host.
 *
 * The host must be `pinterest.<tld>` (including two-part suffixes such as
 * `co.uk` or `com.au`) or a subdomain of it. A plain substring test also
 * accepted look-alikes like `notpinterest.com` and `pinterest.evil.com`.
 *
 * @param {string} text - Candidate URL.
 * @returns {boolean} True for Pinterest hosts; false for other hosts and for
 *   input that is not a parseable absolute URL.
 */
function isPinterestUrl(text) {
  let hostname;
  try {
    hostname = new URL(text).hostname;
  } catch {
    return false;
  }
  return /(?:^|\.)pinterest\.(?:[a-z]{2,}|com?\.[a-z]{2})$/.test(hostname);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Builds the Pinterest pin-search URL for a free-text query.
 *
 * @param {string} query - Search terms; percent-encoded with encodeURIComponent.
 * @returns {string} Search page URL.
 */
function buildSearchUrl(query) {
  const encodedQuery = encodeURIComponent(query);
  return "https://www.pinterest.com/search/pins/?q=" + encodedQuery;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Normalises a pin link to the canonical `https://www.pinterest.com/pin/<id>/`
 * form when a numeric pin id can be determined.
 *
 * Resolution order: numeric id found in the URL path, then the `id` argument,
 * then the parsed URL (or the raw input when parsing failed). URLs whose host
 * does not contain "pinterest." are returned unchanged.
 *
 * @param {string|null} url - Absolute or site-relative link.
 * @param {string|number|null} [id=null] - Fallback pin id.
 * @returns {string|null} Normalised link, or null when neither `url` nor `id`
 *   is given.
 */
function normalizePinterestPinUrl(url, id = null) {
  const canonicalPinUrl = (pinId) => `https://www.pinterest.com/pin/${pinId}/`;

  if (!url) {
    return id ? canonicalPinUrl(id) : null;
  }

  // Relative links resolve against the Pinterest origin.
  let parsed = null;
  try {
    parsed = new URL(url, "https://www.pinterest.com");
  } catch {
    parsed = null;
  }

  if (parsed && !parsed.hostname.includes("pinterest.")) {
    return url;
  }

  // When parsing failed, search the raw text for the pin path instead.
  const haystack = parsed ? parsed.pathname : String(url);
  const found = haystack.match(/\/pin\/(\d+)/);
  if (found) {
    return canonicalPinUrl(found[1]);
  }
  if (id) {
    return canonicalPinUrl(id);
  }
  return parsed ? parsed.toString() : url;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Maps a raw pin-like object onto the `{ id, title, link, image, source }`
 * record used by this module.
 *
 * Each field takes the first truthy value from a list of alternative
 * property names.
 *
 * @param {object} pin - Raw object taken from page JSON.
 * @returns {object|null} Normalised record, or null when `pin` is not an
 *   object, when its cleaned title is literally "pin" (any case), or when no
 *   id, title, link or image could be found.
 */
function normalizePin(pin) {
  if (!pin || typeof pin !== "object") return null;

  const firstTruthy = (...candidates) => candidates.find(Boolean) || null;

  const id = firstTruthy(pin.id, pin.pinId, pin.pin_id, pin.aggregated_pin_data?.id);

  const title = cleanText(
    firstTruthy(
      pin.title,
      pin.grid_title,
      pin.seo_description,
      pin.description,
      pin.rich_summary?.display_description,
      pin.story_pin_data?.title,
      pin.alt_text
    )
  );
  if (title && title.toLowerCase() === "pin") return null;

  const rawLink = firstTruthy(pin.link, pin.url, pin.closeup_unified_description?.url, pin.canonical_url);
  const link = normalizePinterestPinUrl(rawLink, id);

  // Image candidates are tried in this order; video URLs come last.
  const image = firstTruthy(
    pin.images?.orig?.url,
    pin.images?.["736x"]?.url,
    pin.images?.["564x"]?.url,
    pin.images?.["474x"]?.url,
    pin.images?.["236x"]?.url,
    pin.image_medium_url,
    pin.image_large_url,
    pin.image_url,
    pin.story_pin_data?.pages?.[0]?.image?.images?.orig?.url,
    pin.videos?.video_list?.V_720P?.url,
    pin.videos?.video_list?.V_EXP3?.url
  );

  const source = firstTruthy(pin.domain, pin.rich_metadata?.site_name, pin.tracking_params?.domain);

  const hasAnything = id || title || link || image;
  if (!hasAnything) return null;

  return {
    id: id ? String(id) : null,
    title: title || null,
    link: link || null,
    image: isValidUrl(image) ? image : null,
    source: source || null,
  };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Walks an arbitrary JSON-like value and collects every nested object that
 * looks like a pin, normalised through normalizePin.
 *
 * An object "looks like a pin" when it has an id-style key (`id`, `pinId`,
 * `pin_id`) plus at least one image/title/description/link-style key.
 *
 * The traversal is iterative (explicit stack) so deeply nested input cannot
 * overflow the call stack. Visit order is the same pre-order, left-to-right
 * order as a recursive walk, so result order is unchanged.
 *
 * @param {*} obj - Root value; non-objects yield no pins.
 * @param {object[]} [bucket=[]] - Array that receives the pins (mutated).
 * @returns {object[]} `bucket`.
 */
function deepCollectPins(obj, bucket = []) {
  const pending = [obj];

  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;

    const isList = Array.isArray(node);
    if (!isList) {
      const looksLikePin =
        ("id" in node || "pinId" in node || "pin_id" in node) &&
        ("images" in node || "image_url" in node || "image_medium_url" in node ||
          "title" in node || "grid_title" in node || "description" in node ||
          "link" in node || "url" in node);

      if (looksLikePin) {
        const parsed = normalizePin(node);
        if (parsed) bucket.push(parsed);
      }
    }

    // Push children in reverse so they are popped in their original order.
    const children = isList ? node : Object.values(node);
    for (let i = children.length - 1; i >= 0; i -= 1) {
      pending.push(children[i]);
    }
  }

  return bucket;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Reads the JSON embedded in `<script id="scriptId">` and returns the pins
 * found anywhere inside it.
 *
 * @param {string} html - Page HTML.
 * @param {string} scriptId - `id` attribute of the script element.
 * @returns {object[]} Pins de-duplicated by id, then link, then image; empty
 *   when the script is missing, empty, or safeJsonParse returns a falsy value.
 */
function parseScriptJsonById(html, scriptId) {
  const $ = cheerio.load(html);
  const scriptBody = $(`script#${scriptId}`).html();
  const parsed = scriptBody ? safeJsonParse(scriptBody) : null;
  if (!parsed) return [];

  const dedupeKey = (pin) => pin.id || pin.link || pin.image;
  return uniqBy(deepCollectPins(parsed), dedupeKey);
}
|
|
103
|
+
|
|
104
|
+
/**
 * Last-resort parser: builds a single pin record from the page's meta tags.
 *
 * Title comes from og:title, twitter:title or <title>; image from og:image or
 * twitter:image; link from the canonical link, og:url or `pageUrl`.
 *
 * @param {string} html - Page HTML.
 * @param {string|null} [pageUrl=null] - URL the HTML was fetched from, used
 *   when the page declares no canonical link.
 * @returns {object[]} A one-element array, or an empty array when none of
 *   id, title, image and link could be determined.
 */
function parseMetaFallback(html, pageUrl = null) {
  const $ = cheerio.load(html);
  const metaContent = (selector) => $(selector).attr("content");

  const title =
    cleanText(metaContent('meta[property="og:title"]')) ||
    cleanText(metaContent('meta[name="twitter:title"]')) ||
    cleanText($("title").text());

  const image =
    metaContent('meta[property="og:image"]') ||
    metaContent('meta[name="twitter:image"]') ||
    null;

  const canonical =
    $('link[rel="canonical"]').attr("href") ||
    metaContent('meta[property="og:url"]') ||
    pageUrl ||
    null;

  const normalizedLink = normalizePinterestPinUrl(canonical, null);

  // The id is only recoverable from a canonical /pin/<digits>/ link.
  let id = null;
  if (normalizedLink) {
    const idMatch = normalizedLink.match(/\/pin\/(\d+)\//);
    if (idMatch) id = idMatch[1];
  }

  if (!(id || title || image || normalizedLink)) return [];

  const record = {
    id: id || null,
    title: title || null,
    link: normalizedLink || null,
    image: isValidUrl(image) ? image : null,
    source: null,
  };
  return [record];
}
|
|
134
|
+
|
|
135
|
+
/**
 * Extracts pins from anchor elements whose href contains "/pin/".
 *
 * Repin and pin-creation links are skipped, as are links that do not
 * normalise to a `/pin/<digits>/` URL.
 *
 * @param {string} html - Page HTML.
 * @returns {object[]} Pin records (source is always null), de-duplicated by
 *   id, then link, then image.
 */
function parseFromRenderedDom(html) {
  const $ = cheerio.load(html);
  const results = [];

  for (const el of $("a[href*='/pin/']").toArray()) {
    const anchor = $(el);
    const href = anchor.attr("href");
    if (!href) continue;

    const fullLink = href.startsWith("http") ? href : `https://www.pinterest.com${href}`;
    const isRepin = /\/repin\/?/i.test(fullLink);
    const isCreate = /\/pin\/create\//i.test(fullLink);
    if (isRepin || isCreate) continue;

    const normalizedLink = normalizePinterestPinUrl(fullLink);
    const idMatch = normalizedLink.match(/\/pin\/(\d+)\//);
    if (!idMatch) continue;

    // NOTE(review): closest("*") appears to match the anchor itself, so this
    // fallback likely searches the same subtree again — confirm intent.
    const innerImg = anchor.find("img").first();
    const img = innerImg.length ? innerImg : anchor.closest("*").find("img").first();

    const title = cleanText(img.attr("alt") || anchor.attr("aria-label") || anchor.text().trim() || null);

    // For srcset, take the URL part of the last listed candidate.
    const image =
      img.attr("src") ||
      img.attr("data-src") ||
      img.attr("srcset")?.split(",").pop()?.trim().split(" ")[0] ||
      null;

    results.push({
      id: idMatch[1],
      title: title || null,
      link: normalizedLink,
      image: isValidUrl(image) ? image : null,
      source: null,
    });
  }

  return uniqBy(results, (pin) => pin.id || pin.link || pin.image);
}
|
|
170
|
+
|
|
171
|
+
/**
 * Scores how complete a pin record is; higher is better.
 *
 * Points: id +3, canonical `/pin/<digits>/` link +4, image +4, title +2,
 * source +1, and -10 when the link is a repin link.
 *
 * @param {{id?: *, link?: *, image?: *, title?: *, source?: *}} pin
 * @returns {number} Total score.
 */
function scorePin(pin) {
  const link = pin.link;
  const rules = [
    [pin.id, 3],
    [link && /\/pin\/\d+\/$/i.test(link), 4],
    [pin.image, 4],
    [pin.title, 2],
    [pin.source, 1],
    [link && /\/repin\/?/i.test(link), -10],
  ];

  return rules.reduce((total, [applies, points]) => (applies ? total + points : total), 0);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Final clean-up pass over collected pins: re-normalises every field, drops
 * records with no id, title, link or image, removes duplicates (keyed by id,
 * then link, then image) and orders the rest by descending scorePin.
 *
 * @param {object[]} results - Pin records from the individual parsers.
 * @returns {object[]} Cleaned, de-duplicated, sorted records.
 */
function cleanFinalResults(results) {
  const cleaned = [];
  for (const item of results) {
    const record = {
      id: item.id ? String(item.id) : null,
      title: cleanText(item.title),
      link: normalizePinterestPinUrl(item.link, item.id),
      image: isValidUrl(item.image) ? item.image : null,
      source: item.source || null,
    };
    if (record.id || record.title || record.link || record.image) {
      cleaned.push(record);
    }
  }

  const deduped = uniqBy(cleaned, (pin) => pin.id || pin.link || pin.image);
  deduped.sort((left, right) => scorePin(right) - scorePin(left));
  return deduped;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Downloads a page with axios using browser-like headers.
 *
 * @param {string} url - Page to fetch.
 * @returns {Promise<*>} The response body (`data` of the axios response).
 */
async function fetchHtmlWithAxios(url) {
  const headers = {
    "User-Agent": DEFAULT_UA,
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "Referer": "https://www.pinterest.com/",
  };

  // 30 s timeout, at most 5 redirects.
  const { data } = await axios.get(url, { headers, timeout: 30000, maxRedirects: 5 });
  return data;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Renders a page in headless Chromium via Puppeteer and returns its HTML
 * after scrolling down to trigger lazy loading.
 *
 * Puppeteer is loaded on demand; the browser is always closed, even when
 * navigation or scrolling fails.
 *
 * @param {string} url - Page to render.
 * @returns {Promise<string>} Serialised page content.
 * @throws {Error} When `require("puppeteer")` fails, or when Puppeteer
 *   itself rejects (launch, navigation, evaluation).
 */
async function fetchHtmlWithPuppeteer(url) {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  let puppeteer;
  try {
    puppeteer = require("puppeteer");
  } catch {
    throw new Error("Puppeteer is required for this Pinterest URL but is not installed. Install it with: npm install puppeteer");
  }

  const launchOptions = {
    headless: true,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  };
  const browser = await puppeteer.launch(launchOptions);

  try {
    const page = await browser.newPage();
    await page.setUserAgent(DEFAULT_UA);
    await page.setExtraHTTPHeaders({ "Accept-Language": "en-US,en;q=0.9" });
    await page.goto(url, { waitUntil: "networkidle2", timeout: 60000 });

    await pause(5000);

    // Runs inside the page: scroll 800px every 400ms until 5000px are covered.
    await page.evaluate(
      () =>
        new Promise((done) => {
          const step = 800;
          let scrolled = 0;
          const ticker = setInterval(() => {
            window.scrollBy(0, step);
            scrolled += step;
            if (scrolled >= 5000) {
              clearInterval(ticker);
              done();
            }
          }, 400);
        })
    );

    await pause(2000);
    return await page.content();
  } finally {
    await browser.close();
  }
}
|
|
251
|
+
|
|
252
|
+
/**
 * Looks up Pinterest pins for a URL or a free-text search query.
 *
 * The page is first fetched with axios. If that fails or yields no pins, it
 * is rendered with Puppeteer instead. A Pinterest URL returns at most one pin
 * (the one whose id matches the URL is preferred). Any other input returns up
 * to `options.limit` pins.
 *
 * @param {string} input - Pinterest URL, other URL, or search terms.
 * @param {{limit?: number}} [options={}] - `limit` caps the number of results
 *   in search mode; values that are not positive numbers fall back to 10.
 * @returns {Promise<{status: string, platform: string, method: string,
 *   total: number, results: object[]}>} `method` is "axios" or "puppeteer".
 * @throws {Error} When no pin data is found, or when the Puppeteer fallback
 *   itself fails.
 */
async function pinterestDownloader(input, options = {}) {
  // Negative or non-numeric limits previously produced odd slices.
  const requestedLimit = Number(options?.limit);
  const limit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? Math.floor(requestedLimit) : 10;

  let isUrl = false;
  try { new URL(input); isUrl = true; } catch { }

  const singleMode = isUrl && isPinterestUrl(input);
  const url = isUrl ? input : buildSearchUrl(input);

  // Runs every parser over one HTML document and merges their findings.
  const collectPins = (html) => cleanFinalResults([
    ...parseScriptJsonById(html, "__PWS_DATA__"),
    ...parseScriptJsonById(html, "__PWS_INITIAL_PROPS__"),
    ...parseFromRenderedDom(html),
    ...parseMetaFallback(html, url),
  ]);

  let method = "axios";
  let finalResults = [];

  try {
    // Parsed once and reused; the HTML used to be parsed a second time below.
    finalResults = collectPins(await fetchHtmlWithAxios(url));
  } catch {
    // Best-effort: any axios or parse failure defers to the Puppeteer path.
    finalResults = [];
  }

  if (finalResults.length === 0) {
    method = "puppeteer";
    finalResults = collectPins(await fetchHtmlWithPuppeteer(url));
  }

  if (singleMode) {
    const exactPinId = url.match(/\/pin\/(\d+)/)?.[1] || null;
    if (exactPinId) {
      finalResults.sort((a, b) => {
        const aExact = a.id === exactPinId ? 1 : 0;
        const bExact = b.id === exactPinId ? 1 : 0;
        return bExact - aExact || scorePin(b) - scorePin(a);
      });
    }
    finalResults = finalResults.slice(0, 1);
  } else {
    finalResults = finalResults.slice(0, limit);
  }

  if (finalResults.length === 0) {
    throw new Error("No Pinterest pin data found. The page may require authentication or the URL is invalid.");
  }

  return {
    status: "ok",
    platform: "pinterest",
    method,
    total: finalResults.length,
    results: finalResults,
  };
}
|
|
312
|
+
|
|
313
|
+
module.exports = pinterestDownloader;
|