shirayuki-anime-scraper-api 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +14 -0
- package/LICENSE +24 -0
- package/README.md +539 -0
- package/config/database.js +37 -0
- package/index.js +63 -0
- package/models/Episode.js +49 -0
- package/models/Schedule.js +50 -0
- package/package.json +46 -0
- package/routes/anime-list.js +67 -0
- package/routes/episodeStream.js +64 -0
- package/routes/genre.js +67 -0
- package/routes/home.js +30 -0
- package/routes/monthly.js +37 -0
- package/routes/schedule.js +174 -0
- package/routes/search.js +79 -0
- package/routes/top10.js +37 -0
- package/routes/weekly.js +37 -0
- package/save.txt +431 -0
- package/scrapeanime/A-Z/AnimeList/filter.js +43 -0
- package/scrapeanime/A-Z/Genre/genre.js +42 -0
- package/scrapeanime/AnimeDetails/animedetails.js +73 -0
- package/scrapeanime/Browse/Search/search.js +119 -0
- package/scrapeanime/Browse/Suggestion/suggestion.js +50 -0
- package/scrapeanime/Leaderboard/Monthly/scrapeHiAnimeMonthlyTop10.js +137 -0
- package/scrapeanime/Leaderboard/Top/scrapeHiAnimeTop10.js +125 -0
- package/scrapeanime/Leaderboard/Weekly/scrapeHiAnimeWeeklyTop10.js +188 -0
- package/scrapeanime/Schedule/schedule.js +174 -0
- package/scrapeanime/SingleEpisode/scrapeSingleEpisode.js +496 -0
- package/scrapeanime/homepage/latest/latest.js +118 -0
- package/scrapeanime/homepage/most_favorite/mostFavorite.js +55 -0
- package/scrapeanime/homepage/most_popular/mostPopular.js +55 -0
- package/scrapeanime/homepage/recently_updated/recentlyUpdated.js +56 -0
- package/scrapeanime/homepage/scrapeAnimeDetails.js +128 -0
- package/scrapeanime/homepage/scrapehomepage.js +2 -0
- package/scrapeanime/homepage/scrapeservice.js +158 -0
- package/scrapeanime/homepage/slider/slider.js +151 -0
- package/scrapeanime/homepage/top_airing/topAiring.js +55 -0
- package/scrapeanime/homepage/trending/trending.js +59 -0
- package/service/scraperService.js +38 -0
package/routes/top10.js
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
import express from 'express';
|
2
|
+
import { scrapeHiAnimeTop10 } from '../scrapeanime/Leaderboard/Top/scrapeHiAnimeTop10.js';
|
3
|
+
|
4
|
+
const router = express.Router();
|
5
|
+
|
6
|
+
/**
 * GET / — runs the HiAnime Top 10 scraper and returns its result as JSON.
 *
 * Success: 200 with `{ success: true, data, extraction_time_seconds, message, timestamp }`.
 * Failure: 500 with `{ success: false, error, extraction_time_seconds, timestamp }`.
 */
router.get('/', async (req, res) => {
    // Captured outside the try block: the catch branch also needs it to report
    // the elapsed time, and a `const` declared inside `try` is not visible there.
    const start = Date.now();

    try {
        console.log('🔥 Starting HiAnime Top 10 scraping...');

        const result = await scrapeHiAnimeTop10();
        const duration = (Date.now() - start) / 1000;

        console.log(`✅ Top 10 scraping completed in ${duration}s`);

        res.json({
            success: true,
            data: result,
            extraction_time_seconds: duration,
            message: "Top 10 trending anime from HiAnime",
            timestamp: new Date().toISOString()
        });
    } catch (error) {
        const duration = (Date.now() - start) / 1000;
        console.error('❌ Error scraping top 10:', error.message);

        res.status(500).json({
            success: false,
            error: error.message,
            extraction_time_seconds: duration,
            timestamp: new Date().toISOString()
        });
    }
});
|
36
|
+
|
37
|
+
export default router;
|
package/routes/weekly.js
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
import express from 'express';
|
2
|
+
import { scrapeHiAnimeWeeklyTop10 } from '../scrapeanime/Leaderboard/Weekly/scrapeHiAnimeWeeklyTop10.js';
|
3
|
+
|
4
|
+
const router = express.Router();
|
5
|
+
|
6
|
+
/**
 * GET / — runs the HiAnime Weekly Top 10 scraper and returns its result as JSON.
 *
 * Success: 200 with `{ success: true, data, extraction_time_seconds, message, timestamp }`.
 * Failure: 500 with `{ success: false, error, extraction_time_seconds, timestamp }`.
 */
router.get('/', async (req, res) => {
    // Captured outside the try block: the catch branch also needs it to report
    // the elapsed time, and a `const` declared inside `try` is not visible there.
    const start = Date.now();

    try {
        console.log('📅 Starting HiAnime Weekly Top 10 scraping...');

        const result = await scrapeHiAnimeWeeklyTop10();
        const duration = (Date.now() - start) / 1000;

        console.log(`✅ Weekly Top 10 scraping completed in ${duration}s`);

        res.json({
            success: true,
            data: result,
            extraction_time_seconds: duration,
            message: "Top 10 weekly viewed anime from HiAnime",
            timestamp: new Date().toISOString()
        });
    } catch (error) {
        const duration = (Date.now() - start) / 1000;
        console.error('❌ Error scraping weekly top 10:', error.message);

        res.status(500).json({
            success: false,
            error: error.message,
            extraction_time_seconds: duration,
            timestamp: new Date().toISOString()
        });
    }
});
|
36
|
+
|
37
|
+
export default router;
|
package/save.txt
ADDED
@@ -0,0 +1,431 @@
|
|
1
|
+
import puppeteer from 'puppeteer-extra';
|
2
|
+
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
3
|
+
|
4
|
+
/**
 * Returns a promise that settles (with no value) once `ms` milliseconds have elapsed.
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((wake) => {
    setTimeout(wake, ms);
});
|
5
|
+
|
6
|
+
puppeteer.use(StealthPlugin());
|
7
|
+
|
8
|
+
const scrapeCache = new Map();
|
9
|
+
const CACHE_TTL_MS = 1000 * 60 * 5;
|
10
|
+
|
11
|
+
let browserSingleton = null;
|
12
|
+
let browserLaunchPromise = null;
|
13
|
+
|
14
|
+
/**
 * Returns the shared Puppeteer browser, launching it on first use.
 *
 * Concurrent callers made while a launch is in flight all receive the same
 * launch promise, so only one browser process is started.
 *
 * @returns {Promise<object>} The shared browser instance.
 * @throws Rejects with the launch error when the browser cannot be started.
 */
async function getBrowser() {
    if (browserSingleton) return browserSingleton;

    if (!browserLaunchPromise) {
        const launch = (async () => {
            const { executablePath } = await import('puppeteer');
            const browser = await puppeteer.launch({
                headless: 'new',
                executablePath: executablePath(),
                args: [
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--no-first-run',
                    '--window-size=1920,1080',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-infobars'
                ]
            });

            // Best-effort shutdown of the browser when the Node process exits.
            const closeOnExit = () => { try { browser.close(); } catch (e) { } };
            const canHookProcess = typeof process !== 'undefined' && process && process.on;
            try {
                if (canHookProcess) process.on('exit', closeOnExit);
            } catch (e) { }

            // If the browser crashes or is closed, forget it so the next call
            // launches a fresh one instead of handing out a dead instance.
            browser.on('disconnected', () => {
                try {
                    if (canHookProcess && process.off) process.off('exit', closeOnExit);
                } catch (e) { }
                if (browserSingleton === browser) {
                    browserSingleton = null;
                    browserLaunchPromise = null;
                }
            });

            browserSingleton = browser;
            return browser;
        })();

        browserLaunchPromise = launch;

        // A failed launch must not be cached: without this reset every later
        // call would keep receiving the same rejected promise forever.
        launch.catch(() => {
            if (browserLaunchPromise === launch) browserLaunchPromise = null;
        });
    }

    return browserLaunchPromise;
}
|
43
|
+
|
44
|
+
/**
 * Runs `fn`, retrying when it fails with a transient browser/network error.
 *
 * An error is considered transient when its message matches
 * /detached|navigation|timeout|net::ERR|crash|closed/i. Any other error is
 * rethrown immediately. Between attempts the wait grows linearly
 * (`delayMs * attempt`); no wait happens after the final attempt.
 *
 * @param {Function} fn - Async (or sync) operation to run; called with no arguments.
 * @param {number} [maxRetries=3] - Total number of attempts; values below 1 are treated as 1.
 * @param {number} [delayMs=3000] - Base wait between attempts, in milliseconds.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The last error raised by `fn` once attempts are exhausted, or the
 *         first non-transient error.
 */
async function withRetries(fn, maxRetries = 3, delayMs = 3000) {
    const transientPattern = /detached|navigation|timeout|net::ERR|crash|closed/i;
    // Always make at least one attempt, so a bad count never ends in `throw undefined`.
    const totalAttempts = Math.max(1, Math.floor(Number(maxRetries)) || 1);

    let lastError;
    for (let attempt = 1; attempt <= totalAttempts; attempt++) {
        try {
            return await fn();
        } catch (err) {
            lastError = err;

            // `err` may be null/undefined or a non-Error value; treat those as non-transient.
            const message = err?.message;
            const isTransient = Boolean(message) && transientPattern.test(message);
            if (!isTransient) throw err;

            // Nothing left to retry: fail now instead of sleeping first.
            if (attempt === totalAttempts) break;

            console.warn(`Retry ${attempt}/${maxRetries} after error: ${message}`);
            await delay(delayMs * attempt);
        }
    }
    throw lastError;
}
|
61
|
+
|
62
|
+
// Resource types blocked during scraping to keep page loads light.
const BLOCKED_RESOURCE_TYPES = ['image', 'stylesheet', 'font', 'media'];

// URL substrings treated as ad/tracker traffic.
// NOTE(review): 'ads' is a plain substring match, so it also hits URLs such as
// ".../uploads/..." — confirm that is acceptable.
const BLOCKED_URL_MARKERS = ['ads', 'doubleclick', 'googlesyndication', 'googletagmanager'];

// Patterns tried in order to pull the episode number out of the episode URL.
const EPISODE_NUMBER_PATTERNS = [
    /episode[\/\-]?(\d+)/i,
    /ep[\/\-]?(\d+)/i,
    /\/(\d+)\/?$/,
    /\-(\d+)\/?$/
];

/**
 * Aborts an intercepted request, falling back to continuing it if the abort fails.
 * `abort()`/`continue()` return promises, so they are awaited here; a plain
 * synchronous try/catch would let their rejections escape as unhandled rejections.
 */
async function abortInterceptedRequest(req) {
    try {
        await req.abort();
    } catch (abortError) {
        try {
            await req.continue();
        } catch (continueError) {
            // Request was already handled; nothing more to do.
        }
    }
}

/** Lets an intercepted request through, ignoring "already handled" failures. */
async function continueInterceptedRequest(req) {
    try {
        await req.continue();
    } catch (continueError) {
        // Request was already handled; nothing more to do.
    }
}

/**
 * Runs in the page context: returns the first iframe source that looks like a
 * streaming embed, or null. Must stay self-contained (it is serialized by Puppeteer).
 */
function findEmbeddedStreamSource() {
    const whitelistHosts = [
        'bunnycdn.to',
        'bunnycdn',
        'bunnycdn.com',
        'play.bunnycdn',
        'play.bunnycdn.to',
        'filemoon',
        'doodstream',
        'streamtape',
        'mp4upload',
        'mixdrop',
        'upstream',
        'streamwish',
        'vids.to',
        'vidstream',
        'fastcdn',
        'embed',
        'player',
        'vid',
        'video'
    ];

    const blacklist = [
        'disqus.com',
        'dtscout.com',
        'google-analytics',
        'googletagmanager',
        'doubleclick.net',
        'googlesyndication',
        'googleadservices',
        'adsystem',
        'facebook.com',
        'twitter.com',
        'instagram.com',
        'tiktok.com'
    ];

    const isValidStreamingLink = (src) => {
        if (!src || src === 'about:blank' || !src.startsWith('http') || src.length < 30) return false;
        const lowered = src.toLowerCase();
        if (blacklist.some((blocked) => lowered.includes(blocked))) return false;
        return whitelistHosts.some((allowed) => lowered.includes(allowed));
    };

    const prioritySelectors = [
        '#iframe_ext82377 iframe',
        'iframe[src*="bunnycdn"]',
        'iframe[src*="embed"]',
        'iframe[src*="play"]',
        'iframe[src*="stream"]',
        'iframe[src*="video"]',
        'iframe[src*="player"]',
        'iframe[src*="vid"]'
    ];

    // First pass: the selectors most likely to point at the player.
    for (const selector of prioritySelectors) {
        const iframe = document.querySelector(selector);
        const src = iframe && (iframe.src || iframe.getAttribute('src'));
        if (src && isValidStreamingLink(src)) return src;
    }

    // Second pass: any iframe (capped at 40), including lazy-load attributes.
    const iframes = Array.from(document.querySelectorAll('iframe')).slice(0, 40);
    for (const iframe of iframes) {
        const src = iframe.src
            || iframe.getAttribute('src')
            || iframe.getAttribute('data-src')
            || iframe.getAttribute('data-lazy')
            || iframe.getAttribute('data-original');
        if (!src) continue;
        if (isValidStreamingLink(src)) return src;
    }

    return null;
}

/**
 * Runs in the page context: clicks the first button-like element whose text
 * mentions play/load/watch.
 */
function clickFirstPlayLikeButton() {
    const buttons = document.querySelectorAll('button, .play-btn, .load-btn, [onclick], .btn');
    for (const btn of buttons) {
        const text = btn.textContent?.toLowerCase() || '';
        if (text.includes('play') || text.includes('load') || text.includes('watch')) {
            try { btn.click(); } catch (e) { }
            break;
        }
    }
}

/**
 * Runs in the page context: stricter fallback lookup used while polling after a
 * click. Checks iframes (first 40) and then anchors (first 60) against a short
 * list of known hosts. Returns the matching URL or null.
 */
function findWhitelistedLink() {
    const whitelist = ['bunnycdn', 'filemoon', 'doodstream', 'streamtape', 'mp4upload', 'mixdrop', 'upstream', 'streamwish'];
    const isCandidate = (s) => s
        && typeof s === 'string'
        && s.startsWith('http')
        && s.length > 30
        && whitelist.some((w) => s.toLowerCase().includes(w));

    const iframes = Array.from(document.querySelectorAll('iframe')).slice(0, 40);
    for (const iframe of iframes) {
        const s = iframe.src || iframe.getAttribute('src') || iframe.getAttribute('data-src');
        if (isCandidate(s)) return s;
    }

    const anchors = Array.from(document.querySelectorAll('a[href]')).slice(0, 60);
    for (const a of anchors) {
        const s = a.href;
        if (isCandidate(s)) return s;
    }

    return null;
}

/**
 * Runs in the page context: collects episode ranges ("1-100" style) as
 * `{ range_id, range_text }` objects, trying three progressively looser lookups.
 */
function collectEpisodeRanges() {
    const ranges = [];

    // 1) Spans that carry a data-range-id and whose whole text is "N-M".
    for (const span of document.querySelectorAll('span[data-range-id]')) {
        const rangeText = span.textContent?.trim();
        const rangeId = span.getAttribute('data-range-id');

        if (rangeText && /^\d+\s*[-–]\s*\d+$/.test(rangeText)) {
            ranges.push({
                range_id: rangeId,
                range_text: rangeText.replace(/\s+/g, '').replace('–', '-')
            });
        }
    }

    // 2) Episode range lists that carry a data-range-id.
    if (ranges.length === 0) {
        for (const element of document.querySelectorAll('ul.episodes_range, .episodes_range')) {
            const rangeId = element.getAttribute('data-range-id');
            if (rangeId) {
                const textContent = element.textContent || '';
                const rangeMatch = textContent.match(/(\d+)\s*[-–]\s*(\d+)/);
                if (rangeMatch) {
                    ranges.push({
                        range_id: rangeId,
                        range_text: `${rangeMatch[1]}-${rangeMatch[2]}`
                    });
                }
            }
        }
    }

    // 3) Anything whose class name mentions "range" or "episode".
    if (ranges.length === 0) {
        for (const element of document.querySelectorAll('[class*="range"], [class*="episode"]')) {
            const textContent = element.textContent || '';
            const rangeMatch = textContent.match(/(\d+)\s*[-–]\s*(\d+)/);
            if (rangeMatch) {
                const rangeText = `${rangeMatch[1]}-${rangeMatch[2]}`;
                ranges.push({
                    range_id: element.getAttribute('data-range-id') || rangeText,
                    range_text: rangeText
                });
            }
        }
    }

    return ranges;
}

/** Runs in the page context: summarizes the iframes on the page for diagnostics. */
function collectIframeDebugInfo() {
    const iframes = document.querySelectorAll('iframe');
    const found = [];

    for (const iframe of iframes) {
        const src = iframe.src
            || iframe.getAttribute('src')
            || iframe.getAttribute('data-src')
            || iframe.getAttribute('data-lazy');
        if (src) {
            found.push({
                src: src.substring(0, 100),
                id: iframe.id || 'no-id',
                class: iframe.className || 'no-class'
            });
        }
    }

    return {
        totalIframes: iframes.length,
        iframeSources: found,
        pageTitle: document.title,
        hasPlayButtons: document.querySelectorAll('button, .play-btn, .load-btn').length
    };
}

/** Returns the episode number found in the URL (as a string), or 'Unknown'. */
function extractEpisodeNumber(episodeUrl) {
    for (const pattern of EPISODE_NUMBER_PATTERNS) {
        const match = episodeUrl.match(pattern);
        if (match) return match[1];
    }
    return 'Unknown';
}

/**
 * Derives `{ animeId, animeTitle }` from the path segment that follows "anime"
 * in the URL; falls back to 'unknown' / 'Unknown Anime' when there is none.
 */
function deriveAnimeIdentity(episodeUrl) {
    const urlParts = episodeUrl.split('/');
    const animeIndex = urlParts.findIndex((part) => part === 'anime');

    if (animeIndex === -1 || !urlParts[animeIndex + 1]) {
        return { animeId: 'unknown', animeTitle: 'Unknown Anime' };
    }

    const animeId = urlParts[animeIndex + 1];
    const animeTitle = animeId
        .replace(/-/g, ' ')
        .replace(/\b\w/g, (l) => l.toUpperCase());
    return { animeId, animeTitle };
}

/** Returns the range text containing `episodeNumber`, or 'single-episode'. */
function pickCurrentRange(episodeRanges, episodeNumber) {
    if (episodeRanges.length === 0 || episodeNumber === 'Unknown') return 'single-episode';

    const currentEpNum = parseInt(episodeNumber, 10);
    for (const range of episodeRanges) {
        const [start, end] = range.range_text.split('-').map((n) => parseInt(n.trim(), 10));
        if (currentEpNum >= start && currentEpNum <= end) return range.range_text;
    }
    return 'single-episode';
}

/**
 * Opens an episode page in the shared browser and extracts its streaming link.
 *
 * Successful results are cached per URL for `CACHE_TTL_MS`; cache hits are
 * returned with `cached: true` and a nominal `extraction_time_seconds` of 0.001.
 *
 * @param {string} episodeUrl - Full URL of the episode page.
 * @returns {Promise<object>} On success
 *   `{ success: true, anime_id, episode, data, extraction_time_seconds }`;
 *   otherwise `{ success: false, error, episode_url, extraction_time_seconds, debug? }`.
 *   Scrape errors are reported through the result object rather than thrown;
 *   only failures to obtain the browser or open a page reject.
 */
export const scrapeSingleEpisode = async (episodeUrl) => {
    const cached = scrapeCache.get(episodeUrl);
    if (cached) {
        if (cached.expiresAt > Date.now()) {
            return {
                ...cached.result,
                extraction_time_seconds: 0.001,
                cached: true
            };
        }
        // Drop the stale entry so the cache does not grow without bound.
        scrapeCache.delete(episodeUrl);
    }

    const browser = await getBrowser();
    const page = await browser.newPage();

    // Set right before navigation. Declared out here so the catch branch can
    // report elapsed time; it stays null if the failure happened earlier.
    let startTime = null;

    try {
        // Page configuration lives inside the try so `finally` always closes the page.
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
        page.setDefaultNavigationTimeout(8000);
        page.setDefaultTimeout(8000);

        try {
            await page.setRequestInterception(true);
            page.on('request', (req) => {
                const resourceType = req.resourceType();
                const url = req.url();
                const shouldBlock = BLOCKED_RESOURCE_TYPES.includes(resourceType)
                    || BLOCKED_URL_MARKERS.some((marker) => url.includes(marker));

                if (shouldBlock) {
                    abortInterceptedRequest(req);
                    return;
                }
                continueInterceptedRequest(req);
            });
        } catch (e) {
            // Interception is only an optimization; scraping proceeds without it.
        }

        startTime = Date.now();

        await page.goto(episodeUrl, { waitUntil: 'domcontentloaded', timeout: 8000 });

        let streamingLink = null;
        let attempts = 0;
        const maxAttempts = 2;

        while (!streamingLink && attempts < maxAttempts) {
            attempts++;
            streamingLink = await page.evaluate(findEmbeddedStreamSource);

            if (!streamingLink && attempts < maxAttempts) {
                // The player may load only after a click: try one, then poll briefly.
                try {
                    await page.evaluate(clickFirstPlayLikeButton);
                } catch (e) { }

                const pollStart = Date.now();
                const pollTimeout = 3000;
                const pollInterval = 300;
                while (Date.now() - pollStart < pollTimeout && !streamingLink) {
                    try {
                        streamingLink = await page.evaluate(findWhitelistedLink);
                    } catch (e) { }

                    if (streamingLink) break;
                    await delay(pollInterval);
                }
            }
        }

        if (!streamingLink) {
            console.log(`❌ No valid streaming link found for episode after ${maxAttempts} attempts`);

            const debugInfo = await page.evaluate(collectIframeDebugInfo);

            console.log(`Debug info:`, debugInfo);

            return {
                success: false,
                error: 'No valid streaming iframe found after multiple attempts',
                episode_url: episodeUrl,
                debug: debugInfo,
                extraction_time_seconds: parseFloat(((Date.now() - startTime) / 1000).toFixed(3))
            };
        }

        console.log(`✅ Found valid streaming link: ${streamingLink.substring(0, 60)}...`);

        const episodeNumber = extractEpisodeNumber(episodeUrl);
        const { animeId, animeTitle } = deriveAnimeIdentity(episodeUrl);

        const episodeRanges = await page.evaluate(collectEpisodeRanges);
        const currentRange = pickCurrentRange(episodeRanges, episodeNumber);

        const allRanges = episodeRanges
            .map((range) => range.range_text)
            .sort((a, b) => parseInt(a.split('-')[0], 10) - parseInt(b.split('-')[0], 10));

        const streamingData = {
            title: animeTitle,
            episode_number: episodeNumber,
            streaming_link: streamingLink,
            range_id: currentRange,
            all_ranges: allRanges.length > 0 ? allRanges : ['single-episode']
        };

        try {
            scrapeCache.set(episodeUrl, {
                expiresAt: Date.now() + CACHE_TTL_MS,
                result: {
                    success: true,
                    anime_id: animeId,
                    episode: episodeNumber,
                    data: streamingData
                }
            });
        } catch (e) { }

        console.log(`💾 Skipping database save (disabled): ${animeTitle} - Episode ${episodeNumber}`);

        return {
            success: true,
            anime_id: animeId,
            episode: episodeNumber,
            data: streamingData,
            extraction_time_seconds: parseFloat(((Date.now() - startTime) / 1000).toFixed(3))
        };
    } catch (error) {
        console.error('❌ Error scraping single episode:', error && error.message ? error.message : error);
        return {
            success: false,
            error: error && error.message ? error.message : String(error),
            episode_url: episodeUrl,
            extraction_time_seconds: startTime !== null
                ? parseFloat(((Date.now() - startTime) / 1000).toFixed(3))
                : null
        };
    } finally {
        try { await page.close(); } catch (e) { }
    }
};
|
424
|
+
|
425
|
+
/**
 * Closes the shared browser (if any) and clears the cached references so a
 * later `getBrowser()` call launches a new one.
 *
 * Also handles a launch that is still in flight: it waits for that launch to
 * settle and closes the resulting browser instead of leaving it running.
 * Errors from the launch or from `close()` are ignored (best-effort shutdown).
 *
 * @returns {Promise<void>}
 */
export async function closeSharedBrowser() {
    const pendingLaunch = browserLaunchPromise;
    let browser = browserSingleton;

    if (!browser && pendingLaunch) {
        try {
            browser = await pendingLaunch;
        } catch (launchError) {
            // Launch failed, so there is no browser to close.
            browser = null;
        }
    }

    if (browser) {
        try { await browser.close(); } catch (e) { }
    }

    // Only clear state that still refers to what was just shut down, so a
    // launch started in the meantime is not discarded.
    if (browserSingleton === browser) browserSingleton = null;
    if (browserLaunchPromise === pendingLaunch) browserLaunchPromise = null;
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
import axios from 'axios';
|
2
|
+
import * as cheerio from 'cheerio';
|
3
|
+
|
4
|
+
/**
 * Fetches one page of the A–Z anime listing from 123animehub.cc and parses it.
 *
 * @param {string} letter - Listing key used as the path segment after `/az-all-anime/`.
 * @param {number|string} [page=1] - Page number sent as the `page` query parameter.
 * @returns {Promise<Array<{title: string, image: string, sub: boolean, dub: boolean, episodes: string}>>}
 *   One entry per `.film-list .item` element; `image` is '' when missing or a placeholder.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function scrapeAnimeByLetter(letter, page = 1) {
    const siteOrigin = 'https://123animehub.cc';

    // Encode caller-supplied values so they cannot alter the path or query string.
    const listingUrl = new URL(`/az-all-anime/${encodeURIComponent(letter)}/`, siteOrigin);
    listingUrl.searchParams.set('page', String(page));

    const { data } = await axios.get(listingUrl.href);
    const $ = cheerio.load(data);
    const animeList = [];

    $('.film-list .item').each((i, el) => {
        const item = $(el);
        const title = item.find('.name').text().trim();

        const imgElement = item.find('.film-poster img, .poster img, img').first();
        let image = imgElement.attr('data-src')
            || imgElement.attr('src')
            || imgElement.attr('data-lazy')
            || '';

        if (image && !image.startsWith('http')) {
            // Resolve against the site origin; unlike string prefixing this also
            // handles protocol-relative ("//cdn…") and "data:" sources correctly.
            try {
                image = new URL(image, `${siteOrigin}/`).href;
            } catch (e) {
                image = '';
            }
        }

        // Treat the site's placeholder poster as "no image".
        if (!image || image.includes('no_poster.jpg')) {
            image = '';
        }

        const sub = item.find('.status .sub').length > 0;
        const dub = item.find('.status .dub').length > 0;
        const episodes = item.find('.ep').text().replace('Ep ', '').trim();

        animeList.push({
            title,
            image,
            sub,
            dub,
            episodes
        });
    });

    return animeList;
}
|
41
|
+
|
42
|
+
|
43
|
+
export { scrapeAnimeByLetter };
|
@@ -0,0 +1,42 @@
|
|
1
|
+
import axios from 'axios';
|
2
|
+
import * as cheerio from 'cheerio';
|
3
|
+
|
4
|
+
|
5
|
+
/**
 * Fetches one page of a genre listing from 123animehub.cc and parses it.
 *
 * @param {string} genre - Genre key used as the path segment after `/genere/`.
 * @param {number|string} [page=1] - Page number sent as the `page` query parameter.
 * @returns {Promise<Array<{index: number, title: string, image: string, sub: boolean, dub: boolean, episodes: string}>>}
 *   One entry per `.film-list .item` element, with a 1-based `index`;
 *   `image` is '' when missing or a placeholder.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
export async function scrapeAnimeByGenre(genre, page = 1) {
    const siteOrigin = 'https://123animehub.cc';

    // Encode caller-supplied values so they cannot alter the path or query string.
    // NOTE(review): the "/genere/" spelling is kept as-is; presumably it matches the site's route.
    const listingUrl = new URL(`/genere/${encodeURIComponent(genre)}`, siteOrigin);
    listingUrl.searchParams.set('page', String(page));

    const { data } = await axios.get(listingUrl.href);
    const $ = cheerio.load(data);
    const animeList = [];

    $('.film-list .item').each((i, el) => {
        const item = $(el);
        const title = item.find('.name').text().trim();

        const imgElement = item.find('.film-poster img, .poster img, img').first();
        let image = imgElement.attr('data-src')
            || imgElement.attr('src')
            || imgElement.attr('data-lazy')
            || '';

        if (image && !image.startsWith('http')) {
            // Resolve against the site origin; unlike string prefixing this also
            // handles protocol-relative ("//cdn…") and "data:" sources correctly.
            try {
                image = new URL(image, `${siteOrigin}/`).href;
            } catch (e) {
                image = '';
            }
        }

        // Treat the site's placeholder poster as "no image".
        if (!image || image.includes('no_poster.jpg')) {
            image = '';
        }

        const sub = item.find('.status .sub').length > 0;
        const dub = item.find('.status .dub').length > 0;
        const episodes = item.find('.ep').text().replace('Ep ', '').trim();

        animeList.push({
            index: i + 1,
            title,
            image,
            sub,
            dub,
            episodes
        });
    });

    return animeList;
}
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import express from 'express';
|
2
|
+
import axios from 'axios';
|
3
|
+
import * as cheerio from 'cheerio';
|
4
|
+
const router = express.Router();
|
5
|
+
|
6
|
+
/**
 * GET /anime/:slug — fetches the anime page for `slug` from 123animehub.cc and
 * returns its parsed details as JSON:
 * `{ title, image, description, type, country, genres, status, released, quality }`.
 *
 * Any failure (request or parsing) yields a 500 with
 * `{ error: 'Failed to fetch anime details', details }`.
 */
router.get('/anime/:slug', async (req, res) => {
    const { slug } = req.params;
    // Encode the slug so a value containing "/", "?" or "#" cannot redirect
    // the upstream request to a different path or add query parameters.
    const animeUrl = `https://123animehub.cc/anime/${encodeURIComponent(slug)}`;

    try {
        const { data: html } = await axios.get(animeUrl);
        const $ = cheerio.load(html);

        // Title: prefer the dedicated heading, fall back to the first <h1>.
        const title = $('h2.title').text().trim() || $('h1').first().text().trim();

        // Image: prefer the thumbnail, fall back to the first image on the page.
        const image = $('.thumb img').attr('src') || $('img').first().attr('src');

        // Description: prefer the long variant, with whitespace collapsed.
        let description = '';
        if ($('.desc .long').length) {
            description = $('.desc .long').text().replace(/\s+/g, ' ').trim();
        } else if ($('.desc').length) {
            description = $('.desc').text().replace(/\s+/g, ' ').trim();
        }

        // Metadata is laid out as <dt>label:</dt><dd>value</dd> pairs inside `.meta`.
        let type = '';
        let country = '';
        let genres = [];
        let status = '';
        let released = '';
        let quality = '';

        $('.meta').each((metaIndex, metaEl) => {
            $(metaEl).find('dt').each((dtIndex, dt) => {
                const key = $(dt).text().trim().toLowerCase();
                const value = $(dt).next('dd');

                switch (key) {
                    case 'type:':
                        type = value.text().trim();
                        break;
                    case 'country:':
                        country = value.text().trim();
                        break;
                    case 'genre:':
                        genres = value.find('a').map((linkIndex, a) => $(a).text().trim()).get();
                        break;
                    case 'status:':
                        status = value.text().trim();
                        break;
                    case 'released:':
                        released = value.text().trim();
                        break;
                    default:
                        break;
                }
            });
        });

        // Quality: prefer the dedicated span, otherwise parse it from the text.
        const qualityDiv = $("div:contains('Quality:')");
        if (qualityDiv.length) {
            const qualitySpan = qualityDiv.find('span.quality').first();
            if (qualitySpan.length) {
                quality = qualitySpan.text().trim();
            } else {
                const match = qualityDiv.text().match(/Quality:\s*(\w+)/i);
                if (match) quality = match[1];
            }
        }

        res.json({
            title,
            image,
            description,
            type,
            country,
            genres,
            status,
            released,
            quality
        });
    } catch (error) {
        res.status(500).json({ error: 'Failed to fetch anime details', details: error.message });
    }
});
|
72
|
+
|
73
|
+
export default router;
|