shirayuki-anime-scraper-api 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +14 -0
- package/LICENSE +24 -0
- package/README.md +539 -0
- package/config/database.js +37 -0
- package/index.js +63 -0
- package/models/Episode.js +49 -0
- package/models/Schedule.js +50 -0
- package/package.json +46 -0
- package/routes/anime-list.js +67 -0
- package/routes/episodeStream.js +64 -0
- package/routes/genre.js +67 -0
- package/routes/home.js +30 -0
- package/routes/monthly.js +37 -0
- package/routes/schedule.js +174 -0
- package/routes/search.js +79 -0
- package/routes/top10.js +37 -0
- package/routes/weekly.js +37 -0
- package/save.txt +431 -0
- package/scrapeanime/A-Z/AnimeList/filter.js +43 -0
- package/scrapeanime/A-Z/Genre/genre.js +42 -0
- package/scrapeanime/AnimeDetails/animedetails.js +73 -0
- package/scrapeanime/Browse/Search/search.js +119 -0
- package/scrapeanime/Browse/Suggestion/suggestion.js +50 -0
- package/scrapeanime/Leaderboard/Monthly/scrapeHiAnimeMonthlyTop10.js +137 -0
- package/scrapeanime/Leaderboard/Top/scrapeHiAnimeTop10.js +125 -0
- package/scrapeanime/Leaderboard/Weekly/scrapeHiAnimeWeeklyTop10.js +188 -0
- package/scrapeanime/Schedule/schedule.js +174 -0
- package/scrapeanime/SingleEpisode/scrapeSingleEpisode.js +496 -0
- package/scrapeanime/homepage/latest/latest.js +118 -0
- package/scrapeanime/homepage/most_favorite/mostFavorite.js +55 -0
- package/scrapeanime/homepage/most_popular/mostPopular.js +55 -0
- package/scrapeanime/homepage/recently_updated/recentlyUpdated.js +56 -0
- package/scrapeanime/homepage/scrapeAnimeDetails.js +128 -0
- package/scrapeanime/homepage/scrapehomepage.js +2 -0
- package/scrapeanime/homepage/scrapeservice.js +158 -0
- package/scrapeanime/homepage/slider/slider.js +151 -0
- package/scrapeanime/homepage/top_airing/topAiring.js +55 -0
- package/scrapeanime/homepage/trending/trending.js +59 -0
- package/service/scraperService.js +38 -0
@@ -0,0 +1,174 @@
|
|
1
|
+
import * as cheerio from 'cheerio';
|
2
|
+
import puppeteer from 'puppeteer';
|
3
|
+
|
4
|
+
/**
 * Scrapes the airing schedule shown on https://123animehub.cc.
 *
 * A throwaway headless browser is launched for every call and always closed
 * in the `finally` block. The rendered HTML is parsed with cheerio first; if
 * that yields nothing, the live DOM is queried as a fallback.
 *
 * @returns {Promise<Array<{day: string, anime: string, time: string}>>}
 *   One entry per scheduled anime. On failure the promise still resolves, with
 *   a single placeholder entry whose `day` is "Error" and whose `anime` field
 *   carries the error message.
 */
async function scrapeSchedule() {
  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-background-timer-throttling',
        '--disable-renderer-backgrounding',
        '--disable-backgrounding-occluded-windows',
        // A repeated switch keeps only one of its values, so every feature to
        // disable has to be listed in a single --disable-features argument.
        '--disable-features=TranslateUI,VizDisplayCompositor',
        // NOTE(review): disables the same-origin policy for a third-party page;
        // confirm the scrape really needs it.
        '--disable-web-security',
        '--disable-extensions',
        '--memory-pressure-off',
        // NOTE(review): this looks like a Node/V8 option rather than a Chromium
        // switch and is presumably ignored — confirm before relying on it.
        '--max_old_space_size=4096',
        '--disable-background-networking',
        '--disable-sync',
        '--disable-translate',
        '--disable-ipc-flooding-protection'
      ]
    });

    const page = await browser.newPage();

    // Only the document and its scripts are needed; abort everything else.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const resourceType = req.resourceType();
      if (['stylesheet', 'image', 'font', 'media', 'texttrack', 'websocket', 'manifest', 'other'].includes(resourceType)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.setViewport({ width: 1024, height: 576 });
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');

    await page.goto('https://123animehub.cc', {
      waitUntil: 'domcontentloaded',
      timeout: 10000
    });

    // Give <body> up to two short chances to appear; scraping continues either way.
    let bodyFound = false;
    for (let i = 0; i < 2 && !bodyFound; i++) {
      try {
        await page.waitForSelector('body', { timeout: 3000 });
        bodyFound = true;
      } catch (e) {
        console.log(`Body selector attempt ${i + 1} failed, retrying...`);
        await new Promise(resolve => setTimeout(resolve, 500));
      }
    }

    // Best effort: ask the page to open its schedule panel before reading the DOM.
    try {
      await page.evaluate(() => {
        if (typeof showschedulemenu === 'function') {
          showschedulemenu();
        }

        const scheduleBtn = document.querySelector('#recomendedclosebtn, button[onclick*="schedule"]');
        if (scheduleBtn) {
          scheduleBtn.click();
        }
      });
    } catch (evalError) {
      console.log('Schedule trigger failed, continuing with static content...', evalError.message);
    }

    // Let any script triggered above finish updating the DOM.
    await new Promise(resolve => setTimeout(resolve, 1000));

    const content = await page.content();
    const $ = cheerio.load(content);

    const schedule = [];

    // Primary parser: each .scheduletitle is a day heading; the siblings up to
    // the next heading are that day's entries.
    $('.scheduletitle').each((i, titleElem) => {
      const day = $(titleElem).text().trim();

      let current = $(titleElem).next();

      while (current.length && !current.hasClass('scheduletitle')) {
        if (current.hasClass('schedulelist')) {
          const animeLink = current.find('a');
          const anime = animeLink.text().trim();

          // The air time is read only from the element directly after the entry.
          const timeElem = current.next();
          let time = '';

          if (timeElem.hasClass('airtime')) {
            time = timeElem.text().trim();
          }

          if (anime) {
            schedule.push({
              day,
              anime,
              time: time || 'No time specified'
            });
          }
        }
        current = current.next();
      }
    });

    // Fallback parser: walk the live DOM in document order when cheerio found nothing.
    if (schedule.length === 0) {
      try {
        const scheduleData = await page.$$eval('.scheduletitle, .schedulelist, .airtime', elements => {
          const result = [];
          let currentDay = '';

          elements.forEach(el => {
            if (el.classList.contains('scheduletitle')) {
              currentDay = el.textContent.trim();
            } else if (el.classList.contains('schedulelist')) {
              const link = el.querySelector('a');
              const anime = link ? link.textContent.trim() : el.textContent.trim();

              let nextEl = el.nextElementSibling;
              let time = 'No time specified';

              // Scan forward for the air time, stopping at the next entry or heading.
              while (nextEl && !nextEl.classList.contains('scheduletitle') && !nextEl.classList.contains('schedulelist')) {
                if (nextEl.classList.contains('airtime')) {
                  time = nextEl.textContent.trim();
                  break;
                }
                nextEl = nextEl.nextElementSibling;
              }

              if (anime && currentDay) {
                result.push({
                  day: currentDay,
                  anime,
                  time
                });
              }
            }
          });

          return result;
        });

        schedule.push(...scheduleData);
      } catch (altError) {
        console.log('Alternative parsing method failed:', altError.message);
      }
    }

    return schedule;

  } catch (error) {
    console.error('Error scraping schedule with Puppeteer:', error.message);
    // Callers receive a placeholder entry instead of a rejected promise.
    return [{
      day: "Error",
      anime: `Failed to scrape: ${error.message}`,
      time: "Error occurred"
    }];
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
|
173
|
+
|
174
|
+
export default scrapeSchedule;
|
@@ -0,0 +1,496 @@
|
|
1
|
+
import puppeteer from 'puppeteer-extra';
|
2
|
+
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
3
|
+
import connectDB from '../../config/database.js';
|
4
|
+
import Episode from '../../models/Episode.js';
|
5
|
+
|
6
|
+
// Returns a promise that resolves (with no value) once `ms` milliseconds have elapsed.
const delay = (ms) => new Promise((done) => {
  setTimeout(done, ms);
});
|
7
|
+
|
8
|
+
puppeteer.use(StealthPlugin());
|
9
|
+
|
10
|
+
const scrapeCache = new Map();
|
11
|
+
// Lifetime of a cached scrape result: five minutes, expressed in milliseconds.
const CACHE_TTL_MS = 5 * 60 * 1000;
|
12
|
+
|
13
|
+
let browserSingleton = null;
|
14
|
+
let browserLaunchPromise = null;
|
15
|
+
|
16
|
+
/**
 * Returns the shared Puppeteer browser, launching it on first use.
 *
 * Concurrent callers share one in-flight launch through `browserLaunchPromise`.
 * A failed launch, or a browser that later disconnects, clears the cached
 * state so that the next call starts a fresh launch instead of reusing a
 * rejected promise or a dead browser.
 *
 * @returns {Promise<object>} The shared browser instance.
 * @throws Rejects with the launch error when the browser cannot be started.
 */
async function getBrowser() {
  if (browserSingleton) return browserSingleton;

  if (!browserLaunchPromise) {
    const launchPromise = (async () => {
      const { executablePath } = await import('puppeteer');
      const b = await puppeteer.launch({
        headless: 'new',
        executablePath: executablePath(),
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--no-first-run',
          '--window-size=1280,720',
          '--disable-blink-features=AutomationControlled',
          '--disable-infobars',
          '--disable-background-timer-throttling',
          '--disable-renderer-backgrounding',
          '--disable-backgrounding-occluded-windows',
          '--disable-features=TranslateUI',
          '--disable-extensions'
        ]
      });

      // Forget the browser once it is gone, but only if it is still the cached
      // one (an explicit close may already have replaced or cleared it).
      b.on('disconnected', () => {
        if (browserSingleton === b) {
          browserSingleton = null;
          browserLaunchPromise = null;
        }
      });

      // Best-effort shutdown when the Node process exits.
      try {
        if (typeof process !== 'undefined' && process && process.on) {
          process.on('exit', () => { try { b.close(); } catch (e) { } });
        }
      } catch (e) { }

      browserSingleton = b;
      return browserSingleton;
    })();

    browserLaunchPromise = launchPromise;

    // The caller still sees the rejection through the promise returned below;
    // this handler only un-caches the failed launch so a later call can retry.
    launchPromise.catch(() => {
      if (browserLaunchPromise === launchPromise) {
        browserLaunchPromise = null;
      }
    });
  }

  return browserLaunchPromise;
}
|
50
|
+
|
51
|
+
/**
 * Runs `fn`, retrying when it fails with a transient browser/network error.
 *
 * An error counts as transient when its message matches
 * /detached|navigation|timeout|net::ERR|crash|closed/i. Any other error is
 * rethrown immediately. Between attempts the function waits
 * `delayMs * attempt` milliseconds; no wait happens after the final attempt.
 *
 * @param {() => any} fn - Operation to run; its (awaited) result is returned.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @param {number} [delayMs=3000] - Base wait, multiplied by the attempt number.
 * @returns {Promise<any>} Whatever `fn` resolves to.
 * @throws The non-transient error, or the last transient error once all
 *   attempts are used up.
 */
async function withRetries(fn, maxRetries = 3, delayMs = 3000) {
  let lastError;
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (err) {
      lastError = err;

      const message = err?.message;
      const isTransient = Boolean(message) && /detached|navigation|timeout|net::ERR|crash|closed/i.test(message);
      if (!isTransient) {
        throw err;
      }

      // Nothing is left to retry, so fail right away instead of sleeping first.
      if (attempt === maxRetries) {
        break;
      }

      console.warn(`Retry ${attempt}/${maxRetries} after error: ${message}`);
      await delay(delayMs * attempt);
    }
  }
  throw lastError;
}
|
68
|
+
|
69
|
+
/**
 * Resolves the streaming iframe link for a single episode page.
 *
 * Lookup order: a non-expired MongoDB record, then the in-memory
 * `scrapeCache`, then a fresh scrape in a new tab of the shared browser.
 * A successful scrape is stored in both caches; a database failure is logged
 * and never fails the request.
 *
 * @param {string} episodeUrl - Absolute URL of the episode page.
 * @returns {Promise<object>} On success: `{ success: true, anime_id, episode,
 *   data, extraction_time_seconds, cached, ... }`, where `saved_to_db` on a
 *   fresh scrape tells whether the MongoDB write actually succeeded. On
 *   failure: `{ success: false, error, episode_url, extraction_time_seconds }`
 *   (plus `debug` when the page loaded but no valid iframe was found).
 * @throws Rejects only if the shared browser or the new tab cannot be set up;
 *   errors during the scrape itself are reported in the returned object.
 */
export const scrapeSingleEpisode = async (episodeUrl) => {
  const startTime = Date.now();

  // 1) Persistent cache.
  try {
    await connectDB();

    const existingEpisode = await Episode.findOne({
      episode_url: episodeUrl,
      cache_expires_at: { $gt: new Date() }
    }).sort({ last_updated: -1 });

    if (existingEpisode) {
      console.log(`📋 Returning cached episode data for ${episodeUrl}`);
      return {
        success: true,
        anime_id: existingEpisode.anime_id,
        episode: existingEpisode.episode_number,
        data: existingEpisode.streaming_data,
        extraction_time_seconds: 0.001,
        cached: true,
        last_updated: existingEpisode.last_updated
      };
    }

    console.log(`🔄 Scraping fresh episode data for ${episodeUrl}`);

  } catch (dbError) {
    console.warn('⚠️ Database error, falling back to memory cache:', dbError.message);
  }

  // 2) In-memory cache.
  const cached = scrapeCache.get(episodeUrl);
  if (cached) {
    if (cached.expiresAt > Date.now()) {
      return {
        ...cached.result,
        extraction_time_seconds: 0.001,
        cached: true
      };
    }
    // Drop the stale entry so expired results do not pile up in the map.
    scrapeCache.delete(episodeUrl);
  }

  // 3) Fresh scrape.
  const browser = await getBrowser();
  const page = await browser.newPage();
  await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
  page.setDefaultNavigationTimeout(6000);
  page.setDefaultTimeout(6000);

  try {
    // Blocking heavy resources and ad hosts is an optimisation only; the
    // scrape continues without it if interception cannot be enabled.
    try {
      await page.setRequestInterception(true);
      page.on('request', (req) => {
        const resourceType = req.resourceType();
        const url = req.url();
        if (resourceType === 'image' || resourceType === 'stylesheet' || resourceType === 'font' || resourceType === 'media') {
          try { req.abort(); } catch (e) { try { req.continue(); } catch (_) { } }
          return;
        }
        if (url.includes('ads') || url.includes('doubleclick') || url.includes('googlesyndication') || url.includes('googletagmanager')) {
          try { req.abort(); } catch (e) { try { req.continue(); } catch (_) { } }
          return;
        }
        try { req.continue(); } catch (e) { }
      });
    } catch (interceptError) {
      console.warn(`⚠️ Request interception unavailable: ${interceptError && interceptError.message ? interceptError.message : interceptError}`);
    }

    const scrapingStartTime = Date.now();

    await page.goto(episodeUrl, { waitUntil: 'domcontentloaded', timeout: 6000 });

    let streamingLink = null;
    let attempts = 0;
    const maxAttempts = 2;

    while (!streamingLink && attempts < maxAttempts) {
      attempts++;

      // Runs inside the page, so everything it needs is declared inline.
      streamingLink = await page.evaluate(() => {
        const findValidIframeSource = () => {
          const whitelistHosts = [
            'bunnycdn.to',
            'bunnycdn',
            'bunnycdn.com',
            'play.bunnycdn',
            'play.bunnycdn.to',
            'filemoon',
            'doodstream',
            'streamtape',
            'mp4upload',
            'mixdrop',
            'upstream',
            'streamwish',
            'vids.to',
            'vidstream',
            'fastcdn',
            'embed',
            'player',
            'vid',
            'video'
          ];

          const blacklist = [
            'disqus.com',
            'dtscout.com',
            'google-analytics',
            'googletagmanager',
            'doubleclick.net',
            'googlesyndication',
            'googleadservices',
            'adsystem',
            'facebook.com',
            'twitter.com',
            'instagram.com',
            'tiktok.com'
          ];

          // A candidate is an absolute http(s) URL of at least 30 characters
          // that contains a whitelisted fragment and no blacklisted one.
          const isValidStreamingLink = (src) => {
            if (!src || src === 'about:blank' || !src.startsWith('http') || src.length < 30) return false;
            const s = src.toLowerCase();
            if (blacklist.some(b => s.includes(b))) return false;
            return whitelistHosts.some(w => s.includes(w));
          };

          const prioritySelectors = [
            '#iframe_ext82377 iframe',
            'iframe[src*="bunnycdn"]',
            'iframe[src*="embed"]',
            'iframe[src*="play"]',
            'iframe[src*="stream"]',
            'iframe[src*="video"]',
            'iframe[src*="player"]',
            'iframe[src*="vid"]'
          ];

          for (const selector of prioritySelectors) {
            const iframe = document.querySelector(selector);
            const src = iframe && (iframe.src || iframe.getAttribute('src'));
            if (src && isValidStreamingLink(src)) return src;
          }

          // Otherwise inspect the first 20 iframes, including lazy-load attributes.
          const iframes = Array.from(document.querySelectorAll('iframe')).slice(0, 20);
          for (const iframe of iframes) {
            const src = iframe.src || iframe.getAttribute('src') || iframe.getAttribute('data-src') || iframe.getAttribute('data-lazy') || iframe.getAttribute('data-original');
            if (!src) continue;
            if (isValidStreamingLink(src)) return src;
          }

          return null;
        };

        return findValidIframeSource();
      });

      if (!streamingLink && attempts < maxAttempts) {
        // Best effort: click the first play/load/watch control so the player iframe gets injected.
        try {
          await page.evaluate(() => {
            const buttons = document.querySelectorAll('button, .play-btn, .load-btn, [onclick], .btn');
            for (const btn of buttons) {
              const text = btn.textContent?.toLowerCase() || '';
              if (text.includes('play') || text.includes('load') || text.includes('watch')) {
                try { btn.click(); } catch (e) { }
                break;
              }
            }
          });
        } catch (e) { }

        // Poll for up to two seconds for a link from a known host.
        const pollStart = Date.now();
        const pollTimeout = 2000;
        const pollInterval = 200;
        while (Date.now() - pollStart < pollTimeout && !streamingLink) {
          try {
            streamingLink = await page.evaluate(() => {
              const whitelist = ['bunnycdn', 'filemoon', 'doodstream', 'streamtape', 'mp4upload', 'mixdrop', 'upstream', 'streamwish'];
              const isCandidate = (s) => s && typeof s === 'string' && s.startsWith('http') && s.length > 30 && whitelist.some(w => s.toLowerCase().includes(w));
              const p = document.querySelector('iframe');
              if (p) {
                const s = p.src || p.getAttribute('src') || p.getAttribute('data-src');
                if (isCandidate(s)) return s;
              }
              const iframes = Array.from(document.querySelectorAll('iframe')).slice(0, 20);
              for (const iframe of iframes) {
                const s = iframe.src || iframe.getAttribute('src') || iframe.getAttribute('data-src');
                if (isCandidate(s)) return s;
              }
              const anchors = Array.from(document.querySelectorAll('a[href]')).slice(0, 30);
              for (const a of anchors) {
                const s = a.href;
                if (isCandidate(s)) return s;
              }
              return null;
            });
          } catch (e) { }

          if (streamingLink) break;
          await delay(pollInterval);
        }
      }
    }

    if (streamingLink) {
      console.log(`✅ Found valid streaming link: ${streamingLink.substring(0, 60)}...`);

      // Episode number: the first pattern that matches the URL wins.
      const episodePatterns = [
        /episode[\/\-]?(\d+)/i,
        /ep[\/\-]?(\d+)/i,
        /\/(\d+)\/?$/,
        /\-(\d+)\/?$/
      ];

      let episodeNumber = 'Unknown';
      for (const pattern of episodePatterns) {
        const match = episodeUrl.match(pattern);
        if (match) {
          episodeNumber = match[1];
          break;
        }
      }

      // Anime id is the path segment after "/anime/"; the title is that slug in title case.
      let animeTitle = 'Unknown Anime';
      let animeId = 'unknown';
      const urlParts = episodeUrl.split('/');
      const animeIndex = urlParts.findIndex(part => part === 'anime');

      if (animeIndex !== -1 && urlParts[animeIndex + 1]) {
        animeId = urlParts[animeIndex + 1];
        animeTitle = animeId
          .replace(/-/g, ' ')
          .replace(/\b\w/g, l => l.toUpperCase());
      }

      // Collect the "start-end" episode ranges the page advertises, trying
      // three progressively looser selectors.
      const episodeRanges = await page.evaluate(() => {
        const ranges = [];

        const rangeSpans = document.querySelectorAll('span[data-range-id]');

        for (const span of rangeSpans) {
          const rangeText = span.textContent?.trim();
          const rangeId = span.getAttribute('data-range-id');

          if (rangeText && /^\d+\s*[-–]\s*\d+$/.test(rangeText)) {
            ranges.push({
              range_id: rangeId,
              range_text: rangeText.replace(/\s+/g, '').replace('–', '-')
            });
          }
        }

        if (ranges.length === 0) {
          const episodeRangeLists = document.querySelectorAll('ul.episodes_range, .episodes_range');

          for (const element of episodeRangeLists) {
            const rangeId = element.getAttribute('data-range-id');
            if (rangeId) {
              const textContent = element.textContent || '';
              const rangeMatch = textContent.match(/(\d+)\s*[-–]\s*(\d+)/);
              if (rangeMatch) {
                ranges.push({
                  range_id: rangeId,
                  range_text: `${rangeMatch[1]}-${rangeMatch[2]}`
                });
              }
            }
          }
        }

        if (ranges.length === 0) {
          const rangeElements = document.querySelectorAll('[class*="range"], [class*="episode"]');

          for (const element of rangeElements) {
            const textContent = element.textContent || '';
            const rangeMatch = textContent.match(/(\d+)\s*[-–]\s*(\d+)/);
            if (rangeMatch) {
              const rangeText = `${rangeMatch[1]}-${rangeMatch[2]}`;
              ranges.push({
                range_id: element.getAttribute('data-range-id') || rangeText,
                range_text: rangeText
              });
            }
          }
        }

        return ranges;
      });

      // Find the range that contains the current episode, if any.
      let currentRange = 'single-episode';
      if (episodeRanges.length > 0 && episodeNumber !== 'Unknown') {
        const currentEpNum = Number.parseInt(episodeNumber, 10);

        for (const range of episodeRanges) {
          const [start, end] = range.range_text.split('-').map(n => Number.parseInt(n.trim(), 10));
          if (currentEpNum >= start && currentEpNum <= end) {
            currentRange = range.range_text;
            break;
          }
        }
      }

      const allRanges = episodeRanges.map(range => range.range_text).sort((a, b) => {
        const aStart = Number.parseInt(a.split('-')[0], 10);
        const bStart = Number.parseInt(b.split('-')[0], 10);
        return aStart - bStart;
      });

      const streamingData = {
        title: animeTitle,
        episode_number: episodeNumber,
        streaming_link: streamingLink,
        range_id: currentRange,
        all_ranges: allRanges.length > 0 ? allRanges : ['single-episode']
      };

      const result = {
        success: true,
        anime_id: animeId,
        episode: episodeNumber,
        data: streamingData
      };

      scrapeCache.set(episodeUrl, {
        expiresAt: Date.now() + CACHE_TTL_MS,
        result: result
      });

      // Persisting is best effort; the flag records whether the write itself went through.
      let savedToDb = false;
      try {
        const cacheExpiresAt = new Date(Date.now() + CACHE_TTL_MS);
        const newEpisode = new Episode({
          anime_id: animeId,
          episode_number: episodeNumber,
          episode_url: episodeUrl,
          streaming_data: streamingData,
          extraction_time_seconds: parseFloat(((Date.now() - scrapingStartTime) / 1000).toFixed(3)),
          cache_expires_at: cacheExpiresAt,
          last_updated: new Date()
        });

        await newEpisode.save();
        savedToDb = true;
        console.log(`💾 Saved episode data to MongoDB: ${animeTitle} - Episode ${episodeNumber}`);

        // Housekeeping: remove records that are both expired and older than one day.
        const oneDayAgo = new Date();
        oneDayAgo.setDate(oneDayAgo.getDate() - 1);

        const deleteResult = await Episode.deleteMany({
          cache_expires_at: { $lt: new Date() },
          last_updated: { $lt: oneDayAgo }
        });

        if (deleteResult.deletedCount > 0) {
          console.log(`🧹 Cleaned up ${deleteResult.deletedCount} expired episode records`);
        }
      } catch (dbError) {
        console.warn(`⚠️ Failed to save episode to database: ${dbError.message}`);
      }

      return {
        ...result,
        extraction_time_seconds: parseFloat(((Date.now() - scrapingStartTime) / 1000).toFixed(3)),
        cached: false,
        saved_to_db: savedToDb
      };
    } else {
      console.log(`❌ No valid streaming link found for episode after ${maxAttempts} attempts`);

      // Summarise what the page did contain to make failures diagnosable.
      const debugInfo = await page.evaluate(() => {
        const iframes = document.querySelectorAll('iframe');
        const found = [];

        for (const iframe of iframes) {
          const src = iframe.src ||
            iframe.getAttribute('src') ||
            iframe.getAttribute('data-src') ||
            iframe.getAttribute('data-lazy');
          if (src) {
            found.push({
              src: src.substring(0, 100),
              id: iframe.id || 'no-id',
              class: iframe.className || 'no-class'
            });
          }
        }

        return {
          totalIframes: iframes.length,
          iframeSources: found,
          pageTitle: document.title,
          hasPlayButtons: document.querySelectorAll('button, .play-btn, .load-btn').length
        };
      });

      console.log(`Debug info:`, debugInfo);

      return {
        success: false,
        error: 'No valid streaming iframe found after multiple attempts',
        episode_url: episodeUrl,
        debug: debugInfo,
        extraction_time_seconds: parseFloat(((Date.now() - scrapingStartTime) / 1000).toFixed(3))
      };
    }

  } catch (error) {
    console.error('❌ Error scraping single episode:', error && error.message ? error.message : error);
    return {
      success: false,
      error: error && error.message ? error.message : String(error),
      episode_url: episodeUrl,
      extraction_time_seconds: parseFloat(((Date.now() - startTime) / 1000).toFixed(3))
    };
  } finally {
    // The tab is always released; the shared browser stays open for reuse.
    try { await page.close(); } catch (e) { }
  }
};
|
489
|
+
|
490
|
+
/**
 * Closes the shared browser and clears the cached launch state.
 *
 * If a launch is still in flight, it is awaited first so the browser it
 * produces is closed as well instead of being left running. A launch that
 * failed is simply forgotten. Errors from closing are logged, not thrown.
 *
 * @returns {Promise<void>}
 */
export async function closeSharedBrowser() {
  const pendingLaunch = browserLaunchPromise;
  let browser = browserSingleton;

  if (!browser && pendingLaunch) {
    try {
      browser = await pendingLaunch;
    } catch (launchError) {
      // The launch never produced a browser, so there is nothing to close.
      browser = null;
    }
  }

  // Reset after the await: a launch that finishes while we wait stores its
  // browser in the shared state on completion.
  browserSingleton = null;
  browserLaunchPromise = null;

  if (!browser) return;

  try {
    await browser.close();
  } catch (closeError) {
    console.warn(`Failed to close shared browser: ${closeError && closeError.message ? closeError.message : closeError}`);
  }
}
|