shirayuki-anime-scraper-api 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +14 -0
- package/LICENSE +24 -0
- package/README.md +539 -0
- package/config/database.js +37 -0
- package/index.js +63 -0
- package/models/Episode.js +49 -0
- package/models/Schedule.js +50 -0
- package/package.json +46 -0
- package/routes/anime-list.js +67 -0
- package/routes/episodeStream.js +64 -0
- package/routes/genre.js +67 -0
- package/routes/home.js +30 -0
- package/routes/monthly.js +37 -0
- package/routes/schedule.js +174 -0
- package/routes/search.js +79 -0
- package/routes/top10.js +37 -0
- package/routes/weekly.js +37 -0
- package/save.txt +431 -0
- package/scrapeanime/A-Z/AnimeList/filter.js +43 -0
- package/scrapeanime/A-Z/Genre/genre.js +42 -0
- package/scrapeanime/AnimeDetails/animedetails.js +73 -0
- package/scrapeanime/Browse/Search/search.js +119 -0
- package/scrapeanime/Browse/Suggestion/suggestion.js +50 -0
- package/scrapeanime/Leaderboard/Monthly/scrapeHiAnimeMonthlyTop10.js +137 -0
- package/scrapeanime/Leaderboard/Top/scrapeHiAnimeTop10.js +125 -0
- package/scrapeanime/Leaderboard/Weekly/scrapeHiAnimeWeeklyTop10.js +188 -0
- package/scrapeanime/Schedule/schedule.js +174 -0
- package/scrapeanime/SingleEpisode/scrapeSingleEpisode.js +496 -0
- package/scrapeanime/homepage/latest/latest.js +118 -0
- package/scrapeanime/homepage/most_favorite/mostFavorite.js +55 -0
- package/scrapeanime/homepage/most_popular/mostPopular.js +55 -0
- package/scrapeanime/homepage/recently_updated/recentlyUpdated.js +56 -0
- package/scrapeanime/homepage/scrapeAnimeDetails.js +128 -0
- package/scrapeanime/homepage/scrapehomepage.js +2 -0
- package/scrapeanime/homepage/scrapeservice.js +158 -0
- package/scrapeanime/homepage/slider/slider.js +151 -0
- package/scrapeanime/homepage/top_airing/topAiring.js +55 -0
- package/scrapeanime/homepage/trending/trending.js +59 -0
- package/service/scraperService.js +38 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
/**
 * Scrapes the homepage hero slider.
 *
 * Candidate container selectors are tried in order; scraping stops at the
 * first selector that yields at least one accepted entry. An entry is accepted
 * only when its resolved link contains `hianime.to` or `/watch/`, and at most
 * eight entries are returned.
 *
 * @param {Function} $ - Cheerio-style query function for the loaded page.
 * @param {Function} resolveUrl - Turns a possibly relative URL into an absolute one.
 * @param {*} source - Copied verbatim onto every returned entry.
 * @returns {Array<Object>} Entries with `title`, `href`, `image`, `description`,
 *   `isTV`, `duration`, `releaseDate`, `quality`, `subtitles`, `dubbed`,
 *   `source` and `section` (`'slider'`). Missing values are `null`
 *   (`false` for the two booleans).
 */
export default function scrapeSlider($, resolveUrl, source) {
  const MAX_ITEMS = 8;
  const containerSelectors = ['.swiper-slide', '.slider .item', '.home-slider .slide', '.featured-slider .item', '.swiper-slide.item-qtip', '.film-poster'];
  const items = [];

  // Returns the first candidate that is non-empty after trimming, or null.
  // Candidates are thunks so later (more expensive) lookups only run when needed.
  // Trimming before testing matters: a whitespace-only text node must not block
  // the remaining fallbacks.
  const firstNonBlank = (getters) => {
    for (const getter of getters) {
      const value = getter();
      if (value) {
        const trimmed = String(value).trim();
        if (trimmed) return trimmed;
      }
    }
    return null;
  };

  const hasIcon = (node$, iconSelector) => node$.find(iconSelector).length > 0;

  // Finds `itemSelector` nodes accepted by `predicate`, looking inside the
  // element first and falling back to its enclosing `.swiper-slide`.
  const findTagged = (el$, itemSelector, predicate) => {
    const pick = (scope$) => scope$.find(itemSelector).filter((_, node) => predicate($(node)));
    const own = pick(el$);
    return own.length ? own : pick(el$.closest('.swiper-slide'));
  };

  const isDubItem = (node$) => {
    const text = node$.text().toLowerCase();
    return text.includes('dub') || text.includes('english') || node$.hasClass('dub');
  };

  for (const selector of containerSelectors) {
    const found = $(selector);
    if (!found || !found.length) continue;

    found.each((_, el) => {
      // Returning false stops the iteration once the cap is reached.
      if (items.length >= MAX_ITEMS) return false;

      const el$ = $(el);
      const anchor = el$.find('a').first();
      const rawHref = anchor.attr('href') || el$.attr('href') || '';
      const href = rawHref ? resolveUrl(rawHref) : null;

      // Filter on the link first so rejected elements cost no further lookups.
      if (!href || !(href.includes('hianime.to') || href.includes('/watch/'))) return undefined;

      const title = firstNonBlank([
        () => el$.find('.desi-head-title').text(),
        () => el$.find('.film-title').text(),
        () => el$.find('.title').text(),
        () => el$.find('h3').text(),
        () => anchor.attr('title'),
        () => el$.find('img').attr('alt'),
        () => el$.find('.film-title').attr('data-iname'),
      ]);

      const imgEl = el$.find('img').first();
      let image = null;
      if (imgEl && imgEl.length) {
        image = imgEl.attr('data-src') || imgEl.attr('data-lazy') || imgEl.attr('src') || imgEl.attr('data-original') || null;
      }
      if (!image) image = el$.attr('data-background') || el$.attr('data-image') || null;
      if (image) image = resolveUrl(image);

      const description = firstNonBlank([
        () => el$.find('.desi-description').text(),
        () => el$.closest('.swiper-slide').find('.desi-description').text(),
        () => el$.parent().find('.desi-description').text(),
        () => el$.find('.description').text(),
        () => el$.find('.synopsis').text(),
        () => el$.find('.summary').text(),
        () => el$.find('[class*="desc"]').text(),
      ]);

      const isTV =
        findTagged(el$, '.scd-item', (node$) => hasIcon(node$, '.fas.fa-play-circle') && node$.text().includes('TV')).length > 0 ||
        el$.find('[class*="tv"]').length > 0 ||
        el$.find('.film-detail .fd-infor .fdi-item').filter((__, node) => $(node).text().toLowerCase().includes('tv')).length > 0 ||
        Boolean(title && title.toLowerCase().includes('season'));

      // Duration is kept as the matched digit string (minutes), e.g. "24".
      let duration = null;
      const durationEl = findTagged(el$, '.scd-item', (node$) => hasIcon(node$, '.fas.fa-clock'));
      if (durationEl.length) {
        const minutes = durationEl.text().trim().match(/(\d+)m/i);
        duration = minutes ? minutes[1] : null;
      }

      let releaseDate = null;
      const dateEl = findTagged(el$, '.scd-item', (node$) => hasIcon(node$, '.fas.fa-calendar'));
      if (dateEl.length) releaseDate = dateEl.text().trim() || null;

      const quality = firstNonBlank([
        () => el$.find('.scd-item .quality').text(),
        () => el$.closest('.swiper-slide').find('.scd-item .quality').text(),
        () => el$.find('.quality').text(),
        () => el$.find('[class*="quality"]').text(),
        () => el$.find('.film-poster-quality').text(),
        () => el$.find('.badge').text(),
        () => el$.find('.resolution').text(),
      ]);

      // Subtitle count is kept as the matched digit string.
      let subtitles = null;
      const subEl = findTagged(el$, '.tick-item', (node$) => hasIcon(node$, '.fas.fa-closed-captioning'));
      if (subEl.length) {
        const count = subEl.text().trim().match(/(\d+)/);
        subtitles = count ? count[1] : null;
      }

      const dubbed = findTagged(el$, '.tick-item', isDubItem).length > 0;

      items.push({
        title,
        href,
        image: image || null,
        description,
        isTV,
        duration,
        releaseDate,
        quality,
        subtitles,
        dubbed,
        source,
        section: 'slider',
      });
      return undefined;
    });

    if (items.length) break;
  }

  return items;
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
/**
 * Scrapes the "Top Airing" list from the homepage.
 *
 * Every `div.anif-block` whose header text matches "top airing"
 * (case-insensitive) contributes its first six list entries.
 *
 * @param {Function} $ - Cheerio-style query function for the loaded page.
 * @param {Function} resolveUrl - Turns a possibly relative URL into an absolute one.
 * @param {*} source - Copied verbatim onto every returned entry.
 * @returns {Array<Object>} Entries with `title`, `href`, `image`, `dub`, `sub`
 *   (integers or `null`), `tv` (boolean), `source` and `section`
 *   (`'top_airing'`).
 */
export default function scrapeTopAiring($, resolveUrl, source) {
  const MAX_ITEMS_PER_BLOCK = 6;
  const items = [];

  // Returns the first candidate that is non-empty after trimming, or null, so
  // a blank attribute does not hide a usable fallback.
  const firstNonBlank = (getters) => {
    for (const getter of getters) {
      const value = getter();
      if (value) {
        const trimmed = String(value).trim();
        if (trimmed) return trimmed;
      }
    }
    return null;
  };

  // Parses the first run of digits after removing commas, whitespace and quotes.
  const parseCount = (text) => {
    const digits = String(text || '').replace(/[,\s"']/g, '').match(/(\d+)/);
    return digits ? parseInt(digits[1], 10) : null;
  };

  // Reads the poster image from the <img> attributes, else from an inline
  // CSS `url(...)` on the poster or its link.
  const readPosterImage = (el$) => {
    const poster = el$.find('.film-poster').first();
    if (!poster || !poster.length) return null;

    const imgEl = poster.find('img').first();
    if (imgEl && imgEl.length) {
      const fromImg = imgEl.attr('data-src') || imgEl.attr('data-lazy') || imgEl.attr('src') || imgEl.attr('data-original') || null;
      if (fromImg) return fromImg;
    }

    const style = poster.attr('style') || poster.find('a').attr('style') || '';
    const cssUrl = /url\(['"]?(.*?)['"]?\)/.exec(style);
    return cssUrl && cssUrl[1] ? cssUrl[1] : null;
  };

  $('div.anif-block').each((_, block) => {
    const block$ = $(block);
    const header = block$.find('.anif-block-header').text() || '';
    if (!/top\s*airing/i.test(header)) return;

    block$.find('.anif-block-ul ul.ulclear > li').slice(0, MAX_ITEMS_PER_BLOCK).each((__, li) => {
      const el$ = $(li);
      const anchor = el$.find('h3.film-name a').first();

      const rawHref = anchor.attr('href') || el$.find('a').first().attr('href') || '';
      const href = rawHref ? resolveUrl(rawHref) : null;

      const title = firstNonBlank([
        () => anchor.attr('title'),
        () => anchor.attr('data-jname'),
        () => anchor.text(),
      ]);

      const rawImage = readPosterImage(el$);
      const image = rawImage ? resolveUrl(rawImage) : null;

      const dubText = el$.find('.tick .tick-item.tick-dub').text() || el$.find('.tick-item.tick-dub').text() || '';
      const subText = el$.find('.tick .tick-item.tick-sub').text() || el$.find('.tick-item.tick-sub').text() || '';

      // Test each info item on its own: `.text()` on several nodes concatenates
      // them without a separator ("TV" + "24m" -> "TV24m"), which would defeat
      // the word-boundary match.
      const tv = el$.find('.fdi-item').filter((___, node) => /\bTV\b/i.test($(node).text())).length > 0;

      items.push({
        title,
        href: href || null,
        image: image || null,
        dub: parseCount(dubText),
        sub: parseCount(subText),
        tv,
        source,
        section: 'top_airing',
      });
    });
  });

  return items;
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
/**
 * Scrapes the homepage "Trending" carousel.
 *
 * Candidate selectors are tried in order; for each, only the first six matched
 * elements are inspected, and scraping stops at the first selector that yields
 * at least one entry. Elements lacking either a title or a link are skipped.
 *
 * @param {Function} $ - Cheerio-style query function for the loaded page.
 * @param {Function} resolveUrl - Turns a possibly relative URL into an absolute one.
 * @param {*} source - Copied verbatim onto every returned entry.
 * @returns {Array<Object>} Entries with `title`, `href`, `image`, `number`
 *   (integer or `null`), `source` and `section` (`'trending'`).
 */
export default function scrapeTrending($, resolveUrl, source) {
  const MAX_INSPECTED = 6;
  const candidateSelectors = ['.swiper-slide.item-qtip', '.trending-list .swiper-slide', '.block_area-content .swiper-slide'];
  const items = [];

  // Returns the first candidate that is non-empty after trimming, or null, so
  // a blank attribute or text node does not hide a usable fallback.
  const firstNonBlank = (getters) => {
    for (const getter of getters) {
      const value = getter();
      if (value) {
        const trimmed = String(value).trim();
        if (trimmed) return trimmed;
      }
    }
    return null;
  };

  for (const selector of candidateSelectors) {
    const found = $(selector);
    if (!found || !found.length) continue;

    found.slice(0, MAX_INSPECTED).each((_, el) => {
      const el$ = $(el);

      const title = firstNonBlank([
        () => el$.find('.film-title.dynamic-name').attr('data-jname'),
        () => el$.find('.film-title').text(),
        () => el$.find('[data-jname]').attr('data-jname'),
        () => el$.find('a').attr('title'),
        () => el$.find('.title').text(),
      ]);

      const rawHref = el$.find('a.film-poster').attr('href') || el$.find('a').first().attr('href') || '';
      const href = rawHref ? resolveUrl(rawHref) : null;

      // Both fields are mandatory; bail out before doing any further lookups.
      if (!title || !href) return;

      let image = null;
      const imgEl = el$.find('.film-poster-img').first();
      if (imgEl && imgEl.length) {
        image = imgEl.attr('data-src') || imgEl.attr('data-lazy') || imgEl.attr('src') || imgEl.attr('data-original') || null;
      }
      if (image) image = resolveUrl(image);

      // Keep a parsed 0 as 0 rather than collapsing it to null.
      const rankMatch = el$.find('.number').first().text().trim().match(/(\d+)/);
      const number = rankMatch ? parseInt(rankMatch[1], 10) : null;

      items.push({
        title,
        href,
        image: image || null,
        number,
        source,
        section: 'trending',
      });
    });

    if (items.length) break;
  }

  return items;
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
import axios from 'axios';
|
2
|
+
import { load } from 'cheerio';
|
3
|
+
|
4
|
+
/**
 * HTTP request headers used by `fetchAndLoad` for every page download.
 * Identifies the client as ShirayukiBot and asks for HTML content.
 */
export const defaultHeaders = {
  'User-Agent': 'Mozilla/5.0 (compatible; ShirayukiBot/1.0; +https://example.com/bot)',
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  'Accept-Encoding': 'gzip, deflate',
  Connection: 'keep-alive',
  'Upgrade-Insecure-Requests': '1',
};
|
12
|
+
|
13
|
+
/**
 * Builds a resolver that converts possibly relative URLs into absolute ones.
 *
 * The resolver first relies on the WHATWG `URL` constructor. If that throws
 * (for example because `base` is not an absolute URL), it falls back to simple
 * string handling: protocol-relative URLs get an `https:` scheme, root-relative
 * paths are appended to `base`, and anything else is returned unchanged.
 *
 * @param {string} base - Base URL that relative inputs are resolved against.
 * @returns {(u: string) => (string|null)} Resolver; returns `null` for empty input.
 */
export function resolveUrlFactory(base) {
  // Normalised once for the string fallback: a missing base must not be
  // rendered as "undefined", and a trailing slash must not produce "//".
  const fallbackBase = base == null ? '' : String(base).replace(/\/+$/, '');

  return (u) => {
    if (!u) return null;
    try {
      return new URL(u, base).href;
    } catch (err) {
      // Best-effort fallback; coerce so non-string input cannot throw here.
      const raw = String(u);
      if (raw.startsWith('//')) return `https:${raw}`;
      if (raw.startsWith('/') && fallbackBase) return `${fallbackBase}${raw}`;
      return raw;
    }
  };
}
|
25
|
+
|
26
|
+
/**
 * Downloads a page with axios and hands the response body to cheerio's `load`.
 *
 * The request sends `defaultHeaders`, uses a 5000 timeout (5 seconds per the
 * original note), follows at most two redirects, and only treats 2xx status
 * codes as valid.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<*>} The value returned by `load` for the response body.
 */
export async function fetchAndLoad(url) {
  // Only 2xx responses count as valid.
  const isSuccessStatus = (status) => status >= 200 && status < 300;

  const requestOptions = {
    headers: defaultHeaders,
    timeout: 5000, // kept short for production use
    maxRedirects: 2, // keep redirect chains short
    validateStatus: isSuccessStatus,
  };

  const { data } = await axios.get(url, requestOptions);
  return load(data);
}
|
37
|
+
|
38
|
+
// Aggregate default export bundling the named exports of this module.
export default { fetchAndLoad, resolveUrlFactory, defaultHeaders };
|