better-ani-scraped 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DOCUMENTATION.md +70 -22
- package/examples/example_usage_05.js +13 -0
- package/package.json +6 -3
- package/scrapers/animesama.js +1 -2
- package/scrapers/crunchyroll.js +89 -0
- package/scrapers/scrapers.js +14 -3
package/DOCUMENTATION.md
CHANGED
|
@@ -8,6 +8,7 @@ A set of utility functions for scraping anime data from multiple sources (only [
|
|
|
8
8
|
- [Main class](#main-class)
|
|
9
9
|
- [`AnimeScrapper("animesama")` methods](#animescrapperanimesama-methods)
|
|
10
10
|
- [`AnimeScrapper("animepahe")` methods](#animescrapperanimepahe-methods)
|
|
11
|
+
- [`AnimeScrapper("crunchyroll")` methods](#animescrappercrunchyroll-methods)
|
|
11
12
|
- [Functions](#functions)
|
|
12
13
|
|
|
13
14
|
---
|
|
@@ -15,23 +16,23 @@ A set of utility functions for scraping anime data from multiple sources (only [
|
|
|
15
16
|
## Main class
|
|
16
17
|
|
|
17
18
|
### `AnimeScraper(source)`
|
|
18
|
-
Creates a scrapper for the given source (only "animesama" and "
|
|
19
|
+
Creates a scraper for the given source (only "animesama", "animepahe" and "crunchyroll" are available at the moment).
|
|
19
20
|
|
|
20
21
|
---
|
|
21
22
|
|
|
22
23
|
## `AnimeScrapper("animesama")` methods
|
|
23
24
|
|
|
24
|
-
- [searchAnime](#
|
|
25
|
-
- [getSeasons](#
|
|
26
|
-
- [getEmbed](#
|
|
27
|
-
- [getAnimeInfo](#
|
|
28
|
-
- [getAvailableLanguages](#
|
|
29
|
-
- [getAllAnime](#
|
|
30
|
-
- [getLatestEpisodes](#
|
|
31
|
-
- [getRandomAnime](#
|
|
32
|
-
- [getEpisodeTitles](#
|
|
33
|
-
|
|
34
|
-
### `searchAnime(query, limit = 10)`
|
|
25
|
+
- [searchAnime](#animesamasearchanimequery-limit--10)
|
|
26
|
+
- [getSeasons](#animesamagetseasonsanimeurl-language--vostfr)
|
|
27
|
+
- [getEmbed](#animesamagetembedanimeurl-hostpriority--sibnet-vidmoly)
|
|
28
|
+
- [getAnimeInfo](#animesamagetanimeinfoanimeurl)
|
|
29
|
+
- [getAvailableLanguages](#animesamagetavailablelanguagesseasonurl-wantedlanguages--vostfr-vf-va-vkr-vcn-vqc)
|
|
30
|
+
- [getAllAnime](#animesamagetallanimeoutput--anime_listjson-get_seasons--false)
|
|
31
|
+
- [getLatestEpisodes](#animesamagetlatestepisodeslanguagefilter--null)
|
|
32
|
+
- [getRandomAnime](#animesamagetrandomanime)
|
|
33
|
+
- [getEpisodeTitles](#animesamagetepisodetitlesanimeurl-customchromiumpath)
|
|
34
|
+
|
|
35
|
+
### `animesama.searchAnime(query, limit = 10)`
|
|
35
36
|
Searches for anime titles that match the given query.
|
|
36
37
|
|
|
37
38
|
- **Parameters:**
|
|
@@ -54,7 +55,7 @@ Searches for anime titles that match the given query.
|
|
|
54
55
|
|
|
55
56
|
---
|
|
56
57
|
|
|
57
|
-
### `getSeasons(animeUrl, language = "vostfr")`
|
|
58
|
+
### `animesama.getSeasons(animeUrl, language = "vostfr")`
|
|
58
59
|
Fetches all available seasons of an anime in the specified language.
|
|
59
60
|
|
|
60
61
|
- **Parameters:**
|
|
@@ -75,7 +76,7 @@ Fetches all available seasons of an anime in the specified language.
|
|
|
75
76
|
|
|
76
77
|
---
|
|
77
78
|
|
|
78
|
-
### `getEmbed(animeUrl, hostPriority = ["sibnet", "vidmoly"])`
|
|
79
|
+
### `animesama.getEmbed(animeUrl, hostPriority = ["sibnet", "vidmoly"])`
|
|
79
80
|
Retrieves embed URLs for episodes, prioritizing by host.
|
|
80
81
|
|
|
81
82
|
- **Parameters:**
|
|
@@ -91,7 +92,7 @@ Retrieves embed URLs for episodes, prioritizing by host.
|
|
|
91
92
|
```
|
|
92
93
|
---
|
|
93
94
|
|
|
94
|
-
### `getAnimeInfo(animeUrl)`
|
|
95
|
+
### `animesama.getAnimeInfo(animeUrl)`
|
|
95
96
|
Extracts basic information from an anime page.
|
|
96
97
|
|
|
97
98
|
- **Parameters:**
|
|
@@ -110,7 +111,7 @@ Extracts basic information from an anime page.
|
|
|
110
111
|
|
|
111
112
|
---
|
|
112
113
|
|
|
113
|
-
### `getAvailableLanguages(seasonUrl, wantedLanguages = ["vostfr", "vf", "va", "vkr", "vcn", "vqc"])`
|
|
114
|
+
### `animesama.getAvailableLanguages(seasonUrl, wantedLanguages = ["vostfr", "vf", "va", "vkr", "vcn", "vqc"])`
|
|
114
115
|
Checks which languages are available for a given anime season. Using the default value of `wantedLanguages` is not recommended: the more languages are checked, the longer the function takes to run, so pass only the languages you actually need.
|
|
115
116
|
|
|
116
117
|
- **Parameters:**
|
|
@@ -130,7 +131,7 @@ Checks which languages are available for a given anime season (not recommended t
|
|
|
130
131
|
|
|
131
132
|
---
|
|
132
133
|
|
|
133
|
-
### `getAllAnime(output = "anime_list.json", get_seasons = false)`
|
|
134
|
+
### `animesama.getAllAnime(output = "anime_list.json", get_seasons = false)`
|
|
134
135
|
Fetches the full anime catalog, optionally including season information.
|
|
135
136
|
|
|
136
137
|
- **Parameters:**
|
|
@@ -141,7 +142,7 @@ Fetches the full anime catalog, optionally including season information.
|
|
|
141
142
|
|
|
142
143
|
---
|
|
143
144
|
|
|
144
|
-
### `getLatestEpisodes(languageFilter = null)`
|
|
145
|
+
### `animesama.getLatestEpisodes(languageFilter = null)`
|
|
145
146
|
Scrapes the latest released episodes, optionally filtered by language.
|
|
146
147
|
|
|
147
148
|
- **Parameters:**
|
|
@@ -160,7 +161,7 @@ Scrapes the latest released episodes, optionally filtered by language.
|
|
|
160
161
|
|
|
161
162
|
---
|
|
162
163
|
|
|
163
|
-
### `getRandomAnime()`
|
|
164
|
+
### `animesama.getRandomAnime()`
|
|
164
165
|
Fetches a random anime from the catalogue.
|
|
165
166
|
|
|
166
167
|
- **Returns:**
|
|
@@ -177,11 +178,12 @@ Fetches a random anime from the catalogue.
|
|
|
177
178
|
|
|
178
179
|
---
|
|
179
180
|
|
|
180
|
-
### `getEpisodeTitles(AnimeUrl)`
|
|
181
|
+
### `animesama.getEpisodeTitles(animeUrl, customChromiumPath)`
|
|
181
182
|
Fetches the names of all episodes in a season.
|
|
182
183
|
|
|
183
184
|
- **Parameters:**
|
|
184
185
|
- `animeUrl` *(string)*: URL of the anime’s season/episode page.
|
|
186
|
+
- `customChromiumPath` *(string)*: Path of the Chromium folder
|
|
185
187
|
- **Returns:**
|
|
186
188
|
An array of episode titles.
|
|
187
189
|
|
|
@@ -189,10 +191,10 @@ Fetches the names of all episodes in a season
|
|
|
189
191
|
|
|
190
192
|
## `AnimeScrapper("animepahe")` methods
|
|
191
193
|
|
|
192
|
-
- [searchAnime](#
|
|
194
|
+
- [searchAnime](#animepahesearchanimequery)
|
|
193
195
|
|
|
194
196
|
|
|
195
|
-
### `searchAnime(query)`
|
|
197
|
+
### `animepahe.searchAnime(query)`
|
|
196
198
|
Searches for anime titles that match the given query.
|
|
197
199
|
|
|
198
200
|
- **Parameters:**
|
|
@@ -220,6 +222,52 @@ Searches for anime titles that match the given query.
|
|
|
220
222
|
|
|
221
223
|
---
|
|
222
224
|
|
|
225
|
+
## `AnimeScrapper("crunchyroll")` methods
|
|
226
|
+
|
|
227
|
+
- [searchAnime](#crunchyrollsearchanimequery-limit--10)
|
|
228
|
+
- [getEpisodeInfo](#crunchyrollgetepisodeinfoanimeurl-seasontitle)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
### `crunchyroll.searchAnime(query, limit = 10)`
|
|
232
|
+
Searches for anime titles that match the given query.
|
|
233
|
+
|
|
234
|
+
- **Parameters:**
|
|
235
|
+
- `query` *(string)*: The search keyword.
|
|
236
|
+
- `limit` *(number)*: Maximum number of results to return (default: 10).
|
|
237
|
+
- **Returns:**
|
|
238
|
+
An array of anime objects:
|
|
239
|
+
```js
|
|
240
|
+
[
|
|
241
|
+
{
|
|
242
|
+
title: string,
|
|
243
|
+
url: string,
|
|
244
|
+
cover: string
|
|
245
|
+
},
|
|
246
|
+
...
|
|
247
|
+
]
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### `crunchyroll.getEpisodeInfo(animeUrl, seasonTitle)`
|
|
251
|
+
Extracts information from all episodes of a season of an anime.
|
|
252
|
+
|
|
253
|
+
- **Parameters:**
|
|
254
|
+
- `animeUrl` *(string)*: Anime page URL.
|
|
255
|
+
- `seasonTitle` *(string)*: Name of the season for which you want episode information. If null, returns episodes from season 1.
|
|
256
|
+
- **Returns:**
|
|
257
|
+
An array of episode objects:
|
|
258
|
+
```js
|
|
259
|
+
[
|
|
260
|
+
{
|
|
261
|
+
title: string,
|
|
262
|
+
synopsis: string,
|
|
263
|
+
releaseDate: string,
|
|
264
|
+
cover: string
|
|
265
|
+
},
|
|
266
|
+
...
|
|
267
|
+
]
|
|
268
|
+
```
|
|
269
|
+
---
|
|
270
|
+
|
|
223
271
|
## Functions
|
|
224
272
|
|
|
225
273
|
- [getVideoUrlFromEmbed](#getvideourlfromembedsource-embedurl)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { AnimeScraper } from "../index.js"; // REPLACE BY "from 'better-ani-scraped';"
|
|
2
|
+
|
|
3
|
+
const main = async () => {
|
|
4
|
+
const scraper = new AnimeScraper('crunchyroll');
|
|
5
|
+
const search = await scraper.searchAnime("86");
|
|
6
|
+
console.log("Search Results:", search);
|
|
7
|
+
|
|
8
|
+
const episodeInfo = await scraper.getEpisodeInfo(search[0].url, "S2")
|
|
9
|
+
console.log("Episode Info:", episodeInfo)
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
main().catch(console.error);
|
|
13
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "better-ani-scraped",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "Scrape anime data from different sources (only anime-sama.fr for the moment)",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -17,10 +17,13 @@
|
|
|
17
17
|
"license": "MIT",
|
|
18
18
|
"type": "module",
|
|
19
19
|
"dependencies": {
|
|
20
|
-
"
|
|
20
|
+
"@ablanc/crunchyroll": "^2.4.0",
|
|
21
21
|
"axios": "^1.8.4",
|
|
22
22
|
"cheerio": "^1.0.0",
|
|
23
|
-
"playwright": "^1.52.0"
|
|
23
|
+
"playwright": "^1.52.0",
|
|
24
|
+
"puppeteer": "^24.7.2",
|
|
25
|
+
"puppeteer-extra": "^3.3.6",
|
|
26
|
+
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
24
27
|
},
|
|
25
28
|
"repository": {
|
|
26
29
|
"type": "git",
|
package/scrapers/animesama.js
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import fs from "fs";
|
|
4
|
-
import puppeteer from'puppeteer';
|
|
5
4
|
|
|
6
5
|
const BASE_URL = "https://anime-sama.fr";
|
|
7
|
-
const CATALOGUE_URL = `${BASE_URL}/catalogue`;
|
|
6
|
+
const CATALOGUE_URL = `${BASE_URL}/catalogue`;
|
|
8
7
|
|
|
9
8
|
function getHeaders(referer = BASE_URL) {
|
|
10
9
|
return {
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import puppeteer from 'puppeteer-extra';
|
|
2
|
+
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
3
|
+
puppeteer.use(StealthPlugin());
|
|
4
|
+
|
|
5
|
+
const LANGUAGE = "fr";
|
|
6
|
+
const CATALOGUE_URL = `https://www.crunchyroll.com/${LANGUAGE}`;
|
|
7
|
+
|
|
8
|
+
export async function searchAnime(query, limit = 10) {
|
|
9
|
+
const url = `${CATALOGUE_URL}/search?q=${encodeURIComponent(query)}`;
|
|
10
|
+
const browser = await puppeteer.launch({ headless: true });
|
|
11
|
+
const page = await browser.newPage();
|
|
12
|
+
|
|
13
|
+
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
14
|
+
await page.waitForSelector('.series-results-cards-wrapper [data-t="search-series-card"]');
|
|
15
|
+
const results = await page.evaluate((limit) => {
|
|
16
|
+
const cards = document.querySelectorAll('.series-results-cards-wrapper [data-t="search-series-card"]');
|
|
17
|
+
const results = [];
|
|
18
|
+
|
|
19
|
+
cards.forEach(card => {
|
|
20
|
+
if (results.length < limit) {
|
|
21
|
+
const title = card.querySelector('.search-show-card__title-link--7ilnY')?.innerText;
|
|
22
|
+
const url = card.querySelector('.search-show-card__title-link--7ilnY')?.href;
|
|
23
|
+
const cover = card.querySelector('.content-image__image--7tGlg').src?.replace(/cdn-cgi\/image\/[^\/]+(\/catalog\/.*)/, 'cdn-cgi/image/$1') || null;
|
|
24
|
+
|
|
25
|
+
if (title && url && !results.some(result => result.url === url)) {
|
|
26
|
+
results.push({ title, url, cover });
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
});
|
|
30
|
+
return results;
|
|
31
|
+
}, limit);
|
|
32
|
+
|
|
33
|
+
await browser.close();
|
|
34
|
+
return results;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
export async function getEpisodeInfo(animeUrl, seasonTitle) {
|
|
40
|
+
const browser = await puppeteer.launch({ headless: true });
|
|
41
|
+
const page = await browser.newPage();
|
|
42
|
+
await page.goto(animeUrl, { waitUntil: 'domcontentloaded' });
|
|
43
|
+
try {
|
|
44
|
+
await page.waitForSelector('.erc-seasons-select .dropdown-trigger--P--FX', { timeout: 5000 });
|
|
45
|
+
await page.click('.erc-seasons-select .dropdown-trigger--P--FX');
|
|
46
|
+
await page.evaluate((seasonTitle) => {
|
|
47
|
+
const options = Array.from(document.querySelectorAll('.extended-option--Wk-jL'));
|
|
48
|
+
const target = options.find(opt => {
|
|
49
|
+
const label = opt.querySelector('.extended-option__text--MQWp1');
|
|
50
|
+
return label && label.textContent.includes(seasonTitle);
|
|
51
|
+
});
|
|
52
|
+
if (target) {
|
|
53
|
+
target.click();
|
|
54
|
+
} else {
|
|
55
|
+
console.warn('Saison non trouvée:', seasonTitle);
|
|
56
|
+
}
|
|
57
|
+
}, seasonTitle);
|
|
58
|
+
} catch { }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
try {
|
|
62
|
+
await page.waitForSelector('.show-more-button-boxed button', { timeout: 1000 });
|
|
63
|
+
await page.click('.show-more-button-boxed button');
|
|
64
|
+
} catch { }
|
|
65
|
+
|
|
66
|
+
await page.waitForSelector('div.card:not(.placeholder-card)', { timeout: 10000 });
|
|
67
|
+
const allCardInfo = await page.evaluate(() => {
|
|
68
|
+
const cards = document.querySelectorAll('div.card:not(.placeholder-card)');
|
|
69
|
+
const episodeInfo = [];
|
|
70
|
+
|
|
71
|
+
cards.forEach(card => {
|
|
72
|
+
const title = card?.querySelector('.playable-card__title-link--96psl')?.textContent || null;
|
|
73
|
+
const synopsis = card?.querySelector('.playable-card-hover__description--4Lpe4')?.textContent || null;
|
|
74
|
+
const releaseDate = card?.querySelector('.playable-card-hover__release--3Xg35 .text--gq6o-')?.textContent || null;
|
|
75
|
+
const cover = card?.querySelector('img.progressive-image-loading__original--k-k-7')?.src?.replace(/cdn-cgi\/image\/[^\/]+(\/catalog\/.*)/, 'cdn-cgi/image/$1') || null;
|
|
76
|
+
episodeInfo.push({
|
|
77
|
+
title,
|
|
78
|
+
synopsis,
|
|
79
|
+
releaseDate,
|
|
80
|
+
cover,
|
|
81
|
+
});
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
return episodeInfo;
|
|
85
|
+
});
|
|
86
|
+
await browser.close();
|
|
87
|
+
return allCardInfo;
|
|
88
|
+
}
|
|
89
|
+
|
package/scrapers/scrapers.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as animesama from "./animesama.js";
|
|
2
2
|
import * as animepahe from "./animepahe.js";
|
|
3
|
+
import * as crunchyroll from "./crunchyroll.js";
|
|
3
4
|
|
|
4
5
|
export class AnimeScraper {
|
|
5
6
|
constructor(source) {
|
|
@@ -7,8 +8,10 @@ export class AnimeScraper {
|
|
|
7
8
|
this.source = animepahe;
|
|
8
9
|
} else if (source === 'animesama') {
|
|
9
10
|
this.source = animesama;
|
|
10
|
-
} else {
|
|
11
|
-
|
|
11
|
+
} else if (source === 'crunchyroll') {
|
|
12
|
+
this.source = crunchyroll;
|
|
13
|
+
} else {
|
|
14
|
+
throw new Error('Invalid source. Choose either "animepahe", "crunchyroll" or "animesama".');
|
|
12
15
|
}
|
|
13
16
|
}
|
|
14
17
|
|
|
@@ -88,7 +91,15 @@ export class AnimeScraper {
|
|
|
88
91
|
try {
|
|
89
92
|
return await this.source.getEpisodeTitles(animeUrl, ...rest);
|
|
90
93
|
} catch (error) {
|
|
91
|
-
console.error(`This scraper does not have the
|
|
94
|
+
console.error(`This scraper does not have the getEpisodeTitles function implemented or an error happened -> ${error}`);
|
|
95
|
+
return null;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
async getEpisodeInfo(animeUrl, ...rest) {
|
|
99
|
+
try {
|
|
100
|
+
return await this.source.getEpisodeInfo(animeUrl, ...rest);
|
|
101
|
+
} catch (error) {
|
|
102
|
+
console.error(`This scraper does not have the getEpisodeInfo function implemented or an error happened -> ${error}`);
|
|
92
103
|
return null;
|
|
93
104
|
}
|
|
94
105
|
}
|