better-ani-scraped 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/DOCUMENTATION.md CHANGED
@@ -8,6 +8,7 @@ A set of utility functions for scraping anime data from multiple sources (only [
8
8
  - [Main class](#main-class)
9
9
  - [`AnimeScrapper("animesama")` methods](#animescrapperanimesama-methods)
10
10
  - [`AnimeScrapper("animepahe")` methods](#animescrapperanimepahe-methods)
11
+ - [`AnimeScrapper("crunchyroll")` methods](#animescrappercrunchyroll-methods)
11
12
  - [Functions](#functions)
12
13
 
13
14
  ---
@@ -15,23 +16,23 @@ A set of utility functions for scraping anime data from multiple sources (only [
15
16
  ## Main class
16
17
 
17
18
  ### `AnimeScraper(source)`
18
- Creates a scrapper for the given source (only "animesama" and "animepahe" available at the moment).
19
+ Creates a scraper for the given source (only "animesama", "animepahe" and "crunchyroll" are available at the moment).
19
20
 
20
21
  ---
21
22
 
22
23
  ## `AnimeScrapper("animesama")` methods
23
24
 
24
- - [searchAnime](#searchanimequery-limit--10)
25
- - [getSeasons](#getseasonsanimeurl-language--vostfr)
26
- - [getEmbed](#getembedanimeurl-hostpriority--sibnet-vidmoly)
27
- - [getAnimeInfo](#getanimeinfoanimeurl)
28
- - [getAvailableLanguages](#getavailablelanguagesseasonurl-wantedlanguages--vostfr-vf-va-vkr-vcn-vqc)
29
- - [getAllAnime](#getallanimeoutput--anime_listjson-get_seasons--false)
30
- - [getLatestEpisodes](#getlatestepisodeslanguagefilter--null)
31
- - [getRandomAnime](#getrandomanime)
32
- - [getEpisodeTitles](#getepisodetitlesanimeurl)
33
-
34
- ### `searchAnime(query, limit = 10)`
25
+ - [searchAnime](#animesamasearchanimequery-limit--10)
26
+ - [getSeasons](#animesamagetseasonsanimeurl-language--vostfr)
27
+ - [getEmbed](#animesamagetembedanimeurl-hostpriority--sibnet-vidmoly)
28
+ - [getAnimeInfo](#animesamagetanimeinfoanimeurl)
29
+ - [getAvailableLanguages](#animesamagetavailablelanguagesseasonurl-wantedlanguages--vostfr-vf-va-vkr-vcn-vqc)
30
+ - [getAllAnime](#animesamagetallanimeoutput--anime_listjson-get_seasons--false)
31
+ - [getLatestEpisodes](#animesamagetlatestepisodeslanguagefilter--null)
32
+ - [getRandomAnime](#animesamagetrandomanime)
33
+ - [getEpisodeTitles](#animesamagetepisodetitlesanimeurl-customchromiumpath)
34
+
35
+ ### `animesama.searchAnime(query, limit = 10)`
35
36
  Searches for anime titles that match the given query.
36
37
 
37
38
  - **Parameters:**
@@ -54,7 +55,7 @@ Searches for anime titles that match the given query.
54
55
 
55
56
  ---
56
57
 
57
- ### `getSeasons(animeUrl, language = "vostfr")`
58
+ ### `animesama.getSeasons(animeUrl, language = "vostfr")`
58
59
  Fetches all available seasons of an anime in the specified language.
59
60
 
60
61
  - **Parameters:**
@@ -75,7 +76,7 @@ Fetches all available seasons of an anime in the specified language.
75
76
 
76
77
  ---
77
78
 
78
- ### `getEmbed(animeUrl, hostPriority = ["sibnet", "vidmoly"])`
79
+ ### `animesama.getEmbed(animeUrl, hostPriority = ["sibnet", "vidmoly"])`
79
80
  Retrieves embed URLs for episodes, prioritizing by host.
80
81
 
81
82
  - **Parameters:**
@@ -91,7 +92,7 @@ Retrieves embed URLs for episodes, prioritizing by host.
91
92
  ```
92
93
  ---
93
94
 
94
- ### `getAnimeInfo(animeUrl)`
95
+ ### `animesama.getAnimeInfo(animeUrl)`
95
96
  Extracts basic information from an anime page.
96
97
 
97
98
  - **Parameters:**
@@ -110,7 +111,7 @@ Extracts basic information from an anime page.
110
111
 
111
112
  ---
112
113
 
113
- ### `getAvailableLanguages(seasonUrl, wantedLanguages = ["vostfr", "vf", "va", "vkr", "vcn", "vqc"])`
114
+ ### `animesama.getAvailableLanguages(seasonUrl, wantedLanguages = ["vostfr", "vf", "va", "vkr", "vcn", "vqc"])`
114
115
  Checks which languages are available for a given anime season. Using the default value of `wantedLanguages` is not recommended: the more languages you request, the longer the function takes to run, so only list the languages you actually need.
115
116
 
116
117
  - **Parameters:**
@@ -130,7 +131,7 @@ Checks which languages are available for a given anime season (not recommended t
130
131
 
131
132
  ---
132
133
 
133
- ### `getAllAnime(output = "anime_list.json", get_seasons = false)`
134
+ ### `animesama.getAllAnime(output = "anime_list.json", get_seasons = false)`
134
135
  Fetches the full anime catalog, optionally including season information.
135
136
 
136
137
  - **Parameters:**
@@ -141,7 +142,7 @@ Fetches the full anime catalog, optionally including season information.
141
142
 
142
143
  ---
143
144
 
144
- ### `getLatestEpisodes(languageFilter = null)`
145
+ ### `animesama.getLatestEpisodes(languageFilter = null)`
145
146
  Scrapes the latest released episodes, optionally filtered by language.
146
147
 
147
148
  - **Parameters:**
@@ -160,7 +161,7 @@ Scrapes the latest released episodes, optionally filtered by language.
160
161
 
161
162
  ---
162
163
 
163
- ### `getRandomAnime()`
164
+ ### `animesama.getRandomAnime()`
164
165
  Fetches a random anime from the catalogue.
165
166
 
166
167
  - **Returns:**
@@ -177,11 +178,12 @@ Fetches a random anime from the catalogue.
177
178
 
178
179
  ---
179
180
 
180
- ### `getEpisodeTitles(AnimeUrl)`
181
+ ### `animesama.getEpisodeTitles(animeUrl, customChromiumPath)`
181
182
  Fetches the names of all episodes in a season
182
183
 
183
184
  - **Parameters:**
184
185
  - `animeUrl` *(string)*: URL of the anime’s season/episode page.
186
+ - `customChromiumPath` *(string)*: Path of the Chromium folder.
185
187
  - **Returns:**
186
188
  An array of episode titles.
187
189
 
@@ -189,10 +191,10 @@ Fetches the names of all episodes in a season
189
191
 
190
192
  ## `AnimeScrapper("animepahe")` methods
191
193
 
192
- - [searchAnime](#searchanimequery)
194
+ - [searchAnime](#animepahesearchanimequery)
193
195
 
194
196
 
195
- ### `searchAnime(query)`
197
+ ### `animepahe.searchAnime(query)`
196
198
  Searches for anime titles that match the given query.
197
199
 
198
200
  - **Parameters:**
@@ -220,6 +222,52 @@ Searches for anime titles that match the given query.
220
222
 
221
223
  ---
222
224
 
225
+ ## `AnimeScrapper("crunchyroll")` methods
226
+
227
+ - [searchAnime](#crunchyrollsearchanimequery-limit--10)
228
+ - [getEpisodeInfo](#crunchyrollgetepisodeinfoanimeurl-seasontitle)
229
+
230
+
231
+ ### `crunchyroll.searchAnime(query, limit = 10)`
232
+ Searches for anime titles that match the given query.
233
+
234
+ - **Parameters:**
235
+ - `query` *(string)*: The search keyword.
236
+ - `limit` *(number)*: Maximum number of results to return (default: 10).
237
+ - **Returns:**
238
+ An array of anime objects:
239
+ ```js
240
+ [
241
+ {
242
+ title: string,
243
+ url: string,
244
+ cover: string
245
+ },
246
+ ...
247
+ ]
248
+ ```
249
+
250
+ ### `crunchyroll.getEpisodeInfo(animeUrl, seasonTitle)`
251
+ Extracts information from all episodes of a season of an anime.
252
+
253
+ - **Parameters:**
254
+ - `animeUrl` *(string)*: Anime page URL.
255
+ - `seasonTitle` *(string)*: Name of the season for which you want episode information. If null, returns episodes from season 1.
256
+ - **Returns:**
257
+ An array of episode objects:
258
+ ```js
259
+ [
260
+ {
261
+ title: string,
262
+ synopsis: string,
263
+ releaseDate: string,
264
+ cover: string
265
+ },
266
+ ...
267
+ ]
268
+ ```
269
+ ---
270
+
223
271
  ## Functions
224
272
 
225
273
  - [getVideoUrlFromEmbed](#getvideourlfromembedsource-embedurl)
@@ -0,0 +1,13 @@
1
+ import { AnimeScraper } from "../index.js"; // REPLACE BY "from 'better-ani-scraped';"
2
+
3
+ const main = async () => {
4
+ const scraper = new AnimeScraper('crunchyroll');
5
+ const search = await scraper.searchAnime("86");
6
+ console.log("Search Results:", search);
7
+
8
+ const episodeInfo = await scraper.getEpisodeInfo(search[0].url, "S2")
9
+ console.log("Episode Info:", episodeInfo)
10
+ };
11
+
12
+ main().catch(console.error);
13
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "better-ani-scraped",
3
- "version": "1.3.2",
3
+ "version": "1.4.0",
4
4
  "description": "Scrape anime data from different sources (only anime-sama.fr for the moment)",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -17,10 +17,13 @@
17
17
  "license": "MIT",
18
18
  "type": "module",
19
19
  "dependencies": {
20
- "ani-scraped": "^1.2.8",
20
+ "@ablanc/crunchyroll": "^2.4.0",
21
21
  "axios": "^1.8.4",
22
22
  "cheerio": "^1.0.0",
23
- "playwright": "^1.52.0"
23
+ "playwright": "^1.52.0",
24
+ "puppeteer": "^24.7.2",
25
+ "puppeteer-extra": "^3.3.6",
26
+ "puppeteer-extra-plugin-stealth": "^2.11.2"
24
27
  },
25
28
  "repository": {
26
29
  "type": "git",
@@ -1,10 +1,9 @@
1
1
  import axios from "axios";
2
2
  import * as cheerio from "cheerio";
3
3
  import fs from "fs";
4
- import puppeteer from'puppeteer';
5
4
 
6
5
  const BASE_URL = "https://anime-sama.fr";
7
- const CATALOGUE_URL = `${BASE_URL}/catalogue`;
6
+ const CATALOGUE_URL = `${BASE_URL}/catalogue`;
8
7
 
9
8
  function getHeaders(referer = BASE_URL) {
10
9
  return {
@@ -0,0 +1,89 @@
1
+ import puppeteer from 'puppeteer-extra';
2
+ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
3
+ puppeteer.use(StealthPlugin());
4
+
5
+ const LANGUAGE = "fr";
6
+ const CATALOGUE_URL = `https://www.crunchyroll.com/${LANGUAGE}`;
7
+
8
+ export async function searchAnime(query, limit = 10) {
9
+ const url = `${CATALOGUE_URL}/search?q=${encodeURIComponent(query)}`;
10
+ const browser = await puppeteer.launch({ headless: true });
11
+ const page = await browser.newPage();
12
+
13
+ await page.goto(url, { waitUntil: 'domcontentloaded' });
14
+ await page.waitForSelector('.series-results-cards-wrapper [data-t="search-series-card"]');
15
+ const results = await page.evaluate((limit) => {
16
+ const cards = document.querySelectorAll('.series-results-cards-wrapper [data-t="search-series-card"]');
17
+ const results = [];
18
+
19
+ cards.forEach(card => {
20
+ if (results.length < limit) {
21
+ const title = card.querySelector('.search-show-card__title-link--7ilnY')?.innerText;
22
+ const url = card.querySelector('.search-show-card__title-link--7ilnY')?.href;
23
+ const cover = card.querySelector('.content-image__image--7tGlg').src?.replace(/cdn-cgi\/image\/[^\/]+(\/catalog\/.*)/, 'cdn-cgi/image/$1') || null;
24
+
25
+ if (title && url && !results.some(result => result.url === url)) {
26
+ results.push({ title, url, cover });
27
+ }
28
+ }
29
+ });
30
+ return results;
31
+ }, limit);
32
+
33
+ await browser.close();
34
+ return results;
35
+ }
36
+
37
+
38
+
39
+ export async function getEpisodeInfo(animeUrl, seasonTitle) {
40
+ const browser = await puppeteer.launch({ headless: true });
41
+ const page = await browser.newPage();
42
+ await page.goto(animeUrl, { waitUntil: 'domcontentloaded' });
43
+ try {
44
+ await page.waitForSelector('.erc-seasons-select .dropdown-trigger--P--FX', { timeout: 5000 });
45
+ await page.click('.erc-seasons-select .dropdown-trigger--P--FX');
46
+ await page.evaluate((seasonTitle) => {
47
+ const options = Array.from(document.querySelectorAll('.extended-option--Wk-jL'));
48
+ const target = options.find(opt => {
49
+ const label = opt.querySelector('.extended-option__text--MQWp1');
50
+ return label && label.textContent.includes(seasonTitle);
51
+ });
52
+ if (target) {
53
+ target.click();
54
+ } else {
55
+ console.warn('Saison non trouvée:', seasonTitle);
56
+ }
57
+ }, seasonTitle);
58
+ } catch { }
59
+
60
+
61
+ try {
62
+ await page.waitForSelector('.show-more-button-boxed button', { timeout: 1000 });
63
+ await page.click('.show-more-button-boxed button');
64
+ } catch { }
65
+
66
+ await page.waitForSelector('div.card:not(.placeholder-card)', { timeout: 10000 });
67
+ const allCardInfo = await page.evaluate(() => {
68
+ const cards = document.querySelectorAll('div.card:not(.placeholder-card)');
69
+ const episodeInfo = [];
70
+
71
+ cards.forEach(card => {
72
+ const title = card?.querySelector('.playable-card__title-link--96psl')?.textContent || null;
73
+ const synopsis = card?.querySelector('.playable-card-hover__description--4Lpe4')?.textContent || null;
74
+ const releaseDate = card?.querySelector('.playable-card-hover__release--3Xg35 .text--gq6o-')?.textContent || null;
75
+ const cover = card?.querySelector('img.progressive-image-loading__original--k-k-7')?.src?.replace(/cdn-cgi\/image\/[^\/]+(\/catalog\/.*)/, 'cdn-cgi/image/$1') || null;
76
+ episodeInfo.push({
77
+ title,
78
+ synopsis,
79
+ releaseDate,
80
+ cover,
81
+ });
82
+ });
83
+
84
+ return episodeInfo;
85
+ });
86
+ await browser.close();
87
+ return allCardInfo;
88
+ }
89
+
@@ -1,5 +1,6 @@
1
1
  import * as animesama from "./animesama.js";
2
2
  import * as animepahe from "./animepahe.js";
3
+ import * as crunchyroll from "./crunchyroll.js";
3
4
 
4
5
  export class AnimeScraper {
5
6
  constructor(source) {
@@ -7,8 +8,10 @@ export class AnimeScraper {
7
8
  this.source = animepahe;
8
9
  } else if (source === 'animesama') {
9
10
  this.source = animesama;
10
- } else {
11
- throw new Error('Invalid source. Choose either "animepahe" or "animesama".');
11
+ } else if (source === 'crunchyroll') {
12
+ this.source = crunchyroll;
13
+ } else {
14
+ throw new Error('Invalid source. Choose either "animepahe", "crunchyroll" or "animesama".');
12
15
  }
13
16
  }
14
17
 
@@ -88,7 +91,15 @@ export class AnimeScraper {
88
91
  try {
89
92
  return await this.source.getEpisodeTitles(animeUrl, ...rest);
90
93
  } catch (error) {
91
- console.error(`This scraper does not have the getRandomAnime function implemented or an error happened -> ${error}`);
94
+ console.error(`This scraper does not have the getEpisodeTitles function implemented or an error happened -> ${error}`);
95
+ return null;
96
+ }
97
+ }
98
+ async getEpisodeInfo(animeUrl, ...rest) {
99
+ try {
100
+ return await this.source.getEpisodeInfo(animeUrl, ...rest);
101
+ } catch (error) {
102
+ console.error(`This scraper does not have the getEpisodeInfo function implemented or an error happened -> ${error}`);
92
103
  return null;
93
104
  }
94
105
  }