better-ani-scraped 1.7.3 → 1.7.51

@@ -2,9 +2,14 @@ import { AnimeScraper } from "../../index.js"; // REPLACE BY "from 'better-ani-s
 
 const main = async () => {
   const animesama = new AnimeScraper('animesama');
+  const browser = await animesama.createBrowser();
+  const sharedPage = await animesama.createSharedPage(browser);
 
-  const search = await animesama.searchAnime("gachiakuta", 100, [], ["Anime", "Film", "Autres", "Scans"]);
-  console.log("Search Results:", search);
+  const search = await animesama.searchAnime("gachiakuta", 100, [], ["Anime", "Film", "Autres", "Scans"], sharedPage);
+  const search2 = await animesama.searchAnime("one piece", 100, [], ["Anime", "Film", "Autres", "Scans"], sharedPage);
+  console.log("Search \"gachiakuta\" Results:", search);
+  console.log("Search \"one piece\" Results:", search2);
+  await browser.close();
 };
 
 main().catch(console.error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "better-ani-scraped",
-  "version": "1.7.3",
+  "version": "1.7.51",
   "description": "Scrape anime data from different sources (only anime-sama.org, animepahe and crunchyroll for the moment)",
   "main": "index.js",
   "scripts": {
@@ -22,6 +22,7 @@
     "cheerio": "^1.0.0",
     "playwright": "^1.52.0",
     "puppeteer": "^24.8.1",
+    "puppeteer-core": "^24.29.1",
     "puppeteer-extra": "^3.3.6",
     "puppeteer-extra-plugin-stealth": "^2.11.2"
   },
@@ -1,12 +1,15 @@
 import axios from "axios";
 import * as cheerio from "cheerio";
 import fs from "fs";
-import path from 'path';
-import { exec as execCallback } from 'child_process';
-import { promisify } from 'util';
-import { title } from "process";
-const execAsync = promisify(execCallback);
-
+import path from "path";
+import util from "util";
+import { exec as execCallback } from "child_process";
+import puppeteerExtra from "puppeteer-extra";
+import StealthPlugin from "puppeteer-extra-plugin-stealth";
+const execAsync = util.promisify(execCallback);
+
+puppeteerExtra.use(StealthPlugin());
+
 
 const BASE_URL = "https://anime-sama.org";
 const CATALOGUE_URL = `${BASE_URL}/catalogue`;
@@ -44,12 +47,41 @@ function getHeaders(referer = BASE_URL) {
   };
 }
 
+export async function createBrowser(customChromiumPath) {
+  const executablePath = await ensureChromiumInstalled(customChromiumPath);
+  const browser = await puppeteerExtra.launch({
+    headless: true,
+    executablePath,
+    args: ["--no-sandbox", "--disable-setuid-sandbox"],
+  });
+  return browser;
+}
+
+export async function createSharedPage(browser) {
+  const page = await browser.newPage();
+  await page.setExtraHTTPHeaders(getHeaders());
+  await page.setUserAgent(
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
+  );
+  // block heavy resources but NOT scripts
+  await page.setRequestInterception(true);
+  page.on("request", (req) => {
+    const blocked = ["image", "stylesheet", "font", "media"]; // keep scripts
+    if (blocked.includes(req.resourceType())) req.abort();
+    else req.continue();
+  });
+  await page.evaluateOnNewDocument(() => {
+    Object.defineProperty(navigator, "webdriver", { get: () => false });
+  });
+  return page;
+}
 export async function searchAnime(
   query,
   limit = 10,
   wantedLanguages = null,
   wantedTypes = null,
-  page = null
+  page = null,
+  wantedPage = 1
 ) {
   const languages = Array.isArray(wantedLanguages)
     ? wantedLanguages
@@ -62,6 +94,20 @@ export async function searchAnime(
 
   const results = [];
 
+  const fetchHtml = async (url) => {
+    if (page) {
+      await page.goto("about:blank"); // Reset the page
+      await new Promise((resolve) => setTimeout(resolve, 100));
+      const html = await page
+        .goto(url, { waitUntil: "domcontentloaded" })
+        .then(() => page.content());
+      return html;
+    } else {
+      const res = await axios.get(url, { headers: getHeaders(CATALOGUE_URL) });
+      return res.data;
+    }
+  };
+
   const fetchPage = async (pageNum) => {
     const url =
       pageNum === 1
@@ -70,9 +116,10 @@
             query
           )}&page=${pageNum}`;
 
-    const res = await axios.get(url, { headers: getHeaders(CATALOGUE_URL) });
-    const $ = cheerio.load(res.data);
+    const html = await fetchHtml(url);
+    const $ = cheerio.load(html);
     const containers = $("div.catalog-card > a");
+
     containers.each((_, el) => {
       if (results.length >= limit) return false;
 
@@ -81,7 +128,11 @@
       const title = anchor.find("h2").first().text().trim();
       const altRaw = anchor.find("p").first().text().trim();
       const cover = anchor.find("img").first().attr("src");
-      const synopsis = anchor.find("div.synopsis-content").first().text();
+      const synopsis = anchor
+        .find("div.synopsis-content")
+        .first()
+        .text()
+        .trim();
 
       const typesRaw = anchor
         .find(".info-row")
@@ -92,10 +143,12 @@
         .toArray()
         .map((el) => $(el).text().trim())
         .filter((t) => isWanted(t, types));
-      const filteredTypes = wantedTypes.length === 0 ? typesRaw : typesRaw;
-      const hasScans = filteredTypes.some(
-        (t) => t.toLowerCase() === "scans".toLowerCase()
-      );
+      const filteredTypes =
+        Array.isArray(wantedTypes) && wantedTypes.length === 0
+          ? typesRaw
+          : typesRaw;
+      const hasScans = filteredTypes.some((t) => t.toLowerCase() === "scans");
+
       const languagesRaw = anchor
         .find(".info-row")
         .filter(
@@ -107,9 +160,12 @@
         .filter((l) => isWanted(l, languages));
 
       const filteredLanguages =
-        wantedLanguages.length === 0 || hasScans
+        (Array.isArray(wantedLanguages) && wantedLanguages.length === 0) ||
+        hasScans
           ? languagesRaw
-          : languagesRaw.filter((lang) => isWanted(lang, wantedLanguages));
+          : languagesRaw.filter((lang) =>
+              isWanted(lang, wantedLanguages || languages)
+            );
 
       const altTitles = altRaw
         ? altRaw
@@ -126,8 +182,9 @@
             .filter(Boolean)
         : [];
 
-      const hasValidType = types.length === 0 || filteredTypes;
-      const hasValidLanguage = languages.length === 0 || filteredLanguages;
+      const hasValidType = types.length === 0 || filteredTypes.length;
+      const hasValidLanguage =
+        languages.length === 0 || filteredLanguages.length;
 
       if (title && link && hasValidType && hasValidLanguage) {
         results.push({
@@ -144,8 +201,8 @@
     return containers.length > 0;
   };
 
-  if (page) {
-    await fetchPage(page);
+  if (wantedPage) {
+    await fetchPage(wantedPage);
   } else {
     let currentPage = 1;
     while ((await fetchPage(currentPage++)) && results.length < limit) {
@@ -16,7 +16,26 @@ export class AnimeScraper {
       );
     }
   }
-
+  async createBrowser(query, ...rest) {
+    try {
+      return await this.source.createBrowser(query, ...rest);
+    } catch (error) {
+      console.error(
+        `This scraper does not have the createBrowser function implemented or an error happened -> ${error}`
+      );
+      return null;
+    }
+  }
+  async createSharedPage(query, ...rest) {
+    try {
+      return await this.source.createSharedPage(query, ...rest);
+    } catch (error) {
+      console.error(
+        `This scraper does not have the createSharedPage function implemented or an error happened -> ${error}`
+      );
+      return null;
+    }
+  }
   async searchAnime(query, ...rest) {
     try {
       return await this.source.searchAnime(query, ...rest);