better-ani-scraped 1.7.4 → 1.7.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -2,9 +2,14 @@ import { AnimeScraper } from "../../index.js"; // REPLACE BY "from 'better-ani-s
|
|
|
2
2
|
|
|
3
3
|
/**
 * Example: run two AnimeSama searches that reuse one browser page.
 *
 * The browser is released in a `finally` block so a failing search does not
 * leave a Chromium process running.
 *
 * @returns {Promise<void>} Resolves once both result sets have been printed.
 */
const main = async () => {
  const animesama = new AnimeScraper('animesama');

  // The AnimeScraper wrapper logs and resolves to null when the launch fails,
  // so `browser` must be checked before it is used or closed.
  const browser = await animesama.createBrowser();

  try {
    // Without a browser there is no page to share; searchAnime is then called
    // with a null page.
    const sharedPage = browser
      ? await animesama.createSharedPage(browser)
      : null;

    const search = await animesama.searchAnime("gachiakuta", 100, [], ["Anime", "Film", "Autres", "Scans"], sharedPage);
    const search2 = await animesama.searchAnime("one piece", 100, [], ["Anime", "Film", "Autres", "Scans"], sharedPage);
    console.log("Search \"gachiakuta\" Results:", search);
    console.log("Search \"one piece\" Results:", search2);
  } finally {
    if (browser) {
      await browser.close();
    }
  }
};
|
|
9
14
|
|
|
10
15
|
main().catch(console.error);
|
package/package.json
CHANGED
package/scrapers/animesama.js
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import fs from "fs";
|
|
4
|
-
import path from
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
7
|
-
import
|
|
8
|
-
|
|
9
|
-
|
|
4
|
+
import path from "path";
|
|
5
|
+
import util from "util";
|
|
6
|
+
import { exec as execCallback } from "child_process";
|
|
7
|
+
import puppeteerExtra from "puppeteer-extra";
|
|
8
|
+
import StealthPlugin from "puppeteer-extra-plugin-stealth";
|
|
9
|
+
const execAsync = util.promisify(execCallback);
|
|
10
|
+
|
|
11
|
+
puppeteerExtra.use(StealthPlugin());
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
const BASE_URL = "https://anime-sama.org";
|
|
12
15
|
const CATALOGUE_URL = `${BASE_URL}/catalogue`;
|
|
@@ -44,12 +47,41 @@ function getHeaders(referer = BASE_URL) {
|
|
|
44
47
|
};
|
|
45
48
|
}
|
|
46
49
|
|
|
50
|
+
/**
 * Launch a headless browser through puppeteer-extra.
 *
 * @param {string} [customChromiumPath] - Forwarded unchanged to
 *   ensureChromiumInstalled(); presumably a path to a Chromium binary
 *   (TODO confirm against that helper).
 * @returns {Promise<object>} The browser returned by puppeteerExtra.launch().
 */
export async function createBrowser(customChromiumPath) {
  const launchOptions = {
    headless: true,
    // Whatever ensureChromiumInstalled() resolves to is used as the binary.
    executablePath: await ensureChromiumInstalled(customChromiumPath),
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  };

  return puppeteerExtra.launch(launchOptions);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Open a new page on `browser` and configure it so it can be reused for
 * several navigations.
 *
 * Configuration applied, in order: extra HTTP headers from getHeaders(), a
 * fixed desktop Chrome user agent, request interception that aborts image,
 * stylesheet, font and media requests (scripts are let through), and a
 * `navigator.webdriver` override installed on every new document.
 *
 * @param {object} browser - Browser instance exposing `newPage()`.
 * @returns {Promise<object>} The configured page.
 * @throws {TypeError} If `browser` is missing or has no `newPage()` method.
 */
export async function createSharedPage(browser) {
  if (!browser || typeof browser.newPage !== "function") {
    throw new TypeError(
      "createSharedPage expects a browser instance exposing newPage()"
    );
  }

  const page = await browser.newPage();
  await page.setExtraHTTPHeaders(getHeaders());
  await page.setUserAgent(
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
  );

  // Block heavy resources but NOT scripts. Built once rather than on every
  // intercepted request.
  const blockedTypes = new Set(["image", "stylesheet", "font", "media"]);

  await page.setRequestInterception(true);
  page.on("request", async (req) => {
    // Another interceptor may already have resolved this request; resolving
    // it a second time would throw.
    if (
      typeof req.isInterceptResolutionHandled === "function" &&
      req.isInterceptResolutionHandled()
    ) {
      return;
    }

    try {
      if (blockedTypes.has(req.resourceType())) {
        await req.abort();
      } else {
        await req.continue();
      }
    } catch (error) {
      // Resource blocking is a best-effort optimisation. abort()/continue()
      // can reject (for instance when the request is no longer pending), and
      // an unawaited rejection here would surface as an unhandled rejection,
      // so the failure is deliberately ignored.
    }
  });

  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, "webdriver", { get: () => false });
  });

  return page;
}
|
|
47
78
|
export async function searchAnime(
|
|
48
79
|
query,
|
|
49
80
|
limit = 10,
|
|
50
81
|
wantedLanguages = null,
|
|
51
82
|
wantedTypes = null,
|
|
52
|
-
page = null
|
|
83
|
+
page = null,
|
|
84
|
+
wantedPage = 1
|
|
53
85
|
) {
|
|
54
86
|
const languages = Array.isArray(wantedLanguages)
|
|
55
87
|
? wantedLanguages
|
|
@@ -62,6 +94,20 @@ export async function searchAnime(
|
|
|
62
94
|
|
|
63
95
|
const results = [];
|
|
64
96
|
|
|
97
|
+
/**
 * Retrieve the HTML for `url`.
 *
 * When a shared `page` was supplied to the enclosing search, the page is
 * navigated to the URL and its serialized content is returned; otherwise the
 * URL is fetched with axios using the catalogue headers.
 *
 * @param {string} url - Address to load.
 * @returns {Promise<*>} `page.content()` in browser mode, `res.data` otherwise.
 */
const fetchHtml = async (url) => {
  if (!page) {
    const { data } = await axios.get(url, {
      headers: getHeaders(CATALOGUE_URL),
    });
    return data;
  }

  // Reset the page before the real navigation.
  await page.goto("about:blank");
  // Short pause after the reset -- presumably lets the blank navigation
  // settle (TODO confirm the 100 ms value is required).
  await new Promise((done) => setTimeout(done, 100));

  await page.goto(url, { waitUntil: "domcontentloaded" });
  return page.content();
};
|
|
110
|
+
|
|
65
111
|
const fetchPage = async (pageNum) => {
|
|
66
112
|
const url =
|
|
67
113
|
pageNum === 1
|
|
@@ -70,9 +116,10 @@ export async function searchAnime(
|
|
|
70
116
|
query
|
|
71
117
|
)}&page=${pageNum}`;
|
|
72
118
|
|
|
73
|
-
const
|
|
74
|
-
const $ = cheerio.load(
|
|
119
|
+
const html = await fetchHtml(url);
|
|
120
|
+
const $ = cheerio.load(html);
|
|
75
121
|
const containers = $("div.catalog-card > a");
|
|
122
|
+
|
|
76
123
|
containers.each((_, el) => {
|
|
77
124
|
if (results.length >= limit) return false;
|
|
78
125
|
|
|
@@ -81,7 +128,11 @@ export async function searchAnime(
|
|
|
81
128
|
const title = anchor.find("h2").first().text().trim();
|
|
82
129
|
const altRaw = anchor.find("p").first().text().trim();
|
|
83
130
|
const cover = anchor.find("img").first().attr("src");
|
|
84
|
-
const synopsis = anchor
|
|
131
|
+
const synopsis = anchor
|
|
132
|
+
.find("div.synopsis-content")
|
|
133
|
+
.first()
|
|
134
|
+
.text()
|
|
135
|
+
.trim();
|
|
85
136
|
|
|
86
137
|
const typesRaw = anchor
|
|
87
138
|
.find(".info-row")
|
|
@@ -92,10 +143,12 @@ export async function searchAnime(
|
|
|
92
143
|
.toArray()
|
|
93
144
|
.map((el) => $(el).text().trim())
|
|
94
145
|
.filter((t) => isWanted(t, types));
|
|
95
|
-
const filteredTypes =
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
146
|
+
const filteredTypes =
|
|
147
|
+
Array.isArray(wantedTypes) && wantedTypes.length === 0
|
|
148
|
+
? typesRaw
|
|
149
|
+
: typesRaw;
|
|
150
|
+
const hasScans = filteredTypes.some((t) => t.toLowerCase() === "scans");
|
|
151
|
+
|
|
99
152
|
const languagesRaw = anchor
|
|
100
153
|
.find(".info-row")
|
|
101
154
|
.filter(
|
|
@@ -107,9 +160,12 @@ export async function searchAnime(
|
|
|
107
160
|
.filter((l) => isWanted(l, languages));
|
|
108
161
|
|
|
109
162
|
const filteredLanguages =
|
|
110
|
-
wantedLanguages.length === 0 ||
|
|
163
|
+
(Array.isArray(wantedLanguages) && wantedLanguages.length === 0) ||
|
|
164
|
+
hasScans
|
|
111
165
|
? languagesRaw
|
|
112
|
-
: languagesRaw.filter((lang) =>
|
|
166
|
+
: languagesRaw.filter((lang) =>
|
|
167
|
+
isWanted(lang, wantedLanguages || languages)
|
|
168
|
+
);
|
|
113
169
|
|
|
114
170
|
const altTitles = altRaw
|
|
115
171
|
? altRaw
|
|
@@ -126,8 +182,9 @@ export async function searchAnime(
|
|
|
126
182
|
.filter(Boolean)
|
|
127
183
|
: [];
|
|
128
184
|
|
|
129
|
-
const hasValidType = types.length === 0 || filteredTypes;
|
|
130
|
-
const hasValidLanguage =
|
|
185
|
+
const hasValidType = types.length === 0 || filteredTypes.length;
|
|
186
|
+
const hasValidLanguage =
|
|
187
|
+
languages.length === 0 || filteredLanguages.length;
|
|
131
188
|
|
|
132
189
|
if (title && link && hasValidType && hasValidLanguage) {
|
|
133
190
|
results.push({
|
|
@@ -144,8 +201,8 @@ export async function searchAnime(
|
|
|
144
201
|
return containers.length > 0;
|
|
145
202
|
};
|
|
146
203
|
|
|
147
|
-
if (
|
|
148
|
-
await fetchPage(
|
|
204
|
+
if (wantedPage) {
|
|
205
|
+
await fetchPage(wantedPage);
|
|
149
206
|
} else {
|
|
150
207
|
let currentPage = 1;
|
|
151
208
|
while ((await fetchPage(currentPage++)) && results.length < limit) {
|
package/scrapers/scrapers.js
CHANGED
|
@@ -16,7 +16,26 @@ export class AnimeScraper {
|
|
|
16
16
|
);
|
|
17
17
|
}
|
|
18
18
|
}
|
|
19
|
-
|
|
19
|
+
async createBrowser(query, ...rest) {
|
|
20
|
+
try {
|
|
21
|
+
return await this.source.createBrowser(query, ...rest);
|
|
22
|
+
} catch (error) {
|
|
23
|
+
console.error(
|
|
24
|
+
`This scraper does not have the createBrowser function implemented or an error happened -> ${error}`
|
|
25
|
+
);
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
async createSharedPage(query, ...rest) {
|
|
30
|
+
try {
|
|
31
|
+
return await this.source.createSharedPage(query, ...rest);
|
|
32
|
+
} catch (error) {
|
|
33
|
+
console.error(
|
|
34
|
+
`This scraper does not have the createSharedPage function implemented or an error happened -> ${error}`
|
|
35
|
+
);
|
|
36
|
+
return null;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
20
39
|
async searchAnime(query, ...rest) {
|
|
21
40
|
try {
|
|
22
41
|
return await this.source.searchAnime(query, ...rest);
|