better-ani-scraped 1.2.3 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/scrapers/animesama.js +31 -76
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "better-ani-scraped",
|
|
3
|
-
"version": "1.2.3",
|
|
3
|
+
"version": "1.2.4",
|
|
4
4
|
"description": "Scrape anime data from different sources (only anime-sama.fr for the moment)",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"ani-scraped": "^1.2.8",
|
|
21
21
|
"axios": "^1.8.4",
|
|
22
22
|
"cheerio": "^1.0.0",
|
|
23
|
-
"
|
|
23
|
+
"playwright": "^1.52.0"
|
|
24
24
|
},
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|
package/scrapers/animesama.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import fs from "fs";
|
|
4
|
-
import puppeteer from'puppeteer';
|
|
5
4
|
|
|
6
5
|
const BASE_URL = "https://anime-sama.fr";
|
|
7
6
|
const CATALOGUE_URL = `${BASE_URL}/catalogue`;
|
|
@@ -124,87 +123,43 @@ export async function getSeasons(animeUrl, language = "vostfr") {
|
|
|
124
123
|
return seasons;
|
|
125
124
|
}
|
|
126
125
|
|
|
126
|
+
import { chromium } from 'playwright';
|
|
127
|
+
|
|
127
128
|
export async function getEpisodeTitles(animeUrl) {
|
|
128
|
-
|
|
129
|
-
try {
|
|
129
|
+
let browser;
|
|
130
130
|
try {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
console.log("🔐 [3] Configuration des headers...");
|
|
146
|
-
await page.setExtraHTTPHeaders({
|
|
147
|
-
"User-Agent": "Mozilla/5.0",
|
|
148
|
-
"Accept-Language": "fr-FR,fr;q=0.9,en;q=0.8",
|
|
149
|
-
Referer: animeUrl,
|
|
150
|
-
});
|
|
151
|
-
|
|
152
|
-
console.log("🚫 [4] Blocage des ressources inutiles...");
|
|
153
|
-
await page.setRequestInterception(true);
|
|
154
|
-
page.on('request', (req) => {
|
|
155
|
-
const blocked = ['image', 'stylesheet', 'font', 'media'];
|
|
156
|
-
if (blocked.includes(req.resourceType())) {
|
|
157
|
-
req.abort();
|
|
158
|
-
} else {
|
|
159
|
-
req.continue();
|
|
160
|
-
}
|
|
161
|
-
});
|
|
162
|
-
|
|
163
|
-
console.log(`🌍 [5] Chargement de la page : ${animeUrl}`);
|
|
164
|
-
await page.goto(animeUrl, { waitUntil: 'domcontentloaded', timeout: 20000 });
|
|
165
|
-
console.log("✅ [6] Page chargée.");
|
|
166
|
-
|
|
167
|
-
console.log("⏳ [7] Attente du sélecteur #selectEpisodes...");
|
|
168
|
-
await page.waitForSelector('#selectEpisodes', { timeout: 10000 });
|
|
169
|
-
console.log("✅ [8] Sélecteur trouvé.");
|
|
170
|
-
|
|
171
|
-
const timestamp = Date.now();
|
|
172
|
-
const screenshotPath = path.resolve(`debug-${timestamp}.png`);
|
|
173
|
-
const htmlPath = path.resolve(`page-${timestamp}.html`);
|
|
174
|
-
|
|
175
|
-
console.log("📸 [9] Screenshot pour debug...");
|
|
176
|
-
await page.screenshot({ path: screenshotPath });
|
|
177
|
-
console.log("📷 Screenshot enregistré :", screenshotPath);
|
|
178
|
-
|
|
179
|
-
console.log("📄 [10] Sauvegarde du HTML...");
|
|
180
|
-
const html = await page.content();
|
|
181
|
-
fs.writeFileSync(htmlPath, html);
|
|
182
|
-
console.log("💾 HTML sauvegardé :", htmlPath);
|
|
183
|
-
|
|
184
|
-
console.log("🔍 [11] Vérification existence du select...");
|
|
185
|
-
const selectExists = await page.$('#selectEpisodes') !== null;
|
|
186
|
-
console.log("#selectEpisodes existe :", selectExists);
|
|
187
|
-
|
|
188
|
-
console.log("🧠 [12] Récupération des titres d'épisodes...");
|
|
189
|
-
const titres = await page.$$eval('#selectEpisodes option', options =>
|
|
190
|
-
options.map(o => o.textContent.trim())
|
|
191
|
-
);
|
|
192
|
-
|
|
193
|
-
console.log("📋 [13] Titres récupérés :", titres);
|
|
194
|
-
return titres;
|
|
131
|
+
browser = await chromium.launch({ headless: true });
|
|
132
|
+
const context = await browser.newContext({
|
|
133
|
+
locale: 'fr-FR',
|
|
134
|
+
userAgent: 'Mozilla/5.0'
|
|
135
|
+
});
|
|
136
|
+
const page = await context.newPage();
|
|
137
|
+
await page.route('**/*', (route) => {
|
|
138
|
+
const blocked = ['image', 'stylesheet', 'font', 'media'];
|
|
139
|
+
if (blocked.includes(route.request().resourceType())) {
|
|
140
|
+
route.abort();
|
|
141
|
+
} else {
|
|
142
|
+
route.continue();
|
|
143
|
+
}
|
|
144
|
+
});
|
|
195
145
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
146
|
+
await page.goto(animeUrl, { waitUntil: 'domcontentloaded', timeout: 20000 });
|
|
147
|
+
await page.waitForSelector('#selectEpisodes', { timeout: 10000 });
|
|
148
|
+
const titres = await page.$$eval('#selectEpisodes option', options =>
|
|
149
|
+
options.map(o => o.textContent.trim())
|
|
150
|
+
);
|
|
151
|
+
return titres;
|
|
152
|
+
} catch (error) {
|
|
153
|
+
console.error('❌ Erreur dans la récupération des titres :', error);
|
|
154
|
+
return [];
|
|
155
|
+
} finally {
|
|
156
|
+
if (browser) {
|
|
157
|
+
await browser.close();
|
|
158
|
+
}
|
|
203
159
|
}
|
|
204
|
-
console.log("✅ [15] Fin de la fonction getEpisodeTitles.");
|
|
205
|
-
}
|
|
206
160
|
}
|
|
207
161
|
|
|
162
|
+
|
|
208
163
|
export async function getEmbed(animeUrl, hostPriority = ["vidmoly"]) {
|
|
209
164
|
const res = await axios.get(animeUrl, {
|
|
210
165
|
headers: getHeaders(animeUrl.split("/").slice(0, 5).join("/")),
|