better-ani-scraped 1.2.3 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/scrapers/animesama.js +32 -68
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "better-ani-scraped",
|
|
3
|
-
"version": "1.2.3",
|
|
3
|
+
"version": "1.2.5",
|
|
4
4
|
"description": "Scrape anime data from different sources (only anime-sama.fr for the moment)",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"ani-scraped": "^1.2.8",
|
|
21
21
|
"axios": "^1.8.4",
|
|
22
22
|
"cheerio": "^1.0.0",
|
|
23
|
-
"
|
|
23
|
+
"playwright": "^1.52.0"
|
|
24
24
|
},
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|
package/scrapers/animesama.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import fs from "fs";
|
|
4
|
-
import puppeteer from'puppeteer';
|
|
5
4
|
|
|
6
5
|
const BASE_URL = "https://anime-sama.fr";
|
|
7
6
|
const CATALOGUE_URL = `${BASE_URL}/catalogue`;
|
|
@@ -124,84 +123,49 @@ export async function getSeasons(animeUrl, language = "vostfr") {
|
|
|
124
123
|
return seasons;
|
|
125
124
|
}
|
|
126
125
|
|
|
127
|
-
|
|
126
|
+
import { chromium } from 'playwright';
|
|
127
|
+
|
|
128
|
+
export async function getEpisodeTitles(animeUrl, options = {}) {
|
|
129
|
+
const { executablePath = null } = options; // <-- ajout ici
|
|
128
130
|
let browser;
|
|
131
|
+
|
|
129
132
|
try {
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
headless: true,
|
|
134
|
-
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
|
133
|
+
browser = await chromium.launch({
|
|
134
|
+
headless: true,
|
|
135
|
+
...(executablePath ? { executablePath } : {}) // <-- on injecte le chemin si dispo
|
|
135
136
|
});
|
|
136
|
-
console.log("✅ [1.1] Puppeteer lancé avec succès :", !!browser);
|
|
137
|
-
} catch (launchError) {
|
|
138
|
-
console.error("❌ [1.2] Échec du lancement de Puppeteer :", launchError);
|
|
139
|
-
return [];
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
console.log("🌐 [2] Création d'une nouvelle page...");
|
|
143
|
-
const page = await browser.newPage();
|
|
144
|
-
|
|
145
|
-
console.log("🔐 [3] Configuration des headers...");
|
|
146
|
-
await page.setExtraHTTPHeaders({
|
|
147
|
-
"User-Agent": "Mozilla/5.0",
|
|
148
|
-
"Accept-Language": "fr-FR,fr;q=0.9,en;q=0.8",
|
|
149
|
-
Referer: animeUrl,
|
|
150
|
-
});
|
|
151
|
-
|
|
152
|
-
console.log("🚫 [4] Blocage des ressources inutiles...");
|
|
153
|
-
await page.setRequestInterception(true);
|
|
154
|
-
page.on('request', (req) => {
|
|
155
|
-
const blocked = ['image', 'stylesheet', 'font', 'media'];
|
|
156
|
-
if (blocked.includes(req.resourceType())) {
|
|
157
|
-
req.abort();
|
|
158
|
-
} else {
|
|
159
|
-
req.continue();
|
|
160
|
-
}
|
|
161
|
-
});
|
|
162
137
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
console.log("⏳ [7] Attente du sélecteur #selectEpisodes...");
|
|
168
|
-
await page.waitForSelector('#selectEpisodes', { timeout: 10000 });
|
|
169
|
-
console.log("✅ [8] Sélecteur trouvé.");
|
|
170
|
-
|
|
171
|
-
const timestamp = Date.now();
|
|
172
|
-
const screenshotPath = path.resolve(`debug-${timestamp}.png`);
|
|
173
|
-
const htmlPath = path.resolve(`page-${timestamp}.html`);
|
|
174
|
-
|
|
175
|
-
console.log("📸 [9] Screenshot pour debug...");
|
|
176
|
-
await page.screenshot({ path: screenshotPath });
|
|
177
|
-
console.log("📷 Screenshot enregistré :", screenshotPath);
|
|
138
|
+
const context = await browser.newContext({
|
|
139
|
+
locale: 'fr-FR',
|
|
140
|
+
userAgent: 'Mozilla/5.0'
|
|
141
|
+
});
|
|
178
142
|
|
|
179
|
-
|
|
180
|
-
const html = await page.content();
|
|
181
|
-
fs.writeFileSync(htmlPath, html);
|
|
182
|
-
console.log("💾 HTML sauvegardé :", htmlPath);
|
|
143
|
+
const page = await context.newPage();
|
|
183
144
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
145
|
+
await page.route('**/*', (route) => {
|
|
146
|
+
const blocked = ['image', 'stylesheet', 'font', 'media'];
|
|
147
|
+
if (blocked.includes(route.request().resourceType())) {
|
|
148
|
+
route.abort();
|
|
149
|
+
} else {
|
|
150
|
+
route.continue();
|
|
151
|
+
}
|
|
152
|
+
});
|
|
187
153
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
options.map(o => o.textContent.trim())
|
|
191
|
-
);
|
|
154
|
+
await page.goto(animeUrl, { waitUntil: 'domcontentloaded', timeout: 20000 });
|
|
155
|
+
await page.waitForSelector('#selectEpisodes', { timeout: 10000 });
|
|
192
156
|
|
|
193
|
-
|
|
194
|
-
|
|
157
|
+
const titres = await page.$$eval('#selectEpisodes option', options =>
|
|
158
|
+
options.map(o => o.textContent.trim())
|
|
159
|
+
);
|
|
195
160
|
|
|
161
|
+
return titres;
|
|
196
162
|
} catch (error) {
|
|
197
|
-
|
|
198
|
-
|
|
163
|
+
console.error('❌ Erreur dans la récupération des titres :', error);
|
|
164
|
+
return [];
|
|
199
165
|
} finally {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
}
|
|
204
|
-
console.log("✅ [15] Fin de la fonction getEpisodeTitles.");
|
|
166
|
+
if (browser) {
|
|
167
|
+
await browser.close();
|
|
168
|
+
}
|
|
205
169
|
}
|
|
206
170
|
}
|
|
207
171
|
|