@kadaliao/geektime-downloader 1.0.4 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/README.md +43 -6
- package/download.js +1127 -36
- package/package.json +5 -2
package/download.js
CHANGED
|
@@ -9,9 +9,11 @@ import path from 'path';
|
|
|
9
9
|
import { fileURLToPath } from 'url';
|
|
10
10
|
import * as pdfLib from 'pdf-lib';
|
|
11
11
|
import { outlinePdfFactory } from '@lillallol/outline-pdf';
|
|
12
|
+
import epubGenMemory from 'epub-gen-memory';
|
|
12
13
|
|
|
13
14
|
const { PDFDocument } = pdfLib;
|
|
14
15
|
const outlinePdf = outlinePdfFactory(pdfLib);
|
|
16
|
+
const epub = epubGenMemory.default || epubGenMemory;
|
|
15
17
|
|
|
16
18
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
17
19
|
|
|
@@ -197,6 +199,21 @@ const PRINT_FIX_CSS = `
|
|
|
197
199
|
overflow: visible !important;
|
|
198
200
|
max-width: 100% !important;
|
|
199
201
|
box-sizing: border-box !important;
|
|
202
|
+
/* 由于关闭了printBackground,用边框区分代码块 */
|
|
203
|
+
border: 1px solid #ddd !important;
|
|
204
|
+
padding: 10px !important;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/* 内联代码样式 */
|
|
208
|
+
code {
|
|
209
|
+
padding: 2px 6px !important;
|
|
210
|
+
border-radius: 3px !important;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/* 代码块容器样式 */
|
|
214
|
+
pre {
|
|
215
|
+
border-radius: 5px !important;
|
|
216
|
+
padding: 15px !important;
|
|
200
217
|
}
|
|
201
218
|
|
|
202
219
|
/* 确保图片适应页面且不溢出 */
|
|
@@ -238,7 +255,91 @@ function parseCookies(cookieString) {
|
|
|
238
255
|
}
|
|
239
256
|
|
|
240
257
|
// 获取专栏所有文章列表(通过API)
|
|
241
|
-
|
|
258
|
+
/**
 * Safely read a nested own-property value from an object.
 *
 * Only own properties are followed at every step, so inherited members such
 * as `toString` or `constructor` are never returned.
 *
 * @param {*} obj - Root value to read from. Falsy roots yield `undefined`.
 * @param {string|Array<string|number>} path - Either a dot-separated string
 *   ("a.b.c") or an array of keys (["a", "b", "c"]). The array form makes it
 *   possible to address keys that themselves contain a dot.
 * @returns {*} The value found at the path, or `undefined` when the root is
 *   falsy, the path is empty or of an unsupported type, or any step is missing.
 */
function getValueByPath(obj, path) {
  if (!obj || !path) return undefined;

  let keys;
  if (Array.isArray(path)) {
    keys = path;
  } else if (typeof path === 'string') {
    keys = path.split('.');
  } else {
    // Unsupported path types used to crash on `path.split`; treat them as "not found".
    return undefined;
  }

  if (keys.length === 0) return undefined;

  return keys.reduce((acc, key) => {
    if (acc && Object.prototype.hasOwnProperty.call(acc, key)) {
      return acc[key];
    }
    return undefined;
  }, obj);
}
|
|
267
|
+
|
|
268
|
+
/**
 * Collect candidate author/teacher names from one API payload object.
 *
 * A fixed list of top-level field names is probed on `data`. Each value found
 * is flattened as follows:
 *   - arrays are walked element by element;
 *   - objects contribute their well-known name keys plus every string
 *     property whose key contains "name" (case-insensitive);
 *   - strings are trimmed and kept when non-empty.
 * Any other primitive (number, boolean, ...) is ignored: such values are IDs
 * or flags, and coercing them with `String()` produced bogus names like
 * "12345" or "true".
 *
 * @param {*} data - Payload object to inspect.
 * @returns {string[]} Unique names in first-seen order; empty when `data` is
 *   not an object or no name was found.
 */
function collectAuthorNamesFromData(data) {
  if (!data || typeof data !== 'object') {
    return [];
  }

  // A Set both de-duplicates and preserves insertion order.
  const candidates = new Set();
  const possibleKeys = ['name', 'nickname', 'author_name', 'teacher_name', 'lecturer_name'];

  const pushCandidate = (value) => {
    if (!value) return;
    if (Array.isArray(value)) {
      value.forEach(pushCandidate);
      return;
    }
    if (typeof value === 'object') {
      possibleKeys.forEach((key) => {
        if (value[key]) {
          pushCandidate(value[key]);
        }
      });
      // Also pick up any other string field whose key looks name-related.
      Object.keys(value).forEach((key) => {
        if (typeof value[key] === 'string' && key.toLowerCase().includes('name')) {
          pushCandidate(value[key]);
        }
      });
      return;
    }
    if (typeof value !== 'string') {
      // Numbers, booleans, etc. are not names.
      return;
    }
    const text = value.trim();
    if (text) {
      candidates.add(text);
    }
  };

  const fieldPaths = [
    'author', 'author_name', 'authorName',
    'teachers', 'teacher', 'teacher_name', 'teacherName', 'teacher_info', 'teacherInfo',
    'lecturer', 'lecturer_name', 'lecturerName', 'lecturers',
    'authors', 'column_author', 'columnAuthor', 'column_author_name',
    'column_teacher', 'columnTeacher', 'product_author', 'productAuthor',
    'product_teacher', 'productTeacher',
    'owner', 'owner_name', 'speaker', 'speaker_name',
    'contributors', 'writer', 'writers', 'author_list', 'authorList'
  ];

  // `fieldPath` (not `path`) so the imported `path` module is not shadowed.
  fieldPaths.forEach((fieldPath) => {
    pushCandidate(getValueByPath(data, fieldPath));
  });

  return Array.from(candidates);
}
|
|
320
|
+
|
|
321
|
+
/**
 * Resolve a display string for the column's author(s).
 *
 * Payloads are consulted in priority order: the column-info payload, the
 * article-list payload, then the first entry of the article list. The first
 * payload that yields at least one name wins, and its names are joined with
 * ", ".
 *
 * @param {*} columnInfoData - Column-info API response; its `data` is inspected.
 * @param {*} articlesData - Article-list API response; its `data` and
 *   `data.list[0]` are inspected.
 * @returns {string} Comma-separated author names, or '' when none is found.
 */
function extractColumnAuthor(columnInfoData, articlesData) {
  const columnPayload = columnInfoData?.data;
  const articlesPayload = articlesData?.data;

  const payloads = [];
  if (columnPayload) {
    payloads.push(columnPayload);
  }
  if (articlesPayload) {
    payloads.push(articlesPayload);
    const { list } = articlesPayload;
    if (Array.isArray(list) && list.length > 0) {
      payloads.push(list[0]);
    }
  }

  let author = '';
  // `some` stops at the first payload that produced any name.
  payloads.some((payload) => {
    const found = collectAuthorNamesFromData(payload);
    if (found.length === 0) {
      return false;
    }
    author = found.join(', ');
    return true;
  });
  return author;
}
|
|
341
|
+
|
|
342
|
+
async function getArticleList(page, columnUrl, timeout = 60000) {
|
|
242
343
|
const spinner = ora('正在获取专栏信息...').start();
|
|
243
344
|
|
|
244
345
|
// 从 URL 提取专栏 ID
|
|
@@ -298,7 +399,7 @@ async function getArticleList(page, columnUrl) {
|
|
|
298
399
|
try {
|
|
299
400
|
// 先设置监听器,再访问页面
|
|
300
401
|
spinner.text = '正在加载页面...';
|
|
301
|
-
await page.goto(columnUrl, { waitUntil: 'networkidle', timeout
|
|
402
|
+
await page.goto(columnUrl, { waitUntil: 'networkidle', timeout });
|
|
302
403
|
|
|
303
404
|
spinner.text = '正在获取文章列表...';
|
|
304
405
|
|
|
@@ -349,6 +450,25 @@ async function getArticleList(page, columnUrl) {
|
|
|
349
450
|
|
|
350
451
|
if (!articlesData || !articlesData.data || !articlesData.data.list) {
|
|
351
452
|
spinner.fail('API响应数据格式错误');
|
|
453
|
+
|
|
454
|
+
// 智能判断可能的原因
|
|
455
|
+
if (!articlesData) {
|
|
456
|
+
console.log(chalk.yellow('\n⚠️ 未能获取到文章列表数据\n'));
|
|
457
|
+
console.log(chalk.cyan('可能的原因:'));
|
|
458
|
+
console.log(chalk.gray(' 1. Cookie 已过期或无效 - 请重新获取 Cookie'));
|
|
459
|
+
console.log(chalk.gray(' 2. 网络连接问题 - 请检查网络'));
|
|
460
|
+
console.log(chalk.gray(' 3. 专栏 ID 不正确 - 请检查 URL\n'));
|
|
461
|
+
} else if (articlesData.code === -3000 || articlesData.code === -3001) {
|
|
462
|
+
console.log(chalk.red('\n❌ Cookie 已失效\n'));
|
|
463
|
+
console.log(chalk.cyan('📖 请重新获取 Cookie:'));
|
|
464
|
+
console.log(chalk.gray(' 1. 浏览器登录极客时间'));
|
|
465
|
+
console.log(chalk.gray(' 2. 按 F12 打开开发者工具'));
|
|
466
|
+
console.log(chalk.gray(' 3. Network 标签 → 刷新页面'));
|
|
467
|
+
console.log(chalk.gray(' 4. 点击任意请求 → 复制 Cookie\n'));
|
|
468
|
+
} else if (articlesData.error) {
|
|
469
|
+
console.log(chalk.yellow(`\n⚠️ API 返回错误: ${articlesData.error.msg || articlesData.error}\n`));
|
|
470
|
+
}
|
|
471
|
+
|
|
352
472
|
return { articles: [], columnTitle: 'unknown' };
|
|
353
473
|
}
|
|
354
474
|
|
|
@@ -460,6 +580,8 @@ async function getArticleList(page, columnUrl) {
|
|
|
460
580
|
console.log(chalk.gray(` 提取的专栏名: ${columnTitle}\n`));
|
|
461
581
|
}
|
|
462
582
|
|
|
583
|
+
const columnAuthor = extractColumnAuthor(columnInfoData, articlesData) || '极客时间';
|
|
584
|
+
|
|
463
585
|
// 解析文章列表
|
|
464
586
|
const rawArticles = articlesData.data.list;
|
|
465
587
|
|
|
@@ -486,11 +608,11 @@ async function getArticleList(page, columnUrl) {
|
|
|
486
608
|
});
|
|
487
609
|
|
|
488
610
|
spinner.succeed(`找到 ${chalk.green(articles.length)} 篇文章 - ${columnTitle}`);
|
|
489
|
-
return { articles, columnTitle };
|
|
611
|
+
return { articles, columnTitle, columnAuthor };
|
|
490
612
|
}
|
|
491
613
|
|
|
492
614
|
// 并发下载控制器
|
|
493
|
-
async function downloadWithConcurrency(context, articles, outputDir, concurrency = 5, delay = 2000) {
|
|
615
|
+
async function downloadWithConcurrency(context, articles, outputDir, concurrency = 5, delay = 2000, timeout = 60000) {
|
|
494
616
|
const results = [];
|
|
495
617
|
const total = articles.length;
|
|
496
618
|
let completed = 0;
|
|
@@ -505,6 +627,12 @@ async function downloadWithConcurrency(context, articles, outputDir, concurrency
|
|
|
505
627
|
}
|
|
506
628
|
const pages = await Promise.all(pool);
|
|
507
629
|
|
|
630
|
+
// 为每个页面设置默认超时
|
|
631
|
+
pages.forEach(page => {
|
|
632
|
+
page.setDefaultTimeout(timeout);
|
|
633
|
+
page.setDefaultNavigationTimeout(timeout);
|
|
634
|
+
});
|
|
635
|
+
|
|
508
636
|
// 处理队列
|
|
509
637
|
let currentIndex = 0;
|
|
510
638
|
|
|
@@ -680,7 +808,66 @@ async function downloadArticleSilent(page, article, outputDir, index, total) {
|
|
|
680
808
|
}, article.originalTitle || article.title);
|
|
681
809
|
|
|
682
810
|
// 等待文章内容加载
|
|
683
|
-
await page.waitForSelector('.Index_articleContent_QBG5G, .content'
|
|
811
|
+
await page.waitForSelector('.Index_articleContent_QBG5G, .content');
|
|
812
|
+
|
|
813
|
+
// 优化图片大小:将大图片转换为合适的尺寸,减小PDF体积
|
|
814
|
+
await page.evaluate(() => {
|
|
815
|
+
const images = document.querySelectorAll('img');
|
|
816
|
+
const promises = Array.from(images).map(img => {
|
|
817
|
+
return new Promise((resolve) => {
|
|
818
|
+
// 如果图片还未加载完成,等待加载
|
|
819
|
+
if (!img.complete) {
|
|
820
|
+
img.onload = () => processImage(img, resolve);
|
|
821
|
+
img.onerror = () => resolve(); // 图片加载失败,跳过
|
|
822
|
+
} else {
|
|
823
|
+
processImage(img, resolve);
|
|
824
|
+
}
|
|
825
|
+
});
|
|
826
|
+
});
|
|
827
|
+
|
|
828
|
+
function processImage(img, resolve) {
|
|
829
|
+
try {
|
|
830
|
+
const maxWidth = 800; // 最大宽度
|
|
831
|
+
const quality = 0.7; // JPEG质量(0-1)
|
|
832
|
+
|
|
833
|
+
// 只处理较大的图片
|
|
834
|
+
if (img.naturalWidth <= maxWidth) {
|
|
835
|
+
resolve();
|
|
836
|
+
return;
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
// 创建canvas压缩图片
|
|
840
|
+
const canvas = document.createElement('canvas');
|
|
841
|
+
const ctx = canvas.getContext('2d');
|
|
842
|
+
|
|
843
|
+
const ratio = maxWidth / img.naturalWidth;
|
|
844
|
+
canvas.width = maxWidth;
|
|
845
|
+
canvas.height = img.naturalHeight * ratio;
|
|
846
|
+
|
|
847
|
+
// 绘制图片
|
|
848
|
+
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
|
|
849
|
+
|
|
850
|
+
// 转换为压缩后的data URL
|
|
851
|
+
canvas.toBlob((blob) => {
|
|
852
|
+
const url = URL.createObjectURL(blob);
|
|
853
|
+
img.src = url;
|
|
854
|
+
img.style.width = maxWidth + 'px';
|
|
855
|
+
img.style.height = 'auto';
|
|
856
|
+
resolve();
|
|
857
|
+
}, 'image/jpeg', quality);
|
|
858
|
+
} catch (e) {
|
|
859
|
+
// 如果压缩失败,至少限制大小
|
|
860
|
+
img.style.maxWidth = '800px';
|
|
861
|
+
img.style.height = 'auto';
|
|
862
|
+
resolve();
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
return Promise.all(promises);
|
|
867
|
+
});
|
|
868
|
+
|
|
869
|
+
// 等待图片处理完成
|
|
870
|
+
await page.waitForTimeout(1000);
|
|
684
871
|
|
|
685
872
|
// 生成 PDF
|
|
686
873
|
const filename = `${String(index).padStart(3, '0')}_${article.title}.pdf`;
|
|
@@ -695,7 +882,8 @@ async function downloadArticleSilent(page, article, outputDir, index, total) {
|
|
|
695
882
|
bottom: '20mm',
|
|
696
883
|
left: '15mm'
|
|
697
884
|
},
|
|
698
|
-
printBackground:
|
|
885
|
+
printBackground: false, // 关闭背景打印,显著减小文件大小
|
|
886
|
+
preferCSSPageSize: false
|
|
699
887
|
});
|
|
700
888
|
|
|
701
889
|
return { success: true, title: article.title };
|
|
@@ -819,7 +1007,66 @@ async function downloadArticle(page, article, outputDir, index, total) {
|
|
|
819
1007
|
}, article.originalTitle || article.title);
|
|
820
1008
|
|
|
821
1009
|
// 等待文章内容加载
|
|
822
|
-
await page.waitForSelector('.Index_articleContent_QBG5G, .content'
|
|
1010
|
+
await page.waitForSelector('.Index_articleContent_QBG5G, .content');
|
|
1011
|
+
|
|
1012
|
+
// 优化图片大小:将大图片转换为合适的尺寸,减小PDF体积
|
|
1013
|
+
await page.evaluate(() => {
|
|
1014
|
+
const images = document.querySelectorAll('img');
|
|
1015
|
+
const promises = Array.from(images).map(img => {
|
|
1016
|
+
return new Promise((resolve) => {
|
|
1017
|
+
// 如果图片还未加载完成,等待加载
|
|
1018
|
+
if (!img.complete) {
|
|
1019
|
+
img.onload = () => processImage(img, resolve);
|
|
1020
|
+
img.onerror = () => resolve(); // 图片加载失败,跳过
|
|
1021
|
+
} else {
|
|
1022
|
+
processImage(img, resolve);
|
|
1023
|
+
}
|
|
1024
|
+
});
|
|
1025
|
+
});
|
|
1026
|
+
|
|
1027
|
+
function processImage(img, resolve) {
|
|
1028
|
+
try {
|
|
1029
|
+
const maxWidth = 800; // 最大宽度
|
|
1030
|
+
const quality = 0.7; // JPEG质量(0-1)
|
|
1031
|
+
|
|
1032
|
+
// 只处理较大的图片
|
|
1033
|
+
if (img.naturalWidth <= maxWidth) {
|
|
1034
|
+
resolve();
|
|
1035
|
+
return;
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
// 创建canvas压缩图片
|
|
1039
|
+
const canvas = document.createElement('canvas');
|
|
1040
|
+
const ctx = canvas.getContext('2d');
|
|
1041
|
+
|
|
1042
|
+
const ratio = maxWidth / img.naturalWidth;
|
|
1043
|
+
canvas.width = maxWidth;
|
|
1044
|
+
canvas.height = img.naturalHeight * ratio;
|
|
1045
|
+
|
|
1046
|
+
// 绘制图片
|
|
1047
|
+
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
|
|
1048
|
+
|
|
1049
|
+
// 转换为压缩后的data URL
|
|
1050
|
+
canvas.toBlob((blob) => {
|
|
1051
|
+
const url = URL.createObjectURL(blob);
|
|
1052
|
+
img.src = url;
|
|
1053
|
+
img.style.width = maxWidth + 'px';
|
|
1054
|
+
img.style.height = 'auto';
|
|
1055
|
+
resolve();
|
|
1056
|
+
}, 'image/jpeg', quality);
|
|
1057
|
+
} catch (e) {
|
|
1058
|
+
// 如果压缩失败,至少限制大小
|
|
1059
|
+
img.style.maxWidth = '800px';
|
|
1060
|
+
img.style.height = 'auto';
|
|
1061
|
+
resolve();
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
return Promise.all(promises);
|
|
1066
|
+
});
|
|
1067
|
+
|
|
1068
|
+
// 等待图片处理完成
|
|
1069
|
+
await page.waitForTimeout(1000);
|
|
823
1070
|
|
|
824
1071
|
// 生成 PDF
|
|
825
1072
|
const filename = `${String(index).padStart(3, '0')}_${article.title}.pdf`;
|
|
@@ -834,7 +1081,8 @@ async function downloadArticle(page, article, outputDir, index, total) {
|
|
|
834
1081
|
bottom: '20mm',
|
|
835
1082
|
left: '15mm'
|
|
836
1083
|
},
|
|
837
|
-
printBackground:
|
|
1084
|
+
printBackground: false, // 关闭背景打印,显著减小文件大小
|
|
1085
|
+
preferCSSPageSize: false
|
|
838
1086
|
});
|
|
839
1087
|
|
|
840
1088
|
spinner.succeed(`[${index}/${total}] ${chalk.green('✓')} ${article.title}`);
|
|
@@ -955,6 +1203,749 @@ async function mergePDFs(outputDir, columnTitle, articles, deleteAfterMerge = fa
|
|
|
955
1203
|
}
|
|
956
1204
|
}
|
|
957
1205
|
|
|
1206
|
+
// 提取单篇文章的 HTML 内容(用于 EPUB 生成)
|
|
1207
|
+
/**
 * Load one article page and extract a cleaned HTML fragment of its body,
 * suitable for embedding into an EPUB chapter.
 *
 * Steps: navigate, wait for the article container, scroll to the bottom to
 * trigger lazy-loaded content, then (inside the browser context) clone the
 * article node and strip it down to whitelisted markup.
 *
 * This function never rejects: every failure is converted into a result
 * object with `success: false`.
 *
 * @param {object} page - Playwright page. Navigation and selector waits use
 *   the page's default timeouts, so callers control them via
 *   `page.setDefaultTimeout` / `page.setDefaultNavigationTimeout`.
 * @param {{url: string, title: string, originalTitle?: string}} article
 *   Article descriptor; `originalTitle` is preferred for the returned title.
 * @param {number} index - Position of the article (currently unused; kept
 *   for signature parity with the download helpers).
 * @param {number} total - Total article count (currently unused).
 * @returns {Promise<{success: boolean, title: string, content: string, error?: string}>}
 */
async function extractArticleContent(page, article, index, total) {
  try {
    // Open the article page.
    await page.goto(article.url, { waitUntil: 'networkidle' });

    // Wait for the article container. No explicit timeout is passed so that the
    // page-level default (configured by the caller) applies instead of a
    // hard-coded 60 s that would override the user's setting.
    await page.waitForSelector('.Index_articleContent_QBG5G, .content');

    // Scroll through the whole page so lazily loaded content is requested
    // (the aim is the full article rather than only the initially rendered part).
    await page.evaluate(async () => {
      await new Promise((resolve) => {
        let totalHeight = 0;
        const distance = 100;
        const timer = setInterval(() => {
          const scrollHeight = document.body.scrollHeight;
          window.scrollBy(0, distance);
          totalHeight += distance;

          if (totalHeight >= scrollHeight) {
            clearInterval(timer);
            resolve();
          }
        }, 100);
      });
    });

    // Give late content a moment to finish loading.
    await page.waitForTimeout(3000);

    // Extract the article HTML. Everything inside this callback runs in the
    // browser, so it must be self-contained (no references to outer scope).
    const content = await page.evaluate(() => {
      // Locate the article body.
      const articleContent = document.querySelector('.Index_articleContent_QBG5G, .article-content, article, [class*="articleContent"]');

      if (!articleContent) {
        return null;
      }

      // Work on a clone so the live DOM is left untouched.
      const contentClone = articleContent.cloneNode(true);

      // Whitelist strategy: only core content elements are kept.
      const allowedTags = new Set([
        'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', // paragraphs and headings
        'UL', 'OL', 'LI', // lists
        'BLOCKQUOTE', // quotes
        'PRE', 'CODE', // code
        'IMG', // images
        'TABLE', 'THEAD', 'TBODY', 'TR', 'TH', 'TD', // tables
        'A', // links
        'STRONG', 'B', 'EM', 'I', 'U', // emphasis
        'BR', 'HR', // line breaks and rules
        'FIGURE', 'FIGCAPTION', 'DETAILS', 'SUMMARY',
        'SPAN', 'DIV', 'SECTION', 'ARTICLE' // containers (may hold text)
      ]);

      // Before whitelisting, drop common non-content regions.
      const removalSelectors = [
        'nav', 'header', 'footer', 'aside',
        '.comment', '.comments', '.Index_comment',
        '.recommend', '.recommendation', '.related', '.advertisement', '.ad', '.banner',
        '.subscribe', '.subscription', '.toolbar', '.Index_shareIcons_1vtJa',
        '.keyboard-wrapper', '.app-download', '.article-actions', '.article-bottom',
        '.note', '.notes', '.annotation', '.translation', '.trans', '.translator',
        '.audio', '.audio-player', '.voice', '.player', '.geek-player', '.podcast', '.radio',
        '.reward', '.appreciate', '.appreciation', '.donate', '.sponsor', '.thanks', '.support',
        '.qrcode', '.qr-code', '.qr', '.promotion', '.promo', '.ad-banner',
        '.copyright', '.statement', '.disclaimer',
        '.app-download-banner', '.article-plugin', '.article-notification', '.float-bar',
        'audio', 'video',
        '[class*="Note"]', '[class*="note"]', '[class*="Translation"]', '[class*="translation"]',
        '[class*="Audio"]', '[class*="audio"]', '[class*="Reward"]', '[class*="reward"]',
        '[data-plugin]', '[data-track]', '[data-track-section]', '[data-translation]', '[data-audio]',
        '[data-role="toolbar"]',
        'button', 'iframe', 'script', 'style'
      ];
      removalSelectors.forEach(selector => {
        contentClone.querySelectorAll(selector).forEach(el => el.remove());
      });

      // Further remove plugin-like elements by keyword in class/id/role/data-*.
      const pluginKeywords = [
        'note', 'translation', 'audio', 'player', 'reward', 'donate',
        'appreciation', 'sponsor', 'qrcode', 'toolbar', 'plugin',
        'copyright', 'geeknote', 'bilingual'
      ];
      const pluginElements = Array.from(contentClone.querySelectorAll('*')).filter(el => {
        const className = (el.className || '').toString().toLowerCase();
        const idValue = (el.id || '').toString().toLowerCase();
        const roleValue = (el.getAttribute && el.getAttribute('role')) ? el.getAttribute('role').toLowerCase() : '';
        const datasetValues = el.dataset ? Object.values(el.dataset).join(' ').toLowerCase() : '';
        const combined = `${className} ${idValue} ${roleValue} ${datasetValues}`;
        return pluginKeywords.some(keyword => combined.includes(keyword));
      });
      pluginElements.forEach(el => el.remove());

      // Remove mind-map style SVG/Canvas content (e-readers cannot render it).
      const mindmapSelectors = [
        '.mindmap', '.mind-map', '.MindMap', '.Mind-map',
        '[data-type="mindmap"]', '[data-role="mindmap"]', '[data-widget="mindmap"]',
        '[class*="MindMap"]', '[class*="mindMap"]'
      ];
      mindmapSelectors.forEach(selector => {
        contentClone.querySelectorAll(selector).forEach(el => el.remove());
      });
      const vectorCandidates = Array.from(contentClone.querySelectorAll('svg, canvas, object, embed'));
      vectorCandidates.forEach(el => {
        // SVG elements expose className as an object with `baseVal`.
        const className = typeof el.className === 'object' ? el.className.baseVal : (el.className || '');
        const meta = `${className} ${el.id || ''} ${el.getAttribute('data-type') || ''}`.toLowerCase();
        if (meta.includes('mind') || meta.includes('mindmap') || meta.includes('mind-map')) {
          el.remove();
        }
      });

      // Convert rich-text code block structures into standard <pre><code>.
      const blockSeparatorTags = new Set([
        'P','DIV','SECTION','ARTICLE','UL','OL','LI','FIGURE','FIGCAPTION',
        'TABLE','THEAD','TBODY','TR','TD'
      ]);

      // Flatten a node into plain code text, inserting newlines at <br> and
      // after block-level elements. Returns '' when only whitespace remains.
      function collectCodeText(node) {
        const parts = [];

        const ensureNewline = () => {
          if (!parts.length) {
            parts.push('\n');
            return;
          }
          if (!parts[parts.length - 1].endsWith('\n')) {
            parts.push('\n');
          }
        };

        const traverse = (current) => {
          if (!current) {
            return;
          }
          if (current.nodeType === Node.TEXT_NODE) {
            // Non-breaking spaces become regular spaces.
            const textValue = current.textContent.replace(/\u00A0/g, ' ');
            if (textValue) {
              parts.push(textValue);
            }
            return;
          }
          if (current.nodeType !== Node.ELEMENT_NODE) {
            return;
          }
          const tag = current.tagName.toUpperCase();
          if (tag === 'BR') {
            ensureNewline();
            return;
          }
          Array.from(current.childNodes).forEach(traverse);
          if (blockSeparatorTags.has(tag)) {
            ensureNewline();
          }
        };

        traverse(node);
        let text = parts.join('');
        text = text
          .replace(/\r\n/g, '\n')
          .replace(/\n{3,}/g, '\n\n')
          .replace(/[ \t]+\n/g, '\n')
          .replace(/\n+$/g, '\n');
        return text.trim() ? text : '';
      }

      const codeLikeSelectors = [
        '[data-slate-type="code"]',
        '[data-slate-node="code"]',
        '[data-code-block]',
        '[data-code]',
        '[data-code-language]',
        '[class*="code-block"]',
        '[class*="CodeBlock"]'
      ];
      const codeCandidates = new Set();
      codeLikeSelectors.forEach(selector => {
        contentClone.querySelectorAll(selector).forEach(el => codeCandidates.add(el));
      });
      const replaceWithPre = (element) => {
        if (!element || !element.parentNode) {
          return;
        }
        const codeText = collectCodeText(element);
        if (!codeText) {
          element.remove();
          return;
        }
        const pre = document.createElement('pre');
        const code = document.createElement('code');
        code.textContent = codeText;
        pre.appendChild(code);
        element.parentNode.replaceChild(pre, element);
      };
      codeCandidates.forEach(el => {
        if (el.tagName && el.tagName.toUpperCase() === 'PRE') {
          return;
        }
        replaceWithPre(el);
      });

      // Inline <code> that actually spans several lines becomes a block.
      const multilineInlineCodes = Array.from(contentClone.querySelectorAll('code')).filter(codeEl => {
        const parent = codeEl.parentElement;
        return parent && parent.tagName.toUpperCase() !== 'PRE' && codeEl.textContent.includes('\n');
      });
      multilineInlineCodes.forEach(codeEl => {
        const codeText = collectCodeText(codeEl);
        if (!codeText) {
          codeEl.remove();
          return;
        }
        const pre = document.createElement('pre');
        const innerCode = document.createElement('code');
        innerCode.textContent = codeText;
        pre.appendChild(innerCode);
        codeEl.parentNode.replaceChild(pre, codeEl);
      });

      // Recursive cleanup: unwrap elements that are not whitelisted, keeping
      // their (already cleaned) children in place.
      function cleanElement(element) {
        const children = Array.from(element.childNodes);

        for (const child of children) {
          if (child.nodeType === Node.ELEMENT_NODE) {
            const tagName = child.tagName.toUpperCase();

            if (!allowedTags.has(tagName)) {
              // Clean descendants first.
              cleanElement(child);

              if (child.childNodes.length > 0) {
                while (child.firstChild) {
                  element.insertBefore(child.firstChild, child);
                }
                child.remove();
              } else {
                const textContent = (child.textContent || '').trim();
                if (textContent) {
                  const textNode = document.createTextNode(textContent + ' ');
                  element.insertBefore(textNode, child);
                }
                child.remove();
              }
            } else {
              cleanElement(child);
            }
          }
        }
      }

      cleanElement(contentClone);

      // Strip presentational and scripting attributes to avoid style conflicts.
      const allElements = contentClone.querySelectorAll('*');
      allElements.forEach(el => {
        el.removeAttribute('style');
        el.removeAttribute('class');
        el.removeAttribute('id');
        el.removeAttribute('onclick');
        el.removeAttribute('onload');
      });

      // Normalize image URLs and drop ad-like images.
      const images = contentClone.querySelectorAll('img');
      // The bare keyword "ad" is intentionally NOT in this substring list:
      // it occurs inside ordinary URLs ("upload", "download", hex-hashed file
      // names) and caused legitimate images to be deleted.
      const adKeywordLower = ['advert', 'banner', 'qrcode', 'qr-code', 'reward', 'donate', 'appdownload', 'app-download', 'sponsor', 'thanks'];
      const adKeywordCn = ['广告', '二维码', '赞赏', '打赏', '版权', '推广'];
      // "ad"/"ads" is only honoured as a standalone word in the alt text.
      const standaloneAdPattern = /(^|[^a-z0-9])ads?([^a-z0-9]|$)/;
      images.forEach(img => {
        let src = img.getAttribute('src');
        const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') || img.getAttribute('data-lazy-src');

        // Prefer the lazy-load source when it is an absolute http(s) URL.
        if (dataSrc && (dataSrc.startsWith('http://') || dataSrc.startsWith('https://'))) {
          src = dataSrc;
          img.setAttribute('src', src);
        }

        if (!src || src.startsWith('blob:') || src.startsWith('data:')) {
          img.remove();
          return;
        }

        if (!src.startsWith('http://') && !src.startsWith('https://')) {
          try {
            const absoluteUrl = new URL(src, window.location.href).href;
            img.setAttribute('src', absoluteUrl);
            src = absoluteUrl;
          } catch (e) {
            // Unresolvable URL: drop the image and stop processing it.
            img.remove();
            return;
          }
        }

        const altText = img.getAttribute('alt') || '';
        const altLower = altText.toLowerCase();
        const srcLower = (src || '').toLowerCase();
        if (
          adKeywordLower.some(keyword => srcLower.includes(keyword)) ||
          adKeywordLower.some(keyword => altLower.includes(keyword)) ||
          standaloneAdPattern.test(altLower) ||
          adKeywordCn.some(keyword => altText.includes(keyword))
        ) {
          img.remove();
          return;
        }

        // Keep only src and alt; iterate backwards because attributes is live.
        const imgAttrs = img.attributes;
        for (let i = imgAttrs.length - 1; i >= 0; i--) {
          const attrName = imgAttrs[i].name;
          if (attrName !== 'src' && attrName !== 'alt') {
            img.removeAttribute(attrName);
          }
        }
      });

      // Remove empty div/span containers.
      const containers = contentClone.querySelectorAll('div, span');
      containers.forEach(container => {
        if (!container.textContent.trim() && !container.querySelector('img, pre, code, table')) {
          container.remove();
        }
      });

      // Turn text-only containers into paragraphs so they get paragraph spacing.
      const blockLikeTags = new Set(['P','UL','OL','LI','TABLE','PRE','BLOCKQUOTE','H1','H2','H3','H4','H5','H6','IMG','SECTION','ARTICLE','FIGURE','FIGCAPTION','DETAILS','SUMMARY']);
      // Reversed document order: innermost containers are handled first.
      const textContainers = Array.from(contentClone.querySelectorAll('div, section, article')).reverse();
      textContainers.forEach(container => {
        if (container === contentClone) {
          return;
        }

        if (!container.textContent.trim()) {
          return;
        }

        if (container.querySelector('img, pre, table, ul, ol, blockquote, h1, h2, h3, h4, h5, h6, figure')) {
          return;
        }

        const hasBlockChildren = Array.from(container.children).some(child => blockLikeTags.has(child.tagName?.toUpperCase()));
        if (hasBlockChildren) {
          return;
        }

        const paragraph = document.createElement('p');
        paragraph.innerHTML = container.innerHTML;
        container.parentNode.replaceChild(paragraph, container);
      });

      // Wrap loose text / inline nodes that hang directly under a container
      // into <p>, so stray text also gets paragraph spacing.
      const inlineTags = new Set(['A','SPAN','STRONG','B','EM','I','U','CODE','SMALL','SUB','SUP','MARK']);

      function wrapInlineChildren(element) {
        const tagName = element.tagName ? element.tagName.toUpperCase() : '';
        if (['P','LI','PRE','CODE','TABLE','THEAD','TBODY','TR'].includes(tagName)) {
          return;
        }

        const childNodes = Array.from(element.childNodes);
        let buffer = [];

        // Move the buffered inline run into a new <p> placed before referenceNode
        // (or appended at the end when referenceNode is null).
        const flushBuffer = (referenceNode) => {
          if (!buffer.length) {
            return;
          }
          const paragraph = document.createElement('p');
          buffer.forEach(node => paragraph.appendChild(node));
          element.insertBefore(paragraph, referenceNode);
          buffer = [];
        };

        for (const node of childNodes) {
          if (node.nodeType === Node.TEXT_NODE) {
            if (node.textContent.trim()) {
              buffer.push(node);
            } else {
              element.removeChild(node);
            }
            continue;
          }

          if (node.nodeType === Node.ELEMENT_NODE) {
            const childTag = node.tagName.toUpperCase();
            if (inlineTags.has(childTag) || childTag === 'BR') {
              buffer.push(node);
              continue;
            }

            flushBuffer(node);
            wrapInlineChildren(node);
            continue;
          }

          flushBuffer(node);
        }

        flushBuffer(null);
      }

      wrapInlineChildren(contentClone);

      // Remove short trailing copyright / advertising statements.
      const footerKeywords = ['版权', '未经许可', '未经授权', '不得转载', '未经允许', 'All Rights Reserved', '最终解释权', '转载'];
      const trailingElements = Array.from(contentClone.querySelectorAll('p, div, section')).slice(-6);
      trailingElements.forEach(el => {
        const text = (el.textContent || '').trim();
        if (!text) {
          return;
        }
        if (text.length <= 200 && footerKeywords.some(keyword => text.includes(keyword))) {
          el.remove();
        }
      });

      // Normalize every <pre>: plain text inside a single <code> child.
      const codeBlocks = contentClone.querySelectorAll('pre');
      codeBlocks.forEach(block => {
        const codeText = collectCodeText(block);
        if (!codeText) {
          block.remove();
          return;
        }
        let codeInside = block.querySelector('code');
        if (!codeInside) {
          codeInside = document.createElement('code');
          block.appendChild(codeInside);
        }
        codeInside.textContent = codeText;
      });

      return contentClone.innerHTML;
    });

    return {
      success: true,
      title: article.originalTitle || article.title,
      content: content || `<p>内容提取失败</p>`
    };

  } catch (error) {
    // Rejections are not guaranteed to be Error instances; normalize first so
    // this handler cannot itself throw.
    let errorMessage = String(error?.message ?? error);
    // A timeout frequently means the cookie expired and the page never rendered.
    if (errorMessage.includes('Timeout') || errorMessage.includes('timeout')) {
      errorMessage = 'Cookie 可能已失效或页面加载超时';
    }

    return {
      success: false,
      title: article.originalTitle || article.title,
      content: `<p>下载失败: ${errorMessage}</p>`,
      error: errorMessage
    };
  }
}
|
|
1662
|
+
|
|
1663
|
+
/**
 * Extracts article content with a pool of reusable pages (used for EPUB output).
 *
 * Each worker owns one page and repeatedly takes the next unprocessed article
 * until the queue is empty, so at most `concurrency` extractions run at once.
 *
 * @param {object} context - Browser context; only `newPage()` is called on it.
 * @param {Array<object>} articles - Articles to process; `originalTitle` / `title` are used for display.
 * @param {number} [concurrency=5] - Maximum number of pages working in parallel.
 * @param {number} [delay=2000] - Pause in milliseconds before a worker picks up its next article.
 * @param {number} [timeout=60000] - Default action/navigation timeout set on every page, in milliseconds.
 * @returns {Promise<Array<object>>} One result per article, stored at the article's index.
 *   A thrown extraction error is converted into `{ success: false, title, content, error }`.
 */
async function extractWithConcurrency(context, articles, concurrency = 5, delay = 2000, timeout = 60000) {
  const results = [];
  const total = articles.length;
  let completed = 0;
  let nextIndex = 0;

  const displayTitle = (article) => article.originalTitle || article.title;
  // A plain timer instead of page.waitForTimeout(): it cannot reject, so a
  // failing pause can never turn an already extracted article into a failure.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const progressSpinner = ora(`提取进度: 0/${total}`).start();

  // Print one permanent line for the finished article, then resume the spinner.
  const report = (symbol, text) => {
    progressSpinner.stopAndPersist({ symbol, text });
    progressSpinner.start();
    progressSpinner.text = `提取进度: ${completed}/${total}`;
  };

  const runWorker = async (page) => {
    while (nextIndex < total) {
      const index = nextIndex++;
      const article = articles[index];
      const label = `[${index + 1}/${total}] ${displayTitle(article)}`;

      let result;
      try {
        result = await extractArticleContent(page, article, index + 1, total);
      } catch (error) {
        result = {
          success: false,
          title: displayTitle(article),
          content: `<p>提取失败</p>`,
          error: error.message
        };
      }

      // Recorded exactly once per article, whatever the outcome.
      results[index] = result;
      completed++;

      if (result.success) {
        report(chalk.green('✓'), label);
      } else {
        report(chalk.red('✗'), `${label} - ${result.error}`);
      }

      // Throttle only when there is still work left to pick up.
      if (nextIndex < total) {
        await pause(delay);
      }
    }
  };

  // A zero, negative or non-numeric concurrency would start no worker at all
  // and silently return nothing, so fall back to a single worker.
  const requested = Number(concurrency);
  const workerCount = Math.min(requested >= 1 ? Math.ceil(requested) : 1, total);

  let pages = [];
  try {
    const opened = await Promise.allSettled(
      Array.from({ length: workerCount }, () => context.newPage())
    );
    pages = opened.filter((outcome) => outcome.status === 'fulfilled').map((outcome) => outcome.value);
    const failure = opened.find((outcome) => outcome.status === 'rejected');
    if (failure) {
      throw failure.reason;
    }

    // Apply the configured timeout to every page in the pool.
    pages.forEach((page) => {
      page.setDefaultTimeout(timeout);
      page.setDefaultNavigationTimeout(timeout);
    });

    await Promise.all(pages.map((page) => runWorker(page)));

    progressSpinner.succeed(`提取完成: ${completed}/${total}`);
    return results;
  } catch (error) {
    // Do not leave the spinner animating when setup fails.
    progressSpinner.stop();
    throw error;
  } finally {
    // Close every page that was opened, including on the failure path.
    await Promise.all(pages.map((page) => page.close()));
  }
}
|
|
1749
|
+
|
|
1750
|
+
/**
 * Builds an EPUB from the successfully extracted articles and writes it to disk.
 *
 * @param {string} outputDir - Directory the `.epub` file is written into.
 * @param {string} columnTitle - Book title; also the basis of the output file name.
 * @param {string} columnAuthor - Author name; falls back to '极客时间' when empty.
 * @param {Array<object>} articles - Not read by this function; kept for interface compatibility.
 * @param {Array<object>} contentResults - Extraction results; only entries with a truthy
 *   `success` become chapters (their `title` and `content` are used).
 * @returns {Promise<string|null>} Path of the written file, or `null` when there is
 *   nothing to write or generation fails (the failure is reported on the console).
 */
async function generateEPUB(outputDir, columnTitle, columnAuthor, articles, contentResults) {
  const spinner = ora('正在生成 EPUB 文件...').start();

  try {
    // Build chapter data; missing entries are skipped instead of crashing the filter.
    const chapters = contentResults
      .filter((result) => Boolean(result && result.success))
      .map((result) => ({
        title: result.title,
        content: result.content,
        excludeFromToc: false
      }));

    if (chapters.length === 0) {
      spinner.warn('没有可用的章节内容,无法生成 EPUB');
      return null;
    }

    const options = {
      title: columnTitle,
      author: columnAuthor || '极客时间',
      publisher: '极客时间',
      version: 3,
      css: `
        body {
          font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "PingFang SC", "Hiragino Sans GB", "Microsoft YaHei", sans-serif;
          line-height: 1.8;
          color: #333;
          padding: 1.5em;
          font-size: 16px;
          text-align: justify;
          max-width: 48em;
          margin: 0 auto;
        }
        h1, h2, h3, h4, h5, h6 {
          font-weight: bold;
          margin-top: 1.5em;
          margin-bottom: 0.8em;
          line-height: 1.4;
          color: #000;
          page-break-after: avoid;
        }
        h1 {
          font-size: 2em;
          border-bottom: 2px solid #e0e0e0;
          padding-bottom: 0.3em;
        }
        h2 {
          font-size: 1.6em;
        }
        h3 {
          font-size: 1.3em;
        }
        section, article {
          margin: 1.5em 0;
          padding: 0;
        }
        p {
          margin: 1.2em 0;
          text-indent: 0;
          line-height: 1.8;
          word-wrap: break-word;
          overflow-wrap: break-word;
          display: block;
          page-break-inside: avoid;
        }
        /* 确保段落之间有明显间隔 */
        p + p {
          margin-top: 1.5em;
        }
        /* 代码块样式 */
        pre {
          background-color: #f6f8fa;
          border: 1px solid #e1e4e8;
          border-radius: 6px;
          padding: 16px;
          overflow-x: auto;
          margin: 1em 0;
          line-height: 1.5;
          font-size: 14px;
          white-space: pre-wrap;
          word-wrap: break-word;
          font-family: 'Monaco', 'Menlo', 'Consolas', 'Courier New', monospace;
          page-break-inside: avoid;
        }
        code {
          font-family: 'Monaco', 'Menlo', 'Consolas', 'Courier New', monospace;
          font-size: 0.9em;
          background-color: #f6f8fa;
          padding: 0.2em 0.4em;
          border-radius: 3px;
          border: 1px solid #e1e4e8;
        }
        pre code {
          background-color: transparent;
          border: none;
          padding: 0;
        }
        /* 列表样式 */
        ul, ol {
          margin: 1em 0;
          padding-left: 2em;
          line-height: 1.8;
        }
        li {
          margin: 0.5em 0;
        }
        /* 引用样式 */
        blockquote {
          margin: 1em 0;
          padding: 0.5em 1em;
          border-left: 4px solid #ddd;
          background-color: #f9f9f9;
          color: #666;
          font-style: italic;
        }
        /* 图片样式 */
        img {
          max-width: 100%;
          height: auto;
          display: block;
          margin: 1em auto;
          page-break-inside: avoid;
        }
        /* 表格样式 */
        table {
          border-collapse: collapse;
          width: 100%;
          margin: 1em 0;
          font-size: 0.9em;
          page-break-inside: avoid;
        }
        th, td {
          border: 1px solid #ddd;
          padding: 8px 12px;
          text-align: left;
          line-height: 1.6;
        }
        th {
          background-color: #f2f2f2;
          font-weight: bold;
        }
        /* 链接样式 */
        a {
          color: #0366d6;
          text-decoration: none;
        }
        /* 强调和加粗 */
        strong, b {
          font-weight: bold;
          color: #000;
        }
        em, i {
          font-style: italic;
        }
        /* 分隔线 */
        hr {
          border: none;
          border-top: 1px solid #e1e4e8;
          margin: 2em 0;
        }
      `,
      verbose: Boolean(process.env.DEBUG)
    };

    // The chapter list is the second argument of epub(), not part of the options.
    spinner.text = '正在生成 EPUB...';
    const epubBuffer = await epub(options, chapters);

    // Path separators and characters reserved on common file systems would make
    // the write fail (or land in another directory), so neutralise them.
    const safeTitle = String(columnTitle || '')
      .replace(/[\\/:*?"<>|\x00-\x1f]/g, '_')
      .trim() || 'book';
    const epubFileName = `${safeTitle}.epub`;
    const epubFilePath = path.join(outputDir, epubFileName);
    await fs.writeFile(epubFilePath, epubBuffer);

    spinner.succeed(`已生成 EPUB 文件: ${chalk.green(epubFileName)} (${chapters.length} 章)`);
    return epubFilePath;

  } catch (error) {
    // A rejection is not guaranteed to be an Error instance; normalise the text
    // so this handler itself can never throw.
    const message = String((error && error.message) || error);
    spinner.fail(`生成 EPUB 失败: ${message}`);

    // Give a more specific hint for the failure categories we can recognise.
    if (message.includes('Only HTTP(S) protocols are supported')) {
      console.log(chalk.yellow('\n⚠️  图片URL格式问题:'));
      console.log(chalk.gray('  某些图片使用了非HTTP(S)协议(如本地路径、blob URL等)'));
      console.log(chalk.gray('  这是一个已知问题,正在修复中\n'));
    } else if (message.includes('fetch') || message.includes('network')) {
      console.log(chalk.yellow('\n⚠️  网络问题:'));
      console.log(chalk.gray('  部分图片下载失败,可能是网络连接问题'));
      console.log(chalk.gray('  建议:检查网络连接或稍后重试\n'));
    }

    if (process.env.DEBUG) {
      console.error(chalk.gray((error && error.stack) || message));
    }
    return null;
  }
}
|
|
1948
|
+
|
|
958
1949
|
// 主函数
|
|
959
1950
|
async function main(options) {
|
|
960
1951
|
console.log(chalk.bold.cyan('\n🚀 极客时间专栏下载器\n'));
|
|
@@ -1045,8 +2036,15 @@ async function main(options) {
|
|
|
1045
2036
|
const page = await context.newPage();
|
|
1046
2037
|
|
|
1047
2038
|
try {
|
|
2039
|
+
// 获取配置的超时时间
|
|
2040
|
+
const timeout = parseInt(options.timeout) || 60000;
|
|
2041
|
+
|
|
2042
|
+
// 为页面设置默认超时
|
|
2043
|
+
page.setDefaultTimeout(timeout);
|
|
2044
|
+
page.setDefaultNavigationTimeout(timeout);
|
|
2045
|
+
|
|
1048
2046
|
// 获取文章列表
|
|
1049
|
-
const { articles, columnTitle } = await getArticleList(page, columnUrl);
|
|
2047
|
+
const { articles, columnTitle, columnAuthor } = await getArticleList(page, columnUrl, timeout);
|
|
1050
2048
|
|
|
1051
2049
|
if (articles.length === 0) {
|
|
1052
2050
|
console.log(chalk.yellow('⚠️ 未找到任何文章'));
|
|
@@ -1056,7 +2054,8 @@ async function main(options) {
|
|
|
1056
2054
|
// 为该专栏创建专用文件夹
|
|
1057
2055
|
const outputDir = path.join(baseOutputDir, columnTitle);
|
|
1058
2056
|
await fs.mkdir(outputDir, { recursive: true });
|
|
1059
|
-
console.log(chalk.gray(`📁 专栏输出目录: ${outputDir}
|
|
2057
|
+
console.log(chalk.gray(`📁 专栏输出目录: ${outputDir}`));
|
|
2058
|
+
console.log(chalk.gray(`✍️ 作者: ${columnAuthor}\n`));
|
|
1060
2059
|
|
|
1061
2060
|
// 如果是 dry-run 模式,只显示列表
|
|
1062
2061
|
if (options.dryRun) {
|
|
@@ -1085,33 +2084,121 @@ async function main(options) {
|
|
|
1085
2084
|
console.log(chalk.gray(`📊 并发数: ${concurrency}\n`));
|
|
1086
2085
|
}
|
|
1087
2086
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
//
|
|
1097
|
-
const
|
|
1098
|
-
const
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
2087
|
+
// 验证并规范化格式参数
|
|
2088
|
+
const format = (options.format || 'pdf').toLowerCase();
|
|
2089
|
+
if (!['pdf', 'epub', 'both'].includes(format)) {
|
|
2090
|
+
console.error(chalk.red(`\n❌ 无效的格式: ${options.format}`));
|
|
2091
|
+
console.log(chalk.yellow('支持的格式: pdf, epub, both\n'));
|
|
2092
|
+
return;
|
|
2093
|
+
}
|
|
2094
|
+
|
|
2095
|
+
// 根据格式选择处理方式
|
|
2096
|
+
const needPdf = format === 'pdf' || format === 'both';
|
|
2097
|
+
const needEpub = format === 'epub' || format === 'both';
|
|
2098
|
+
|
|
2099
|
+
let results, contentResults;
|
|
2100
|
+
|
|
2101
|
+
// 生成 PDF
|
|
2102
|
+
if (needPdf) {
|
|
2103
|
+
console.log(chalk.cyan(`📄 格式: PDF${needEpub ? ' + EPUB' : ''}\n`));
|
|
2104
|
+
|
|
2105
|
+
results = await downloadWithConcurrency(
|
|
2106
|
+
context,
|
|
2107
|
+
articlesToDownload,
|
|
1108
2108
|
outputDir,
|
|
1109
|
-
|
|
2109
|
+
concurrency,
|
|
2110
|
+
parseInt(options.delay) || 2000,
|
|
2111
|
+
timeout
|
|
2112
|
+
);
|
|
2113
|
+
|
|
2114
|
+
// 统计结果
|
|
2115
|
+
const successCount = results.filter(r => r.success).length;
|
|
2116
|
+
const failCount = results.filter(r => !r.success).length;
|
|
2117
|
+
const timeoutCount = results.filter(r =>
|
|
2118
|
+
!r.success && r.error && (r.error.includes('timeout') || r.error.includes('Timeout'))
|
|
2119
|
+
).length;
|
|
2120
|
+
|
|
2121
|
+
console.log(chalk.bold.cyan('\n📊 PDF 下载统计\n'));
|
|
2122
|
+
console.log(` ${chalk.green('✓')} 成功: ${successCount}`);
|
|
2123
|
+
console.log(` ${chalk.red('✗')} 失败: ${failCount}`);
|
|
2124
|
+
console.log(` ${chalk.blue('📁')} 保存位置: ${outputDir}\n`);
|
|
2125
|
+
|
|
2126
|
+
// 如果大部分失败都是超时,提示 Cookie 可能失效
|
|
2127
|
+
if (timeoutCount > 0 && timeoutCount >= failCount * 0.8) {
|
|
2128
|
+
console.log(chalk.yellow('⚠️ 检测到大量超时错误,可能的原因:\n'));
|
|
2129
|
+
console.log(chalk.gray(' 1. Cookie 已失效 - 请重新获取 Cookie'));
|
|
2130
|
+
console.log(chalk.gray(' 2. 网络连接慢 - 尝试使用 --timeout 120000 增加超时时间'));
|
|
2131
|
+
console.log(chalk.gray(' 3. 需要登录或权限不足 - 确认已购买该专栏\n'));
|
|
2132
|
+
}
|
|
2133
|
+
|
|
2134
|
+
// 合并 PDF
|
|
2135
|
+
if (options.merge !== false && successCount > 0) {
|
|
2136
|
+
const mergedPath = await mergePDFs(
|
|
2137
|
+
outputDir,
|
|
2138
|
+
columnTitle,
|
|
2139
|
+
articlesToDownload,
|
|
2140
|
+
options.deleteAfterMerge
|
|
2141
|
+
);
|
|
2142
|
+
if (mergedPath) {
|
|
2143
|
+
console.log(chalk.green(`\n✅ PDF 合并完成: ${mergedPath}\n`));
|
|
2144
|
+
}
|
|
2145
|
+
}
|
|
2146
|
+
}
|
|
2147
|
+
|
|
2148
|
+
// 生成 EPUB
|
|
2149
|
+
if (needEpub) {
|
|
2150
|
+
if (needPdf) {
|
|
2151
|
+
console.log(chalk.cyan('\n开始生成 EPUB...\n'));
|
|
2152
|
+
} else {
|
|
2153
|
+
console.log(chalk.cyan('📚 格式: EPUB\n'));
|
|
2154
|
+
}
|
|
2155
|
+
|
|
2156
|
+
// 重要提醒:关于内容完整性
|
|
2157
|
+
console.log(chalk.yellow('⚠️ 重要提醒:'));
|
|
2158
|
+
console.log(chalk.gray(' 1. 确保 Cookie 有效且未过期'));
|
|
2159
|
+
console.log(chalk.gray(' 2. 确认已购买该专栏(避免只获取试看内容)'));
|
|
2160
|
+
console.log(chalk.gray(' 3. EPUB 生成需要下载文章完整内容,耗时较长'));
|
|
2161
|
+
console.log(chalk.gray(' 4. 如果只获取到试看内容,说明 Cookie 失效或无权限\n'));
|
|
2162
|
+
|
|
2163
|
+
contentResults = await extractWithConcurrency(
|
|
2164
|
+
context,
|
|
1110
2165
|
articlesToDownload,
|
|
1111
|
-
|
|
2166
|
+
concurrency,
|
|
2167
|
+
parseInt(options.delay) || 2000,
|
|
2168
|
+
timeout
|
|
1112
2169
|
);
|
|
1113
|
-
|
|
1114
|
-
|
|
2170
|
+
|
|
2171
|
+
// 统计结果
|
|
2172
|
+
const successCount = contentResults.filter(r => r.success).length;
|
|
2173
|
+
const failCount = contentResults.filter(r => !r.success).length;
|
|
2174
|
+
const timeoutCount = contentResults.filter(r =>
|
|
2175
|
+
!r.success && r.error && (r.error.includes('Cookie') || r.error.includes('timeout') || r.error.includes('Timeout'))
|
|
2176
|
+
).length;
|
|
2177
|
+
|
|
2178
|
+
console.log(chalk.bold.cyan('\n📊 EPUB 提取统计\n'));
|
|
2179
|
+
console.log(` ${chalk.green('✓')} 成功: ${successCount}`);
|
|
2180
|
+
console.log(` ${chalk.red('✗')} 失败: ${failCount}\n`);
|
|
2181
|
+
|
|
2182
|
+
// 如果大部分失败都是超时,提示 Cookie 可能失效
|
|
2183
|
+
if (timeoutCount > 0 && timeoutCount >= failCount * 0.8) {
|
|
2184
|
+
console.log(chalk.yellow('⚠️ 检测到大量超时错误,可能的原因:\n'));
|
|
2185
|
+
console.log(chalk.gray(' 1. Cookie 已失效 - 请重新获取 Cookie'));
|
|
2186
|
+
console.log(chalk.gray(' 2. 网络连接慢 - 尝试使用 --timeout 120000 增加超时时间'));
|
|
2187
|
+
console.log(chalk.gray(' 3. 需要登录或权限不足 - 确认已购买该专栏\n'));
|
|
2188
|
+
}
|
|
2189
|
+
|
|
2190
|
+
// 生成 EPUB
|
|
2191
|
+
if (successCount > 0) {
|
|
2192
|
+
const epubPath = await generateEPUB(
|
|
2193
|
+
outputDir,
|
|
2194
|
+
columnTitle,
|
|
2195
|
+
columnAuthor,
|
|
2196
|
+
articlesToDownload,
|
|
2197
|
+
contentResults
|
|
2198
|
+
);
|
|
2199
|
+
if (epubPath) {
|
|
2200
|
+
console.log(chalk.green(`\n✅ EPUB 生成完成: ${epubPath}\n`));
|
|
2201
|
+
}
|
|
1115
2202
|
}
|
|
1116
2203
|
}
|
|
1117
2204
|
|
|
@@ -1142,13 +2229,15 @@ async function main(options) {
|
|
|
1142
2229
|
// 命令行参数
|
|
1143
2230
|
program
|
|
1144
2231
|
.name('geektime-dl')
|
|
1145
|
-
.description('批量下载极客时间专栏文章为PDF')
|
|
1146
|
-
.version('1.0
|
|
2232
|
+
.description('批量下载极客时间专栏文章为PDF或EPUB')
|
|
2233
|
+
.version('1.1.0')
|
|
1147
2234
|
.option('-u, --url <url>', '专栏文章URL(任意一篇)')
|
|
1148
2235
|
.option('-c, --cookie <cookie>', 'Cookie字符串(用于认证)')
|
|
1149
2236
|
.option('-o, --output <dir>', '输出目录', './downloads')
|
|
2237
|
+
.option('-f, --format <format>', '输出格式: pdf, epub, both', 'pdf')
|
|
1150
2238
|
.option('--headless <boolean>', '无头模式', true)
|
|
1151
2239
|
.option('--delay <ms>', '每篇文章之间的延迟(ms)', '2000')
|
|
2240
|
+
.option('--timeout <ms>', '页面加载超时时间(ms)', '60000')
|
|
1152
2241
|
.option('--concurrency <number>', '并发下载数量', '5')
|
|
1153
2242
|
.option('--dry-run', '预览模式,只显示文章列表')
|
|
1154
2243
|
.option('--limit <number>', '限制下载数量(用于测试)')
|
|
@@ -1158,6 +2247,8 @@ program
|
|
|
1158
2247
|
示例:
|
|
1159
2248
|
$ geektime-dl --url "https://time.geekbang.org/column/article/200822" --cookie "your_cookie"
|
|
1160
2249
|
$ geektime-dl -u "https://time.geekbang.org/column/article/200822" -c "your_cookie" --dry-run
|
|
2250
|
+
$ geektime-dl --url "..." --cookie "..." --format epub # 生成EPUB格式
|
|
2251
|
+
$ geektime-dl --url "..." --cookie "..." --format both # 同时生成PDF和EPUB
|
|
1161
2252
|
$ npx @kadaliao/geektime-downloader --url "https://..." --cookie "..." --limit 5
|
|
1162
2253
|
$ geektime-dl --url "..." --cookie "..." --no-merge # 不合并PDF
|
|
1163
2254
|
`)
|