@kadaliao/geektime-downloader 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/download.js CHANGED
@@ -19,6 +19,7 @@ const require = createRequire(import.meta.url);
19
19
  const { version } = require('./package.json');
20
20
 
21
21
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
22
+ let globalCookieHeader = '';
22
23
 
23
24
  // 全局变量:跟踪当前浏览器实例和是否正在关闭
24
25
  let globalBrowser = null;
@@ -244,6 +245,10 @@ const PRINT_FIX_CSS = `
244
245
  }
245
246
  `;
246
247
 
248
// Origin of the Geektime web site. Used to build the article API URL, the
// per-article referer/origin headers and the <base href> of the printable page.
const GEEKTIME_BASE_URL = 'https://time.geekbang.org';
// Article endpoint that fetchArticleData() sends its POST request to.
const ARTICLE_API_URL = `${GEEKTIME_BASE_URL}/serv/v1/article`;
// Desktop Chrome user-agent string sent as the `user-agent` header on article API requests.
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
251
+
247
252
  // 解析 cookie 字符串
248
253
  function parseCookies(cookieString) {
249
254
  return cookieString.split(';').map(cookie => {
@@ -257,6 +262,245 @@ function parseCookies(cookieString) {
257
262
  });
258
263
  }
259
264
 
265
/**
 * Prepares article HTML returned by the API for offline rendering.
 *
 * - Strips the `<!-- [[[read_end]]] -->` marker comment.
 * - Rewrites protocol-relative `src` / `href` attribute values (`//host/...`)
 *   to absolute `https://` URLs, for both double- and single-quoted values.
 *   The attribute name is emitted in lowercase regardless of input casing.
 *
 * @param {string} [html=''] Raw article HTML.
 * @returns {string} Normalized HTML, or '' when `html` is empty/falsy.
 */
function normalizeArticleHtml(html = '') {
  if (!html) {
    return '';
  }

  const readEndMarker = /<!--\s*\[\[\[read_end]]\]\s*-->/gi;
  const protocolRelativeAttr = /(src|href)=(["'])\/\//gi;

  const withoutMarker = html.replace(readEndMarker, '');
  return withoutMarker.replace(
    protocolRelativeAttr,
    (_match, attrName, quote) => `${attrName.toLowerCase()}=${quote}https://`
  );
}
274
+
275
/**
 * Fetches one article's payload from the Geektime article API.
 *
 * Sends a JSON POST through the Playwright browser context's request API and
 * retries up to three times, waiting `attempt * 700` ms between attempts.
 * Every failure (transport error, non-2xx status, unparsable body, non-zero
 * `code`, missing `data`, empty `article_content`) counts as a failed attempt.
 *
 * @param {object} context Playwright BrowserContext; only `context.request.post` is used.
 * @param {string|number} articleId Article id; sent as a string in the request body.
 * @returns {Promise<object>} The `data` object of the API response (non-empty `article_content` guaranteed).
 * @throws {Error} The error from the last failed attempt.
 */
async function fetchArticleData(context, articleId) {
  const maxAttempts = 3;
  const refererUrl = `${GEEKTIME_BASE_URL}/column/article/${articleId}`;
  let lastError = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    let response = null;
    try {
      response = await context.request.post(ARTICLE_API_URL, {
        headers: {
          'user-agent': DEFAULT_USER_AGENT,
          'content-type': 'application/json',
          'accept': 'application/json, text/plain, */*',
          'origin': GEEKTIME_BASE_URL,
          'referer': refererUrl,
          'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
          // Only send an explicit cookie header when one has been configured.
          ...(globalCookieHeader ? { 'cookie': globalCookieHeader } : {})
        },
        data: {
          id: String(articleId),
          include_neighbors: true,
          is_freelyread: true
        }
      });

      // Read the body as text first so it can be quoted in error messages.
      const bodyText = await response.text();

      if (!response.ok()) {
        throw new Error(`API请求失败: ${response.status()} ${response.statusText()} - ${bodyText.slice(0, 160)}`);
      }

      let json;
      try {
        json = JSON.parse(bodyText);
      } catch (parseError) {
        throw new Error(`API响应解析失败: ${parseError.message} - ${bodyText.slice(0, 160)}`);
      }

      if (!json || json.code !== 0 || !json.data) {
        throw new Error(`无法获取完整文章内容: ${bodyText.slice(0, 160)}`);
      }

      if (!json.data.article_content) {
        throw new Error('文章内容为空,可能需要更新 Cookie 或重新获取权限');
      }

      return json.data;
    } catch (error) {
      lastError = error;
    } finally {
      // Playwright keeps an APIResponse body in memory until the context is
      // closed unless it is disposed. This function runs once per article
      // (plus retries), so release the body as soon as it has been consumed.
      if (response) {
        try {
          await response.dispose();
        } catch {
          // Best-effort cleanup: a failed dispose must not mask the real outcome.
        }
      }
    }

    // Linear backoff before the next attempt; no wait after the final one.
    if (attempt < maxAttempts) {
      await new Promise(resolve => setTimeout(resolve, attempt * 700));
    }
  }

  throw lastError || new Error('未知错误导致文章内容获取失败');
}
331
+
332
/**
 * Cleans article HTML inside the browser page and returns the sanitized markup.
 *
 * The HTML is parsed into a detached <template> via page.evaluate, then:
 *   1. elements matching a fixed list of non-content selectors are removed;
 *   2. elements whose class/id/role/data-* values contain a plugin keyword are removed;
 *   3. mind-map containers and mind-map-like svg/canvas/object/embed elements are removed;
 *   4. remaining elements are filtered against a tag allow-list (disallowed
 *      elements are unwrapped, keeping their children) and an attribute allow-list;
 *   5. every <img> gets `loading="lazy"` (if unset) and responsive inline sizing.
 *
 * @param {object} page Playwright Page used only as an evaluation context.
 * @param {string} rawHtml Article HTML to sanitize.
 * @returns {Promise<string>} Sanitized HTML (the template's innerHTML).
 */
async function sanitizeArticleHtml(page, rawHtml) {
  return page.evaluate((html) => {
    const template = document.createElement('template');
    template.innerHTML = html;

    // Step 1: drop obvious non-content regions and interactive/embedded elements.
    const removalSelectors = [
      'nav', 'header', 'footer', 'aside',
      '.comment', '.comments', '.Index_comment',
      '.recommend', '.recommendation', '.related', '.advertisement', '.ad', '.banner',
      '.subscribe', '.subscription', '.toolbar', '.Index_shareIcons_1vtJa',
      '.keyboard-wrapper', '.app-download', '.article-actions', '.article-bottom',
      '.note', '.notes', '.annotation', '.translation', '.trans', '.translator',
      '.audio', '.audio-player', '.voice', '.player', '.geek-player', '.podcast', '.radio',
      '.reward', '.appreciate', '.appreciation', '.donate', '.sponsor', '.thanks', '.support',
      '.qrcode', '.qr-code', '.qr', '.promotion', '.promo', '.ad-banner',
      '.copyright', '.statement', '.disclaimer',
      '.app-download-banner', '.article-plugin', '.article-notification', '.float-bar',
      'audio', 'video',
      '[class*="Note"]', '[class*="note"]', '[class*="Translation"]', '[class*="translation"]',
      '[class*="Audio"]', '[class*="audio"]', '[class*="Reward"]', '[class*="reward"]',
      '[data-plugin]', '[data-track]', '[data-track-section]', '[data-translation]', '[data-audio]',
      '[data-role="toolbar"]',
      'button', 'iframe', 'script', 'style'
    ];
    removalSelectors.forEach(selector => {
      template.content.querySelectorAll(selector).forEach(el => el.remove());
    });

    // Step 2: keyword sweep over class, id, role and data-* values (case-insensitive).
    const pluginKeywords = [
      'note', 'translation', 'audio', 'player', 'reward', 'donate',
      'appreciation', 'sponsor', 'qrcode', 'toolbar', 'plugin',
      'copyright', 'geeknote', 'bilingual'
    ];
    const pluginElements = Array.from(template.content.querySelectorAll('*')).filter(el => {
      const className = (el.className || '').toString().toLowerCase();
      const idValue = (el.id || '').toString().toLowerCase();
      const roleValue = (el.getAttribute && el.getAttribute('role')) ? el.getAttribute('role').toLowerCase() : '';
      const datasetValues = el.dataset ? Object.values(el.dataset).join(' ').toLowerCase() : '';
      const combined = `${className} ${idValue} ${roleValue} ${datasetValues}`;
      return pluginKeywords.some(keyword => combined.includes(keyword));
    });
    pluginElements.forEach(el => el.remove());

    // Step 3: remove mind-map widgets.
    const mindmapSelectors = [
      '.mindmap', '.mind-map', '.MindMap', '.Mind-map',
      '[data-type="mindmap"]', '[data-role="mindmap"]', '[data-widget="mindmap"]',
      '[class*="MindMap"]', '[class*="mindMap"]'
    ];
    mindmapSelectors.forEach(selector => {
      template.content.querySelectorAll(selector).forEach(el => el.remove());
    });
    const vectorCandidates = Array.from(template.content.querySelectorAll('svg, canvas, object, embed'));
    vectorCandidates.forEach(el => {
      // SVG elements expose className as an object with baseVal rather than a string.
      const className = typeof el.className === 'object' ? el.className.baseVal : (el.className || '');
      const meta = `${className} ${el.id || ''} ${el.getAttribute('data-type') || ''}`.toLowerCase();
      // 'mind' is a substring of both 'mindmap' and 'mind-map', so one check covers all three.
      if (meta.includes('mind')) {
        el.remove();
      }
    });

    // Step 4: allow-list filtering.
    const allowedTags = new Set([
      'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
      'UL', 'OL', 'LI',
      'BLOCKQUOTE', 'PRE', 'CODE',
      'IMG', 'TABLE', 'THEAD', 'TBODY', 'TR', 'TH', 'TD', 'FIGURE', 'FIGCAPTION',
      'STRONG', 'EM', 'B', 'I', 'SPAN', 'DIV', 'BR', 'HR',
      'A', 'SUP', 'SUB'
    ]);

    const blockDisplayTags = new Set(['DIV', 'SECTION', 'ARTICLE', 'FIGURE']);
    const allowedAttributes = new Set(['href', 'src', 'alt', 'title', 'class', 'style', 'target', 'rel']);

    function sanitizeNode(node) {
      // Snapshot the children: the loop mutates the live collection.
      const children = Array.from(node.children || []);
      for (const child of children) {
        if (!allowedTags.has(child.tagName)) {
          // Sanitize the subtree BEFORE unwrapping. The hoisted descendants are
          // not part of the `children` snapshot, so they would otherwise skip
          // both the tag and the attribute allow-lists entirely.
          sanitizeNode(child);
          child.replaceWith(...child.childNodes);
          continue;
        }

        if (blockDisplayTags.has(child.tagName)) {
          child.style.display = 'block';
        }

        // Snapshot the attributes as well: removeAttribute mutates the live NamedNodeMap.
        const attributes = Array.from(child.attributes);
        for (const attr of attributes) {
          if (!allowedAttributes.has(attr.name.toLowerCase())) {
            child.removeAttribute(attr.name);
          }
        }

        sanitizeNode(child);
      }
    }

    sanitizeNode(template.content || template);

    // Step 5: image presentation tweaks. This runs after the attribute filter,
    // which is why `loading` can be set here even though it is not allow-listed.
    const images = template.content ? template.content.querySelectorAll('img') : [];
    images.forEach(img => {
      if (!img.getAttribute('loading')) {
        img.setAttribute('loading', 'lazy');
      }
      img.style.maxWidth = '100%';
      img.style.height = 'auto';
    });

    return template.innerHTML;
  }, rawHtml);
}
441
+
442
/**
 * Escapes the five HTML-significant characters (& < > " ') so that `text`
 * can be embedded safely in element content or a quoted attribute value.
 *
 * @param {string} [text=''] Text to escape.
 * @returns {string} The escaped text.
 */
function escapeHtml(text = '') {
  const entityByChar = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  // One pass over the input; each special character maps to exactly one entity.
  return text.replace(/[&<>"']/g, (ch) => entityByChar[ch]);
}
450
+
451
/**
 * Builds a complete standalone HTML document for rendering one article.
 *
 * The document contains a <base> pointing at GEEKTIME_BASE_URL, a <style>
 * element holding the base print styles defined here followed by
 * PRINT_FIX_CSS, and a wrapper <div> with the article title and body.
 *
 * @param {string} title Article title; HTML-escaped before being placed in the <h1>.
 * @param {string} sanitizedHtml Article body markup; inserted verbatim (NOT escaped),
 *   so it must already be sanitized by the caller.
 * @returns {string} The full HTML document source.
 */
function buildPrintableHtml(title, sanitizedHtml) {
  // Base typography/layout for the page; PRINT_FIX_CSS is appended after it in the <style> tag.
  const baseCss = `
    body {
      font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
      font-size: 16px;
      line-height: 1.8;
      color: #1f2329;
      margin: 0;
      padding: 40px;
      background: #fff;
    }

    .article-print-wrapper {
      max-width: 900px;
      margin: 0 auto;
    }

    .article-print-wrapper h1 {
      font-size: 32px;
      line-height: 1.4;
      margin-bottom: 24px;
    }

    a {
      color: #0f5ef2;
      text-decoration: none;
    }

    pre {
      background: #f7f7f7;
      padding: 16px;
      border-radius: 6px;
      overflow: auto;
    }
  `;

  // Only the title goes through escapeHtml; the body is trusted to be pre-sanitized markup.
  return `
    <!DOCTYPE html>
    <html lang="zh-CN">
      <head>
        <meta charset="utf-8">
        <base href="${GEEKTIME_BASE_URL}">
        <style>${baseCss}${PRINT_FIX_CSS}</style>
      </head>
      <body>
        <div class="article-print-wrapper">
          <h1>${escapeHtml(title)}</h1>
          ${sanitizedHtml}
        </div>
      </body>
    </html>`;
}
503
+
260
504
  // 获取专栏所有文章列表(通过API)
261
505
  function getValueByPath(obj, path) {
262
506
  if (!obj || !path) return undefined;
@@ -702,128 +946,67 @@ async function downloadWithConcurrency(context, articles, outputDir, concurrency
702
946
  // 下载单篇文章为 PDF(静默模式,不显示单独的spinner)
703
947
  async function downloadArticleSilent(page, article, outputDir, index, total) {
704
948
  try {
705
- // 访问文章页面
706
- await page.goto(article.url, { waitUntil: 'networkidle' });
707
- await page.waitForTimeout(2000);
708
-
709
- // 注入打印修复样式
710
- await page.addStyleTag({ content: PRINT_FIX_CSS });
711
-
712
- // 激进的布局重构:提取正文并重建页面结构
713
- await page.evaluate((titleText) => {
714
- // 1. 找到文章正文内容
715
- const articleContent = document.querySelector('.Index_articleContent_QBG5G, .article-content, article, [class*="articleContent"]');
716
-
717
- if (articleContent) {
718
- // 2. 克隆正文内容
719
- const contentClone = articleContent.cloneNode(true);
720
-
721
- // 3. 清空body的所有内容
722
- document.body.innerHTML = '';
723
-
724
- // 4. 重置body样式为全宽
725
- document.body.style.margin = '0';
726
- document.body.style.padding = '0';
727
- document.body.style.width = '100%';
728
- document.body.style.maxWidth = 'none';
729
- document.body.style.boxSizing = 'border-box';
730
-
731
- // 5. 创建一个简单的容器
732
- const wrapper = document.createElement('div');
733
- wrapper.style.width = '100%';
734
- wrapper.style.maxWidth = '100%';
735
- wrapper.style.margin = '0';
736
- wrapper.style.padding = '0';
737
- wrapper.style.boxSizing = 'border-box';
738
-
739
- // 6. 创建标题元素(使用传入的标题文本)
740
- if (titleText) {
741
- const titleElement = document.createElement('h1');
742
- titleElement.textContent = titleText;
743
- // 设置标题样式
744
- titleElement.style.fontSize = '32px';
745
- titleElement.style.fontWeight = 'bold';
746
- titleElement.style.marginBottom = '30px';
747
- titleElement.style.marginTop = '0';
748
- titleElement.style.lineHeight = '1.4';
749
- titleElement.style.color = '#000';
750
- wrapper.appendChild(titleElement);
751
- }
752
-
753
- // 7. 将正文插入容器
754
- wrapper.appendChild(contentClone);
755
-
756
- // 8. 将容器插入body
757
- document.body.appendChild(wrapper);
758
-
759
- // 9. 确保正文内容使用全宽且不溢出
760
- contentClone.style.width = '100%';
761
- contentClone.style.maxWidth = '100%';
762
- contentClone.style.margin = '0';
763
- contentClone.style.padding = '0';
764
- contentClone.style.boxSizing = 'border-box';
765
- contentClone.style.overflowWrap = 'break-word';
766
- contentClone.style.wordBreak = 'break-word';
767
- } else {
768
- // 如果找不到正文,使用原有的删除方法
769
- const selectors = [
770
- 'aside',
771
- '[class*="leftSide"]',
772
- '[class*="LeftSide"]',
773
- '[class*="sidebar"]',
774
- '[class*="Sidebar"]',
775
- '[class*="side_"]',
776
- '[class*="catalog"]',
777
- '[class*="directory"]',
778
- '[class*="toc"]',
779
- '[class*="outline"]',
780
- '[class*="Outline"]',
781
- 'nav',
782
- '[class*="nav"]',
783
- '[class*="Nav"]',
784
- '[class*="rightSide"]',
785
- '[class*="RightSide"]',
786
- '[class*="comment"]',
787
- '[class*="recommend"]',
788
- '[class*="footer"]',
789
- '[class*="bottom"]'
790
- ];
949
+ if (process.env.DEBUG) {
950
+ console.log(chalk.gray(`[silent] 准备处理文章 ${article.id} - ${article.originalTitle || article.title}`));
951
+ }
952
+ const articleData = await fetchArticleData(page.context(), article.id);
953
+ if (process.env.DEBUG) {
954
+ console.log(chalk.gray(`[silent] 已获取文章数据 ${article.id}`));
955
+ }
956
+ const normalizedHtml = normalizeArticleHtml(articleData.article_content || '');
957
+ const sanitizedHtml = await sanitizeArticleHtml(page, normalizedHtml);
958
+ if (process.env.DEBUG) {
959
+ console.log(chalk.gray(`[silent] 已完成内容清洗 ${article.id}`));
960
+ }
961
+ const printableHtml = buildPrintableHtml(article.originalTitle || article.title, sanitizedHtml);
791
962
 
792
- selectors.forEach(selector => {
793
- try {
794
- const elements = document.querySelectorAll(selector);
795
- elements.forEach(el => el.remove());
796
- } catch (e) {
797
- // 忽略无效选择器
798
- }
799
- });
963
+ await page.setContent(printableHtml, { waitUntil: 'domcontentloaded' });
964
+ if (process.env.DEBUG) {
965
+ console.log(chalk.gray(`[silent] 已设置页面内容 ${article.id}`));
966
+ }
967
+ try {
968
+ await page.waitForLoadState('networkidle', { timeout: 5000 });
969
+ if (process.env.DEBUG) {
970
+ console.log(chalk.gray(`[silent] networkidle 完成 ${article.id}`));
800
971
  }
801
-
802
- // 额外:删除所有包含"大纲"的元素
803
- const allElements = document.querySelectorAll('*');
804
- allElements.forEach(el => {
805
- const text = el.textContent || el.innerText || '';
806
- if (text.trim() === '大纲' ||
807
- (text.length < 200 && text.includes('大纲') && el.children.length <= 10)) {
808
- el.remove();
809
- }
810
- });
811
- }, article.originalTitle || article.title);
812
-
813
- // 等待文章内容加载
814
- await page.waitForSelector('.Index_articleContent_QBG5G, .content');
972
+ } catch {
973
+ // 忽略由于没有额外资源导致的延时
974
+ if (process.env.DEBUG) {
975
+ console.log(chalk.gray(`[silent] networkidle 超时(已忽略) ${article.id}`));
976
+ }
977
+ }
815
978
 
816
979
  // 优化图片大小:将大图片转换为合适的尺寸,减小PDF体积
980
+ if (process.env.DEBUG) {
981
+ console.log(chalk.gray(`[silent] 开始处理图片 ${article.id}`));
982
+ }
817
983
  await page.evaluate(() => {
818
984
  const images = document.querySelectorAll('img');
819
985
  const promises = Array.from(images).map(img => {
820
986
  return new Promise((resolve) => {
987
+ let resolved = false;
988
+ const safeResolve = () => {
989
+ if (!resolved) {
990
+ resolved = true;
991
+ resolve();
992
+ }
993
+ };
994
+ const attachTimeout = () => setTimeout(safeResolve, 3000);
995
+ let fallbackTimer = null;
996
+
821
997
  // 如果图片还未加载完成,等待加载
822
998
  if (!img.complete) {
823
- img.onload = () => processImage(img, resolve);
824
- img.onerror = () => resolve(); // 图片加载失败,跳过
999
+ fallbackTimer = attachTimeout();
1000
+ img.onload = () => {
1001
+ if (fallbackTimer) clearTimeout(fallbackTimer);
1002
+ processImage(img, safeResolve);
1003
+ };
1004
+ img.onerror = () => {
1005
+ if (fallbackTimer) clearTimeout(fallbackTimer);
1006
+ safeResolve(); // 图片加载失败,跳过
1007
+ };
825
1008
  } else {
826
- processImage(img, resolve);
1009
+ processImage(img, safeResolve);
827
1010
  }
828
1011
  });
829
1012
  });
@@ -851,12 +1034,21 @@ async function downloadArticleSilent(page, article, outputDir, index, total) {
851
1034
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
852
1035
 
853
1036
  // 转换为压缩后的data URL
1037
+ let hasResolved = false;
1038
+ const finalize = () => {
1039
+ if (!hasResolved) {
1040
+ hasResolved = true;
1041
+ resolve();
1042
+ }
1043
+ };
854
1044
  canvas.toBlob((blob) => {
855
- const url = URL.createObjectURL(blob);
856
- img.src = url;
1045
+ if (blob) {
1046
+ const url = URL.createObjectURL(blob);
1047
+ img.src = url;
1048
+ }
857
1049
  img.style.width = maxWidth + 'px';
858
1050
  img.style.height = 'auto';
859
- resolve();
1051
+ finalize();
860
1052
  }, 'image/jpeg', quality);
861
1053
  } catch (e) {
862
1054
  // 如果压缩失败,至少限制大小
@@ -868,9 +1060,15 @@ async function downloadArticleSilent(page, article, outputDir, index, total) {
868
1060
 
869
1061
  return Promise.all(promises);
870
1062
  });
1063
+ if (process.env.DEBUG) {
1064
+ console.log(chalk.gray(`[silent] 图片处理完成 ${article.id}`));
1065
+ }
871
1066
 
872
1067
  // 等待图片处理完成
873
1068
  await page.waitForTimeout(1000);
1069
+ if (process.env.DEBUG) {
1070
+ console.log(chalk.gray(`[silent] 已准备生成PDF ${article.id}`));
1071
+ }
874
1072
 
875
1073
  // 生成 PDF
876
1074
  const filename = `${String(index).padStart(3, '0')}_${article.title}.pdf`;
@@ -888,10 +1086,16 @@ async function downloadArticleSilent(page, article, outputDir, index, total) {
888
1086
  printBackground: false, // 关闭背景打印,显著减小文件大小
889
1087
  preferCSSPageSize: false
890
1088
  });
1089
+ if (process.env.DEBUG) {
1090
+ console.log(chalk.gray(`[silent] PDF生成完成 ${article.id}`));
1091
+ }
891
1092
 
892
1093
  return { success: true, title: article.title };
893
1094
 
894
1095
  } catch (error) {
1096
+ if (process.env.DEBUG) {
1097
+ console.log(chalk.red(`[silent] 文章 ${article.id} 失败: ${error.message}`));
1098
+ }
895
1099
  return { success: false, title: article.title, error: error.message };
896
1100
  }
897
1101
  }
@@ -901,116 +1105,17 @@ async function downloadArticle(page, article, outputDir, index, total) {
901
1105
  const spinner = ora(`[${index}/${total}] 正在下载: ${article.title}`).start();
902
1106
 
903
1107
  try {
904
- // 访问文章页面
905
- await page.goto(article.url, { waitUntil: 'networkidle' });
906
- await page.waitForTimeout(2000);
907
-
908
- // 注入打印修复样式
909
- await page.addStyleTag({ content: PRINT_FIX_CSS });
910
-
911
- // 激进的布局重构:提取正文并重建页面结构
912
- await page.evaluate((titleText) => {
913
- // 1. 找到文章正文内容
914
- const articleContent = document.querySelector('.Index_articleContent_QBG5G, .article-content, article, [class*="articleContent"]');
915
-
916
- if (articleContent) {
917
- // 2. 克隆正文内容
918
- const contentClone = articleContent.cloneNode(true);
919
-
920
- // 3. 清空body的所有内容
921
- document.body.innerHTML = '';
922
-
923
- // 4. 重置body样式为全宽
924
- document.body.style.margin = '0';
925
- document.body.style.padding = '0';
926
- document.body.style.width = '100%';
927
- document.body.style.maxWidth = 'none';
928
- document.body.style.boxSizing = 'border-box';
929
-
930
- // 5. 创建一个简单的容器
931
- const wrapper = document.createElement('div');
932
- wrapper.style.width = '100%';
933
- wrapper.style.maxWidth = '100%';
934
- wrapper.style.margin = '0';
935
- wrapper.style.padding = '0';
936
- wrapper.style.boxSizing = 'border-box';
937
-
938
- // 6. 创建标题元素(使用传入的标题文本)
939
- if (titleText) {
940
- const titleElement = document.createElement('h1');
941
- titleElement.textContent = titleText;
942
- // 设置标题样式
943
- titleElement.style.fontSize = '32px';
944
- titleElement.style.fontWeight = 'bold';
945
- titleElement.style.marginBottom = '30px';
946
- titleElement.style.marginTop = '0';
947
- titleElement.style.lineHeight = '1.4';
948
- titleElement.style.color = '#000';
949
- wrapper.appendChild(titleElement);
950
- }
951
-
952
- // 7. 将正文插入容器
953
- wrapper.appendChild(contentClone);
1108
+ const articleData = await fetchArticleData(page.context(), article.id);
1109
+ const normalizedHtml = normalizeArticleHtml(articleData.article_content || '');
1110
+ const sanitizedHtml = await sanitizeArticleHtml(page, normalizedHtml);
1111
+ const printableHtml = buildPrintableHtml(article.originalTitle || article.title, sanitizedHtml);
954
1112
 
955
- // 8. 将容器插入body
956
- document.body.appendChild(wrapper);
957
-
958
- // 9. 确保正文内容使用全宽且不溢出
959
- contentClone.style.width = '100%';
960
- contentClone.style.maxWidth = '100%';
961
- contentClone.style.margin = '0';
962
- contentClone.style.padding = '0';
963
- contentClone.style.boxSizing = 'border-box';
964
- contentClone.style.overflowWrap = 'break-word';
965
- contentClone.style.wordBreak = 'break-word';
966
- } else {
967
- // 如果找不到正文,使用原有的删除方法
968
- const selectors = [
969
- 'aside',
970
- '[class*="leftSide"]',
971
- '[class*="LeftSide"]',
972
- '[class*="sidebar"]',
973
- '[class*="Sidebar"]',
974
- '[class*="side_"]',
975
- '[class*="catalog"]',
976
- '[class*="directory"]',
977
- '[class*="toc"]',
978
- '[class*="outline"]',
979
- '[class*="Outline"]',
980
- 'nav',
981
- '[class*="nav"]',
982
- '[class*="Nav"]',
983
- '[class*="rightSide"]',
984
- '[class*="RightSide"]',
985
- '[class*="comment"]',
986
- '[class*="recommend"]',
987
- '[class*="footer"]',
988
- '[class*="bottom"]'
989
- ];
990
-
991
- selectors.forEach(selector => {
992
- try {
993
- const elements = document.querySelectorAll(selector);
994
- elements.forEach(el => el.remove());
995
- } catch (e) {
996
- // 忽略无效选择器
997
- }
998
- });
999
- }
1000
-
1001
- // 额外:删除所有包含"大纲"的元素
1002
- const allElements = document.querySelectorAll('*');
1003
- allElements.forEach(el => {
1004
- const text = el.textContent || el.innerText || '';
1005
- if (text.trim() === '大纲' ||
1006
- (text.length < 200 && text.includes('大纲') && el.children.length <= 10)) {
1007
- el.remove();
1008
- }
1009
- });
1010
- }, article.originalTitle || article.title);
1011
-
1012
- // 等待文章内容加载
1013
- await page.waitForSelector('.Index_articleContent_QBG5G, .content');
1113
+ await page.setContent(printableHtml, { waitUntil: 'domcontentloaded' });
1114
+ try {
1115
+ await page.waitForLoadState('networkidle', { timeout: 5000 });
1116
+ } catch {
1117
+ // 没有额外资源加载时忽略
1118
+ }
1014
1119
 
1015
1120
  // 优化图片大小:将大图片转换为合适的尺寸,减小PDF体积
1016
1121
  await page.evaluate(() => {
@@ -1209,460 +1314,32 @@ async function mergePDFs(outputDir, columnTitle, articles, deleteAfterMerge = fa
1209
1314
  // 提取单篇文章的 HTML 内容(用于 EPUB 生成)
1210
1315
  async function extractArticleContent(page, article, index, total) {
1211
1316
  try {
1212
- // 访问文章页面
1213
- await page.goto(article.url, { waitUntil: 'networkidle' });
1214
-
1215
- // 等待文章内容加载
1216
- await page.waitForSelector('.Index_articleContent_QBG5G, .content', { timeout: 60000 });
1217
-
1218
- // 关键:等待文章完整内容加载,而不是试看内容
1219
- // 滚动页面以触发懒加载内容
1220
- await page.evaluate(async () => {
1221
- await new Promise((resolve) => {
1222
- let totalHeight = 0;
1223
- const distance = 100;
1224
- const timer = setInterval(() => {
1225
- const scrollHeight = document.body.scrollHeight;
1226
- window.scrollBy(0, distance);
1227
- totalHeight += distance;
1228
-
1229
- if (totalHeight >= scrollHeight) {
1230
- clearInterval(timer);
1231
- resolve();
1232
- }
1233
- }, 100);
1234
- });
1235
- });
1236
-
1237
- // 再等待一段时间,确保内容完全加载
1238
- await page.waitForTimeout(3000);
1239
-
1240
- // 提取文章 HTML 内容
1241
- const content = await page.evaluate(() => {
1242
- // 找到文章正文内容
1243
- const articleContent = document.querySelector('.Index_articleContent_QBG5G, .article-content, article, [class*="articleContent"]');
1244
-
1245
- if (!articleContent) {
1246
- return null;
1247
- }
1248
-
1249
- // 克隆正文以避免修改原始DOM
1250
- const contentClone = articleContent.cloneNode(true);
1251
-
1252
- // 白名单策略:只保留正文核心元素
1253
- // 允许的元素标签
1254
- const allowedTags = new Set([
1255
- 'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', // 段落和标题
1256
- 'UL', 'OL', 'LI', // 列表
1257
- 'BLOCKQUOTE', // 引用
1258
- 'PRE', 'CODE', // 代码
1259
- 'IMG', // 图片
1260
- 'TABLE', 'THEAD', 'TBODY', 'TR', 'TH', 'TD', // 表格
1261
- 'A', // 链接
1262
- 'STRONG', 'B', 'EM', 'I', 'U', // 强调和样式
1263
- 'BR', 'HR', // 换行和分隔线
1264
- 'FIGURE', 'FIGCAPTION', 'DETAILS', 'SUMMARY',
1265
- 'SPAN', 'DIV', 'SECTION', 'ARTICLE' // 容器(可能包含文本)
1266
- ]);
1267
-
1268
- // 在清理前,移除常见的非正文区域
1269
- const removalSelectors = [
1270
- 'nav', 'header', 'footer', 'aside',
1271
- '.comment', '.comments', '.Index_comment',
1272
- '.recommend', '.recommendation', '.related', '.advertisement', '.ad', '.banner',
1273
- '.subscribe', '.subscription', '.toolbar', '.Index_shareIcons_1vtJa',
1274
- '.keyboard-wrapper', '.app-download', '.article-actions', '.article-bottom',
1275
- '.note', '.notes', '.annotation', '.translation', '.trans', '.translator',
1276
- '.audio', '.audio-player', '.voice', '.player', '.geek-player', '.podcast', '.radio',
1277
- '.reward', '.appreciate', '.appreciation', '.donate', '.sponsor', '.thanks', '.support',
1278
- '.qrcode', '.qr-code', '.qr', '.promotion', '.promo', '.ad-banner',
1279
- '.copyright', '.statement', '.disclaimer',
1280
- '.app-download-banner', '.article-plugin', '.article-notification', '.float-bar',
1281
- 'audio', 'video',
1282
- '[class*="Note"]', '[class*="note"]', '[class*="Translation"]', '[class*="translation"]',
1283
- '[class*="Audio"]', '[class*="audio"]', '[class*="Reward"]', '[class*="reward"]',
1284
- '[data-plugin]', '[data-track]', '[data-track-section]', '[data-translation]', '[data-audio]',
1285
- '[data-role="toolbar"]',
1286
- 'button', 'iframe', 'script', 'style'
1287
- ];
1288
- removalSelectors.forEach(selector => {
1289
- contentClone.querySelectorAll(selector).forEach(el => el.remove());
1290
- });
1291
-
1292
- // 根据关键词进一步移除插件类元素
1293
- const pluginKeywords = [
1294
- 'note', 'translation', 'audio', 'player', 'reward', 'donate',
1295
- 'appreciation', 'sponsor', 'qrcode', 'toolbar', 'plugin',
1296
- 'copyright', 'geeknote', 'bilingual'
1297
- ];
1298
- const pluginElements = Array.from(contentClone.querySelectorAll('*')).filter(el => {
1299
- const className = (el.className || '').toString().toLowerCase();
1300
- const idValue = (el.id || '').toString().toLowerCase();
1301
- const roleValue = (el.getAttribute && el.getAttribute('role')) ? el.getAttribute('role').toLowerCase() : '';
1302
- const datasetValues = el.dataset ? Object.values(el.dataset).join(' ').toLowerCase() : '';
1303
- const combined = `${className} ${idValue} ${roleValue} ${datasetValues}`;
1304
- return pluginKeywords.some(keyword => combined.includes(keyword));
1305
- });
1306
- pluginElements.forEach(el => el.remove());
1307
-
1308
- // 移除 MindMap 等 SVG/Canvas 思维导图内容(阅读器无法正确渲染)
1309
- const mindmapSelectors = [
1310
- '.mindmap', '.mind-map', '.MindMap', '.Mind-map',
1311
- '[data-type="mindmap"]', '[data-role="mindmap"]', '[data-widget="mindmap"]',
1312
- '[class*="MindMap"]', '[class*="mindMap"]'
1313
- ];
1314
- mindmapSelectors.forEach(selector => {
1315
- contentClone.querySelectorAll(selector).forEach(el => el.remove());
1316
- });
1317
- const vectorCandidates = Array.from(contentClone.querySelectorAll('svg, canvas, object, embed'));
1318
- vectorCandidates.forEach(el => {
1319
- const className = typeof el.className === 'object' ? el.className.baseVal : (el.className || '');
1320
- const meta = `${className} ${el.id || ''} ${el.getAttribute('data-type') || ''}`.toLowerCase();
1321
- if (meta.includes('mind') || meta.includes('mindmap') || meta.includes('mind-map')) {
1322
- el.remove();
1323
- }
1324
- });
1325
-
1326
- // 将富文本中的代码块结构转换为标准 <pre><code>
1327
- const blockSeparatorTags = new Set([
1328
- 'P','DIV','SECTION','ARTICLE','UL','OL','LI','FIGURE','FIGCAPTION',
1329
- 'TABLE','THEAD','TBODY','TR','TD'
1330
- ]);
1331
-
1332
- function collectCodeText(node) {
1333
- const parts = [];
1334
-
1335
- const ensureNewline = () => {
1336
- if (!parts.length) {
1337
- parts.push('\n');
1338
- return;
1339
- }
1340
- if (!parts[parts.length - 1].endsWith('\n')) {
1341
- parts.push('\n');
1342
- }
1343
- };
1344
-
1345
- const traverse = (current) => {
1346
- if (!current) {
1347
- return;
1348
- }
1349
- if (current.nodeType === Node.TEXT_NODE) {
1350
- const textValue = current.textContent.replace(/\u00A0/g, ' ');
1351
- if (textValue) {
1352
- parts.push(textValue);
1353
- }
1354
- return;
1355
- }
1356
- if (current.nodeType !== Node.ELEMENT_NODE) {
1357
- return;
1358
- }
1359
- const tag = current.tagName.toUpperCase();
1360
- if (tag === 'BR') {
1361
- ensureNewline();
1362
- return;
1363
- }
1364
- Array.from(current.childNodes).forEach(traverse);
1365
- if (blockSeparatorTags.has(tag)) {
1366
- ensureNewline();
1367
- }
1368
- };
1369
-
1370
- traverse(node);
1371
- let text = parts.join('');
1372
- text = text
1373
- .replace(/\r\n/g, '\n')
1374
- .replace(/\n{3,}/g, '\n\n')
1375
- .replace(/[ \t]+\n/g, '\n')
1376
- .replace(/\n+$/g, '\n');
1377
- return text.trim() ? text : '';
1378
- }
1379
-
1380
- const codeLikeSelectors = [
1381
- '[data-slate-type="code"]',
1382
- '[data-slate-node="code"]',
1383
- '[data-code-block]',
1384
- '[data-code]',
1385
- '[data-code-language]',
1386
- '[class*="code-block"]',
1387
- '[class*="CodeBlock"]'
1388
- ];
1389
- const codeCandidates = new Set();
1390
- codeLikeSelectors.forEach(selector => {
1391
- contentClone.querySelectorAll(selector).forEach(el => codeCandidates.add(el));
1392
- });
1393
- const replaceWithPre = (element) => {
1394
- if (!element || !element.parentNode) {
1395
- return;
1396
- }
1397
- const codeText = collectCodeText(element);
1398
- if (!codeText) {
1399
- element.remove();
1400
- return;
1401
- }
1402
- const pre = document.createElement('pre');
1403
- const code = document.createElement('code');
1404
- code.textContent = codeText;
1405
- pre.appendChild(code);
1406
- element.parentNode.replaceChild(pre, element);
1407
- };
1408
- codeCandidates.forEach(el => {
1409
- if (el.tagName && el.tagName.toUpperCase() === 'PRE') {
1410
- return;
1411
- }
1412
- replaceWithPre(el);
1413
- });
1414
-
1415
- const multilineInlineCodes = Array.from(contentClone.querySelectorAll('code')).filter(codeEl => {
1416
- const parent = codeEl.parentElement;
1417
- return parent && parent.tagName.toUpperCase() !== 'PRE' && codeEl.textContent.includes('\n');
1418
- });
1419
- multilineInlineCodes.forEach(codeEl => {
1420
- const codeText = collectCodeText(codeEl);
1421
- if (!codeText) {
1422
- codeEl.remove();
1423
- return;
1424
- }
1425
- const pre = document.createElement('pre');
1426
- const innerCode = document.createElement('code');
1427
- innerCode.textContent = codeText;
1428
- pre.appendChild(innerCode);
1429
- codeEl.parentNode.replaceChild(pre, codeEl);
1430
- });
1431
-
1432
- // 递归清理函数:移除不在白名单中的元素
1433
- function cleanElement(element) {
1434
- const children = Array.from(element.childNodes);
1435
-
1436
- for (const child of children) {
1437
- if (child.nodeType === Node.ELEMENT_NODE) {
1438
- const tagName = child.tagName.toUpperCase();
1439
-
1440
- if (!allowedTags.has(tagName)) {
1441
- // 先递归处理子节点
1442
- cleanElement(child);
1443
-
1444
- if (child.childNodes.length > 0) {
1445
- while (child.firstChild) {
1446
- element.insertBefore(child.firstChild, child);
1447
- }
1448
- child.remove();
1449
- } else {
1450
- const textContent = (child.textContent || '').trim();
1451
- if (textContent) {
1452
- const textNode = document.createTextNode(textContent + ' ');
1453
- element.insertBefore(textNode, child);
1454
- }
1455
- child.remove();
1456
- }
1457
- } else {
1458
- cleanElement(child);
1459
- }
1460
- }
1461
- }
1462
- }
1463
-
1464
- cleanElement(contentClone);
1465
-
1466
- // 移除所有style属性,避免样式冲突
1467
- const allElements = contentClone.querySelectorAll('*');
1468
- allElements.forEach(el => {
1469
- el.removeAttribute('style');
1470
- el.removeAttribute('class');
1471
- el.removeAttribute('id');
1472
- el.removeAttribute('onclick');
1473
- el.removeAttribute('onload');
1474
- });
1475
-
1476
- // 处理图片URL
1477
- const images = contentClone.querySelectorAll('img');
1478
- const adKeywordLower = ['ad', 'advert', 'banner', 'qrcode', 'qr-code', 'reward', 'donate', 'appdownload', 'app-download', 'sponsor', 'thanks'];
1479
- const adKeywordCn = ['广告', '二维码', '赞赏', '打赏', '版权', '推广'];
1480
- images.forEach(img => {
1481
- let src = img.getAttribute('src');
1482
- const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') || img.getAttribute('data-lazy-src');
1483
-
1484
- if (dataSrc && (dataSrc.startsWith('http://') || dataSrc.startsWith('https://'))) {
1485
- src = dataSrc;
1486
- img.setAttribute('src', src);
1487
- }
1488
-
1489
- if (!src || src.startsWith('blob:') || src.startsWith('data:')) {
1490
- img.remove();
1491
- return;
1492
- }
1493
-
1494
- if (!src.startsWith('http://') && !src.startsWith('https://')) {
1495
- try {
1496
- const absoluteUrl = new URL(src, window.location.href).href;
1497
- img.setAttribute('src', absoluteUrl);
1498
- src = absoluteUrl;
1499
- } catch (e) {
1500
- img.remove();
1501
- }
1502
- }
1317
+ const articleData = await fetchArticleData(page.context(), article.id);
1318
+ const normalizedHtml = normalizeArticleHtml(articleData.article_content || '');
1319
+ const sanitizedHtml = await sanitizeArticleHtml(page, normalizedHtml);
1503
1320
 
1504
- const altText = img.getAttribute('alt') || '';
1505
- const altLower = altText.toLowerCase();
1506
- const srcLower = (src || '').toLowerCase();
1507
- if (
1508
- adKeywordLower.some(keyword => srcLower.includes(keyword)) ||
1509
- adKeywordLower.some(keyword => altLower.includes(keyword)) ||
1510
- adKeywordCn.some(keyword => altText.includes(keyword))
1511
- ) {
1512
- img.remove();
1513
- return;
1514
- }
1515
-
1516
- // 清理图片属性
1517
- const imgAttrs = img.attributes;
1518
- for (let i = imgAttrs.length - 1; i >= 0; i--) {
1519
- const attrName = imgAttrs[i].name;
1520
- if (attrName !== 'src' && attrName !== 'alt') {
1521
- img.removeAttribute(attrName);
1522
- }
1523
- }
1524
- });
1525
-
1526
- // 清理空的div和span
1527
- const containers = contentClone.querySelectorAll('div, span');
1528
- containers.forEach(container => {
1529
- if (!container.textContent.trim() && !container.querySelector('img, pre, code, table')) {
1530
- container.remove();
1531
- }
1532
- });
1533
-
1534
- // 将只包含纯文本的 div 转换为段落,避免没有段间距
1535
- const blockLikeTags = new Set(['P','UL','OL','LI','TABLE','PRE','BLOCKQUOTE','H1','H2','H3','H4','H5','H6','IMG','SECTION','ARTICLE','FIGURE','FIGCAPTION','DETAILS','SUMMARY']);
1536
- const textContainers = Array.from(contentClone.querySelectorAll('div, section, article')).reverse();
1537
- textContainers.forEach(container => {
1538
- if (container === contentClone) {
1539
- return;
1540
- }
1541
-
1542
- if (!container.textContent.trim()) {
1543
- return;
1544
- }
1545
-
1546
- if (container.querySelector('img, pre, table, ul, ol, blockquote, h1, h2, h3, h4, h5, h6, figure')) {
1547
- return;
1548
- }
1549
-
1550
- const hasBlockChildren = Array.from(container.children).some(child => blockLikeTags.has(child.tagName?.toUpperCase()));
1551
- if (hasBlockChildren) {
1552
- return;
1553
- }
1554
-
1555
- const paragraph = document.createElement('p');
1556
- paragraph.innerHTML = container.innerHTML;
1557
- container.parentNode.replaceChild(paragraph, container);
1558
- });
1559
-
1560
- // 包装直接挂在容器下的文本或行内节点,避免散乱文本没有段落间距
1561
- const inlineTags = new Set(['A','SPAN','STRONG','B','EM','I','U','CODE','SMALL','SUB','SUP','MARK']);
1562
-
1563
- function wrapInlineChildren(element) {
1564
- const tagName = element.tagName ? element.tagName.toUpperCase() : '';
1565
- if (['P','LI','PRE','CODE','TABLE','THEAD','TBODY','TR'].includes(tagName)) {
1566
- return;
1567
- }
1568
-
1569
- const childNodes = Array.from(element.childNodes);
1570
- let buffer = [];
1571
-
1572
- const flushBuffer = (referenceNode) => {
1573
- if (!buffer.length) {
1574
- return;
1575
- }
1576
- const paragraph = document.createElement('p');
1577
- buffer.forEach(node => paragraph.appendChild(node));
1578
- element.insertBefore(paragraph, referenceNode);
1579
- buffer = [];
1580
- };
1581
-
1582
- for (const node of childNodes) {
1583
- if (node.nodeType === Node.TEXT_NODE) {
1584
- if (node.textContent.trim()) {
1585
- buffer.push(node);
1586
- } else {
1587
- element.removeChild(node);
1588
- }
1589
- continue;
1590
- }
1591
-
1592
- if (node.nodeType === Node.ELEMENT_NODE) {
1593
- const childTag = node.tagName.toUpperCase();
1594
- if (inlineTags.has(childTag) || childTag === 'BR') {
1595
- buffer.push(node);
1596
- continue;
1597
- }
1598
-
1599
- flushBuffer(node);
1600
- wrapInlineChildren(node);
1601
- continue;
1602
- }
1603
-
1604
- flushBuffer(node);
1605
- }
1606
-
1607
- flushBuffer(null);
1608
- }
1609
-
1610
- wrapInlineChildren(contentClone);
1611
-
1612
- // 移除尾部的版权/广告声明
1613
- const footerKeywords = ['版权', '未经许可', '未经授权', '不得转载', '未经允许', 'All Rights Reserved', '最终解释权', '转载'];
1614
- const trailingElements = Array.from(contentClone.querySelectorAll('p, div, section')).slice(-6);
1615
- trailingElements.forEach(el => {
1616
- const text = (el.textContent || '').trim();
1617
- if (!text) {
1618
- return;
1619
- }
1620
- if (text.length <= 200 && footerKeywords.some(keyword => text.includes(keyword))) {
1621
- el.remove();
1622
- }
1623
- });
1624
-
1625
- // 处理代码块
1626
- const codeBlocks = contentClone.querySelectorAll('pre');
1627
- codeBlocks.forEach(block => {
1628
- const codeText = collectCodeText(block);
1629
- if (!codeText) {
1630
- block.remove();
1631
- return;
1632
- }
1633
- let codeInside = block.querySelector('code');
1634
- if (!codeInside) {
1635
- codeInside = document.createElement('code');
1636
- block.appendChild(codeInside);
1637
- }
1638
- codeInside.textContent = codeText;
1639
- });
1640
-
1641
- return contentClone.innerHTML;
1642
- });
1321
+ if (!sanitizedHtml) {
1322
+ throw new Error('未能提取到文章内容');
1323
+ }
1643
1324
 
1644
1325
  return {
1645
1326
  success: true,
1646
1327
  title: article.originalTitle || article.title,
1647
- content: content || `<p>内容提取失败</p>`
1328
+ content: sanitizedHtml
1648
1329
  };
1649
1330
 
1650
1331
  } catch (error) {
1651
- // 判断是否可能是 Cookie 失效
1652
- let errorMessage = error.message;
1653
- if (error.message.includes('Timeout') || error.message.includes('timeout')) {
1654
- errorMessage = 'Cookie 可能已失效或页面加载超时';
1655
- }
1656
-
1332
+ console.error(`[${index}/${total}] 提取文章内容失败: ${article.originalTitle || article.title}`, error);
1657
1333
  return {
1658
1334
  success: false,
1659
1335
  title: article.originalTitle || article.title,
1660
- content: `<p>下载失败: ${errorMessage}</p>`,
1661
- error: errorMessage
1336
+ error: error.message,
1337
+ content: ''
1662
1338
  };
1663
1339
  }
1664
1340
  }
1665
1341
 
1342
+
1666
1343
  // 并发提取文章内容(用于 EPUB)
1667
1344
  async function extractWithConcurrency(context, articles, concurrency = 5, delay = 2000, timeout = 60000) {
1668
1345
  const results = [];
@@ -1769,7 +1446,7 @@ async function generateEPUB(outputDir, columnTitle, columnAuthor, articles, cont
1769
1446
  return null;
1770
1447
  }
1771
1448
 
1772
- const options = {
1449
+ const options = {
1773
1450
  title: columnTitle,
1774
1451
  author: columnAuthor || '极客时间',
1775
1452
  publisher: '极客时间',
@@ -2029,13 +1706,46 @@ async function main(options) {
2029
1706
  globalBrowser = browser;
2030
1707
 
2031
1708
  const context = await browser.newContext({
2032
- userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
1709
+ userAgent: DEFAULT_USER_AGENT
2033
1710
  });
2034
1711
 
1712
+ // 兼容用户直接复制整行"Cookie: xxx"
1713
+ let normalizedCookie = cookie.trim();
1714
+ if (/^cookie:/i.test(normalizedCookie)) {
1715
+ normalizedCookie = normalizedCookie.replace(/^cookie:\s*/i, '');
1716
+ }
1717
+ globalCookieHeader = normalizedCookie;
1718
+
2035
1719
  // 设置 cookies
2036
- const cookies = parseCookies(cookie);
1720
+ const cookies = parseCookies(normalizedCookie);
2037
1721
  await context.addCookies(cookies);
2038
1722
 
1723
+ // 确保所有极客时间域名的请求都携带原始Cookie串,避免Playwright丢失关键字段
1724
+ await context.route('**/*', (route) => {
1725
+ const request = route.request();
1726
+ let url;
1727
+ try {
1728
+ url = new URL(request.url());
1729
+ } catch {
1730
+ return route.continue();
1731
+ }
1732
+
1733
+ const hostname = url.hostname || '';
1734
+ const isGeekbangDomain =
1735
+ hostname === 'geekbang.org' ||
1736
+ hostname.endsWith('.geekbang.org');
1737
+
1738
+ if (!isGeekbangDomain) {
1739
+ return route.continue();
1740
+ }
1741
+
1742
+ const headers = {
1743
+ ...request.headers(),
1744
+ cookie: normalizedCookie
1745
+ };
1746
+ route.continue({ headers });
1747
+ });
1748
+
2039
1749
  const page = await context.newPage();
2040
1750
 
2041
1751
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kadaliao/geektime-downloader",
3
- "version": "1.1.2",
3
+ "version": "1.1.3",
4
4
  "description": "极客时间专栏文章批量下载工具 - 支持一键下载整个专栏为PDF或EPUB",
5
5
  "type": "module",
6
6
  "main": "download.js",