tt-help-cli-ycl 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +17 -17
  2. package/cli.js +9 -9
  3. package/package.json +44 -44
  4. package/src/cli/auto.js +1 -1
  5. package/src/cli/explore.js +1 -1
  6. package/src/cli/progress.js +111 -111
  7. package/src/cli/scrape.js +47 -47
  8. package/src/cli/utils.js +18 -18
  9. package/src/cli/videos.js +41 -41
  10. package/src/cli/watch.js +28 -28
  11. package/src/lib/args.js +385 -377
  12. package/src/lib/browser/anti-detect.js +23 -23
  13. package/src/lib/browser/cdp.js +142 -142
  14. package/src/lib/browser/launch.js +43 -43
  15. package/src/lib/browser/page.js +80 -62
  16. package/src/lib/constants.js +84 -85
  17. package/src/lib/delay.js +54 -54
  18. package/src/lib/{explore.js → explore-fetch.js} +118 -118
  19. package/src/lib/fetcher.js +45 -45
  20. package/src/lib/filter.js +66 -66
  21. package/src/lib/io.js +54 -54
  22. package/src/lib/output.js +80 -80
  23. package/src/lib/parser.js +47 -47
  24. package/src/lib/retry.js +44 -44
  25. package/src/lib/scrape.js +40 -40
  26. package/src/lib/url.js +52 -52
  27. package/src/main.mjs +200 -200
  28. package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
  29. package/src/{auto-core.mjs → scraper/auto-core.mjs} +183 -174
  30. package/src/scraper/core.mjs +188 -182
  31. package/src/{explore-core.mjs → scraper/explore-core.mjs} +159 -148
  32. package/src/scraper/modules/captcha-handler.mjs +114 -0
  33. package/src/scraper/modules/comment-extractor.mjs +69 -57
  34. package/src/scraper/modules/follow-extractor.mjs +121 -121
  35. package/src/scraper/modules/guess-extractor.mjs +51 -51
  36. package/src/scraper/modules/page-error-detector.mjs +70 -68
  37. package/src/scraper/modules/page-helpers.mjs +46 -44
  38. package/src/scraper/modules/scroll-collector.mjs +189 -189
  39. package/src/{get-user-videos-core.mjs → videos/core.mjs} +126 -126
  40. package/src/{data-store.mjs → watch/data-store.mjs} +29 -3
  41. package/src/watch/public/index.html +444 -344
  42. package/src/watch/server.mjs +24 -1
  43. package/src/lib/auto-browser.mjs +0 -6
  44. package/src/lib/get-user-videos-browser.mjs +0 -1
  45. package/src/lib/scrape-browser.mjs +0 -1
  46. package/src/test-auto-follow.cjs +0 -109
  47. package/src/test-extractors.cjs +0 -75
  48. package/src/test-follow.cjs +0 -41
@@ -1,86 +1,85 @@
1
- import { join, dirname } from 'path';
2
- import { readFileSync, writeFileSync, existsSync } from 'fs';
3
- import { fileURLToPath } from 'url';
4
- import { USER_SECTION_SIZE } from './parser.js';
5
-
6
- const __filename = fileURLToPath(import.meta.url);
7
- const __dirname = dirname(__filename);
8
- const homeDir = process.env.HOME || process.env.USERPROFILE || '';
9
- const configPath = join(homeDir, '.tt-help.json');
10
-
11
- const DEFAULT_PROXY = 'http://127.0.0.1:7897';
12
- const DEFAULT_OUTPUT = 'tiktok_data.json';
13
-
14
- let proxy = DEFAULT_PROXY;
15
- let configFile = null;
16
- let browser = null;
17
-
18
- try {
19
- if (existsSync(configPath)) {
20
- const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
21
- if (cfg.proxy) {
22
- proxy = cfg.proxy;
23
- }
24
- if (cfg.browser) {
25
- browser = cfg.browser;
26
- }
27
- configFile = configPath;
28
- }
29
- } catch {
30
- // no config file
31
- }
32
-
33
- function saveBrowser(path) {
34
- const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
35
- cfg.browser = path;
36
- writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
37
- browser = path;
38
- configFile = configPath;
39
- }
40
-
41
- const HELP_TEXT = [
42
- '用法: tt-help explore <用户名> [preset] [选项]',
43
- '',
44
- ' 支持多个用户名: tt-help explore @user1 @user2 --server http://127.0.0.1:3000',
45
- ' 预设: fast, normal(默认), slow, stealth',
46
- ' 选项:',
47
- ' --server <URL> 服务端地址,默认 http://127.0.0.1:3000',
48
- ' --location <国家代码> 国家筛选,默认 ES',
49
- ' --max-comments <数量> 每视频最大评论数,默认 10',
50
- ' --max-guess <数量> 每视频最大猜你喜欢数,默认 0',
51
- ' --enable-follow 启用关注/粉丝提取(默认启用)',
52
- ' --disable-follow 禁用关注/粉丝提取',
53
- ' --max-following <数量> 最大获取关注数,默认 5',
54
- ' --max-followers <数量> 最大获取粉丝数,默认 5',
55
- ' --max-users <数量> 最大处理用户数,默认无限制',
56
- ' --proxy <地址> 临时指定代理地址',
57
- ' -o, --output <file> 指定输出文件(默认: tiktok_data.json)',
58
- ' -h, --help 显示帮助',
59
- ' --version 显示版本号',
60
- '',
61
- ' 示例: tt-help explore qiqi23280 fast --location ES --max-comments 50',
1
+ import { join, dirname } from 'path';
2
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
3
+ import { fileURLToPath } from 'url';
4
+ import { USER_SECTION_SIZE } from './parser.js';
5
+
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = dirname(__filename);
8
+ const homeDir = process.env.HOME || process.env.USERPROFILE || '';
9
+ const configPath = join(homeDir, '.tt-help.json');
10
+
11
+ const DEFAULT_PROXY = 'http://127.0.0.1:7897';
12
+ const DEFAULT_OUTPUT = 'tiktok_data.json';
13
+
14
+ let proxy = DEFAULT_PROXY;
15
+ let configFile = null;
16
+ let browser = null;
17
+
18
+ try {
19
+ if (existsSync(configPath)) {
20
+ const cfg = JSON.parse(readFileSync(configPath, 'utf-8'));
21
+ if (cfg.proxy) {
22
+ proxy = cfg.proxy;
23
+ }
24
+ if (cfg.browser) {
25
+ browser = cfg.browser;
26
+ }
27
+ configFile = configPath;
28
+ }
29
+ } catch {
30
+ // no config file
31
+ }
32
+
33
+ function saveBrowser(path) {
34
+ const cfg = existsSync(configPath) ? JSON.parse(readFileSync(configPath, 'utf-8')) : {};
35
+ cfg.browser = path;
36
+ writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf-8');
37
+ browser = path;
38
+ configFile = configPath;
39
+ }
40
+
41
+ const HELP_TEXT = [
42
+ '用法: tt-help explore <用户名> [preset] [选项]',
43
+ '',
44
+ ' 支持多个用户名: tt-help explore @user1 @user2 --server http://127.0.0.1:3000',
45
+ ' 预设: fast, normal(默认), slow, stealth',
46
+ ' 选项:',
47
+ ' --server <URL> 服务端地址,默认 http://127.0.0.1:3000',
48
+ ' --location <国家代码> 国家筛选,默认 ES',
49
+ ' --max-comments <数量> 每视频最大评论数,默认 10',
50
+ ' --max-guess <数量> 每视频最大猜你喜欢数,默认 0',
51
+ ' --enable-follow 启用关注/粉丝提取(默认启用)',
52
+ ' --disable-follow 禁用关注/粉丝提取',
53
+ ' --max-following <数量> 最大获取关注数,默认 5',
54
+ ' --max-followers <数量> 最大获取粉丝数,默认 5',
55
+ ' --max-users <数量> 最大处理用户数,默认无限制',
56
+ ' 全局选项:',
57
+ ' -h, --help 显示帮助',
58
+ ' --version 显示版本号',
59
+ '',
60
+ ' 示例: tt-help explore qiqi23280 fast --location ES --max-comments 50',
62
61
  ];
63
-
64
- const CONFIG_TEXT = [
65
- 'tt-help v1.0.1',
66
- '',
67
- '配置:',
68
- ` 代理: ${proxy}`,
69
- ` 浏览器: ${browser || '未配置(将自动探测或回退)'}`,
70
- ` 输出格式: json`,
71
- ` 默认输出: ${DEFAULT_OUTPUT}`,
72
- ` 配置文件: ${configFile || '无(使用默认值)'}`,
73
- ];
74
-
75
- export {
76
- proxy,
77
- configFile,
78
- configPath,
79
- DEFAULT_PROXY,
80
- DEFAULT_OUTPUT,
81
- USER_SECTION_SIZE,
82
- HELP_TEXT,
83
- CONFIG_TEXT,
84
- browser,
85
- saveBrowser,
86
- };
62
+
63
+ const CONFIG_TEXT = [
64
+ 'tt-help v1.0.1',
65
+ '',
66
+ '配置:',
67
+ ` 代理: ${proxy}`,
68
+ ` 浏览器: ${browser || '未配置(将自动探测或回退)'}`,
69
+ ` 输出格式: json`,
70
+ ` 默认输出: ${DEFAULT_OUTPUT}`,
71
+ ` 配置文件: ${configFile || '无(使用默认值)'}`,
72
+ ];
73
+
74
+ export {
75
+ proxy,
76
+ configFile,
77
+ configPath,
78
+ DEFAULT_PROXY,
79
+ DEFAULT_OUTPUT,
80
+ USER_SECTION_SIZE,
81
+ HELP_TEXT,
82
+ CONFIG_TEXT,
83
+ browser,
84
+ saveBrowser,
85
+ };
package/src/lib/delay.js CHANGED
@@ -1,54 +1,54 @@
1
- export const DELAY_PRESETS = {
2
- fast: { switchMax: 300, commentMax: 200, fast: true },
3
- normal: { switchMax: 1500, commentMax: 800 },
4
- slow: { switchMax: 3000, commentMax: 2000 },
5
- stealth: { switchMax: 5000, commentMax: 3500 },
6
- };
7
-
8
- const delayConfig = {
9
- switchMax: 2500,
10
- commentMax: 1500,
11
- fast: false,
12
- };
13
-
14
- export function setDelayConfig(config) {
15
- if (typeof config === 'string') {
16
- const preset = DELAY_PRESETS[config.toLowerCase()];
17
- if (!preset) {
18
- throw new Error(
19
- `未知的延迟预设: ${config}\n可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
20
- );
21
- }
22
- delayConfig.switchMax = preset.switchMax;
23
- delayConfig.commentMax = preset.commentMax;
24
- delayConfig.fast = preset.fast || false;
25
- } else if (typeof config === 'object') {
26
- if (config.switchMax) delayConfig.switchMax = config.switchMax;
27
- if (config.commentMax) delayConfig.commentMax = config.commentMax;
28
- delayConfig.fast = config.fast || false;
29
- }
30
- }
31
-
32
- export function getDelayConfig() {
33
- return { ...delayConfig };
34
- }
35
-
36
- export function listDelayPresets() {
37
- return DELAY_PRESETS;
38
- }
39
-
40
- export function delay(min, max) {
41
- const lo = Math.min(min, max);
42
- const hi = Math.max(min, max);
43
- let ms;
44
- if (delayConfig.fast) {
45
- ms = 0;
46
- } else {
47
- ms = Math.floor(Math.random() * (hi - lo + 1)) + lo;
48
- }
49
- return new Promise(r => setTimeout(r, ms));
50
- }
51
-
52
- export function randomDelay(min = 200, max = 600) {
53
- return delay(min, max);
54
- }
1
+ export const DELAY_PRESETS = {
2
+ fast: { switchMax: 300, commentMax: 200, fast: true },
3
+ normal: { switchMax: 1500, commentMax: 800 },
4
+ slow: { switchMax: 3000, commentMax: 2000 },
5
+ stealth: { switchMax: 5000, commentMax: 3500 },
6
+ };
7
+
8
+ const delayConfig = {
9
+ switchMax: 2500,
10
+ commentMax: 1500,
11
+ fast: false,
12
+ };
13
+
14
+ export function setDelayConfig(config) {
15
+ if (typeof config === 'string') {
16
+ const preset = DELAY_PRESETS[config.toLowerCase()];
17
+ if (!preset) {
18
+ throw new Error(
19
+ `未知的延迟预设: ${config}\n可用预设: ${Object.keys(DELAY_PRESETS).join(', ')}`
20
+ );
21
+ }
22
+ delayConfig.switchMax = preset.switchMax;
23
+ delayConfig.commentMax = preset.commentMax;
24
+ delayConfig.fast = preset.fast || false;
25
+ } else if (typeof config === 'object') {
26
+ if (config.switchMax) delayConfig.switchMax = config.switchMax;
27
+ if (config.commentMax) delayConfig.commentMax = config.commentMax;
28
+ delayConfig.fast = config.fast || false;
29
+ }
30
+ }
31
+
32
+ export function getDelayConfig() {
33
+ return { ...delayConfig };
34
+ }
35
+
36
+ export function listDelayPresets() {
37
+ return DELAY_PRESETS;
38
+ }
39
+
40
+ export function delay(min, max) {
41
+ const lo = Math.min(min, max);
42
+ const hi = Math.max(min, max);
43
+ let ms;
44
+ if (delayConfig.fast) {
45
+ ms = 0;
46
+ } else {
47
+ ms = Math.floor(Math.random() * (hi - lo + 1)) + lo;
48
+ }
49
+ return new Promise(r => setTimeout(r, ms));
50
+ }
51
+
52
+ export function randomDelay(min = 200, max = 600) {
53
+ return delay(min, max);
54
+ }
@@ -1,118 +1,118 @@
1
- import { chromium } from 'playwright';
2
- import { browser, saveBrowser, configPath } from './constants.js';
3
- import { detectBrowser } from './browser/launch.js';
4
- import { getAntiDetectScript } from './browser/anti-detect.js';
5
- import { retryWithBackoff } from './retry.js';
6
- import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
7
-
8
- const EXPLORE_URL = 'https://www.tiktok.com/explore';
9
-
10
- function sleep(ms) {
11
- return new Promise(r => setTimeout(r, ms));
12
- }
13
-
14
- export async function fetchExplore(count = 100) {
15
- let browserPath = browser;
16
- let browserSource = '配置';
17
-
18
- if (!browserPath) {
19
- console.log(' [0/6] 未配置浏览器,正在自动探测...');
20
- const detected = detectBrowser();
21
- if (detected) {
22
- browserPath = detected;
23
- browserSource = '自动探测';
24
- try {
25
- saveBrowser(browserPath);
26
- console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
27
- } catch (err) {
28
- console.log(` [0/6] 保存配置失败: ${err.message}`);
29
- }
30
- }
31
- }
32
-
33
- const launchOptions = {
34
- headless: true,
35
- args: [
36
- '--no-sandbox',
37
- '--disable-setuid-sandbox',
38
- '--disable-blink-features=AutomationControlled',
39
- '--disable-dev-shm-usage',
40
- ],
41
- };
42
-
43
- if (browserPath) {
44
- console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
45
- launchOptions.executablePath = browserPath;
46
- }
47
-
48
- let instance;
49
- try {
50
- instance = await chromium.launch(launchOptions);
51
- } catch (err) {
52
- if (browserPath) {
53
- console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
54
- }
55
- instance = await chromium.launch({
56
- headless: true,
57
- args: launchOptions.args,
58
- });
59
- }
60
-
61
- try {
62
- const context = await instance.newContext({
63
- viewport: { width: 1280, height: 900 },
64
- userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
65
- locale: 'en-US',
66
- });
67
-
68
- await context.addInitScript(getAntiDetectScript());
69
-
70
- const page = await context.newPage();
71
- await retryWithBackoff(() => page.goto(EXPLORE_URL, { waitUntil: 'load', timeout: 30000 }));
72
- console.log(' [1/6] 页面已加载');
73
-
74
- await sleep(5000);
75
-
76
- const allUrls = await scrollAndCollect(page, {
77
- container: null,
78
- collectFn: () => ({
79
- items: Array.from(document.querySelectorAll('a'))
80
- .filter(a => /\/video\/\d{16,20}/.test(a.href))
81
- .map(a => a.href),
82
- }),
83
- maxItems: count * 2,
84
- delayRange: [1500, 2500],
85
- staleThreshold: 5,
86
- onRound: (round, items, allItems) => {
87
- if ((round + 1) % 10 === 0) {
88
- const uniqueCount = [...new Set(allItems)].length;
89
- console.log(` [2/6] 滚动 ${round + 1},当前 ${uniqueCount} 个视频`);
90
- }
91
- },
92
- });
93
-
94
- await sleep(3000);
95
-
96
- const unique = [...new Set(allUrls)];
97
- console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);
98
-
99
- const results = [];
100
- const seen = new Set();
101
- for (const url of unique) {
102
- if (results.length >= count) break;
103
- const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
104
- if (videoId && !seen.has(videoId)) {
105
- seen.add(videoId);
106
- const user = url.match(/\/@([^/]+)/)?.[1];
107
- if (user) {
108
- results.push({ user, id: videoId, url });
109
- }
110
- }
111
- }
112
-
113
- console.log(` [5/6] 去重后 ${results.length} 个`);
114
- return results;
115
- } finally {
116
- await instance.close();
117
- }
118
- }
1
+ import { chromium } from 'playwright';
2
+ import { browser, saveBrowser, configPath } from './constants.js';
3
+ import { detectBrowser } from './browser/launch.js';
4
+ import { getAntiDetectScript } from './browser/anti-detect.js';
5
+ import { retryWithBackoff } from './retry.js';
6
+ import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
7
+
8
+ const EXPLORE_URL = 'https://www.tiktok.com/explore';
9
+
10
+ function sleep(ms) {
11
+ return new Promise(r => setTimeout(r, ms));
12
+ }
13
+
14
+ export async function fetchExplore(count = 100) {
15
+ let browserPath = browser;
16
+ let browserSource = '配置';
17
+
18
+ if (!browserPath) {
19
+ console.log(' [0/6] 未配置浏览器,正在自动探测...');
20
+ const detected = detectBrowser();
21
+ if (detected) {
22
+ browserPath = detected;
23
+ browserSource = '自动探测';
24
+ try {
25
+ saveBrowser(browserPath);
26
+ console.log(` [0/6] 已保存浏览器路径到配置: ${configPath}`);
27
+ } catch (err) {
28
+ console.log(` [0/6] 保存配置失败: ${err.message}`);
29
+ }
30
+ }
31
+ }
32
+
33
+ const launchOptions = {
34
+ headless: true,
35
+ args: [
36
+ '--no-sandbox',
37
+ '--disable-setuid-sandbox',
38
+ '--disable-blink-features=AutomationControlled',
39
+ '--disable-dev-shm-usage',
40
+ ],
41
+ };
42
+
43
+ if (browserPath) {
44
+ console.log(` [0/6] 使用${browserSource}浏览器: ${browserPath}`);
45
+ launchOptions.executablePath = browserPath;
46
+ }
47
+
48
+ let instance;
49
+ try {
50
+ instance = await chromium.launch(launchOptions);
51
+ } catch (err) {
52
+ if (browserPath) {
53
+ console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
54
+ }
55
+ instance = await chromium.launch({
56
+ headless: true,
57
+ args: launchOptions.args,
58
+ });
59
+ }
60
+
61
+ try {
62
+ const context = await instance.newContext({
63
+ viewport: { width: 1280, height: 900 },
64
+ userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
65
+ locale: 'en-US',
66
+ });
67
+
68
+ await context.addInitScript(getAntiDetectScript());
69
+
70
+ const page = await context.newPage();
71
+ await retryWithBackoff(() => page.goto(EXPLORE_URL, { waitUntil: 'load', timeout: 30000 }));
72
+ console.log(' [1/6] 页面已加载');
73
+
74
+ await sleep(5000);
75
+
76
+ const allUrls = await scrollAndCollect(page, {
77
+ container: null,
78
+ collectFn: () => ({
79
+ items: Array.from(document.querySelectorAll('a'))
80
+ .filter(a => /\/video\/\d{16,20}/.test(a.href))
81
+ .map(a => a.href),
82
+ }),
83
+ maxItems: count * 2,
84
+ delayRange: [1500, 2500],
85
+ staleThreshold: 5,
86
+ onRound: (round, items, allItems) => {
87
+ if ((round + 1) % 10 === 0) {
88
+ const uniqueCount = [...new Set(allItems)].length;
89
+ console.log(` [2/6] 滚动 ${round + 1},当前 ${uniqueCount} 个视频`);
90
+ }
91
+ },
92
+ });
93
+
94
+ await sleep(3000);
95
+
96
+ const unique = [...new Set(allUrls)];
97
+ console.log(` [4/6] 共检测到 ${unique.length} 个不重复视频`);
98
+
99
+ const results = [];
100
+ const seen = new Set();
101
+ for (const url of unique) {
102
+ if (results.length >= count) break;
103
+ const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
104
+ if (videoId && !seen.has(videoId)) {
105
+ seen.add(videoId);
106
+ const user = url.match(/\/@([^/]+)/)?.[1];
107
+ if (user) {
108
+ results.push({ user, id: videoId, url });
109
+ }
110
+ }
111
+ }
112
+
113
+ console.log(` [5/6] 去重后 ${results.length} 个`);
114
+ return results;
115
+ } finally {
116
+ await instance.close();
117
+ }
118
+ }
@@ -1,45 +1,45 @@
1
- import { fetch, ProxyAgent } from 'undici';
2
- import { DEFAULT_PROXY } from './constants.js';
3
- import { isProfileUrl } from './url.js';
4
-
5
- const HEADERS = {
6
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
7
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
8
- 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
9
- 'Accept-Encoding': 'gzip, deflate, br',
10
- 'Connection': 'keep-alive',
11
- 'Upgrade-Insecure-Requests': '1',
12
- 'Sec-Fetch-Dest': 'document',
13
- 'Sec-Fetch-Mode': 'navigate',
14
- 'Sec-Fetch-Site': 'none',
15
- 'Sec-Fetch-User': '?1',
16
- 'Cache-Control': 'max-age=0',
17
- };
18
-
19
- export { isProfileUrl } from './url.js';
20
-
21
- export async function fetchHtml(url, proxyUrl) {
22
- const p = proxyUrl || DEFAULT_PROXY;
23
- const agent = new ProxyAgent(p);
24
- let lastError;
25
-
26
- for (let attempt = 1; attempt <= 3; attempt++) {
27
- try {
28
- const res = await fetch(url, {
29
- headers: HEADERS,
30
- dispatcher: agent,
31
- redirect: 'follow',
32
- });
33
- const html = await res.text();
34
- return html;
35
- } catch (err) {
36
- lastError = err;
37
- if (attempt < 3) {
38
- const waitMs = Math.pow(2, attempt - 1) * 3000 + Math.random() * 2000;
39
- await new Promise(r => setTimeout(r, waitMs));
40
- }
41
- }
42
- }
43
-
44
- throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`);
45
- }
1
+ import { fetch, ProxyAgent } from 'undici';
2
+ import { DEFAULT_PROXY } from './constants.js';
3
+ import { isProfileUrl } from './url.js';
4
+
5
+ const HEADERS = {
6
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
7
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
8
+ 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
9
+ 'Accept-Encoding': 'gzip, deflate, br',
10
+ 'Connection': 'keep-alive',
11
+ 'Upgrade-Insecure-Requests': '1',
12
+ 'Sec-Fetch-Dest': 'document',
13
+ 'Sec-Fetch-Mode': 'navigate',
14
+ 'Sec-Fetch-Site': 'none',
15
+ 'Sec-Fetch-User': '?1',
16
+ 'Cache-Control': 'max-age=0',
17
+ };
18
+
19
+ export { isProfileUrl } from './url.js';
20
+
21
+ export async function fetchHtml(url, proxyUrl) {
22
+ const p = proxyUrl || DEFAULT_PROXY;
23
+ const agent = new ProxyAgent(p);
24
+ let lastError;
25
+
26
+ for (let attempt = 1; attempt <= 3; attempt++) {
27
+ try {
28
+ const res = await fetch(url, {
29
+ headers: HEADERS,
30
+ dispatcher: agent,
31
+ redirect: 'follow',
32
+ });
33
+ const html = await res.text();
34
+ return html;
35
+ } catch (err) {
36
+ lastError = err;
37
+ if (attempt < 3) {
38
+ const waitMs = Math.pow(2, attempt - 1) * 3000 + Math.random() * 2000;
39
+ await new Promise(r => setTimeout(r, waitMs));
40
+ }
41
+ }
42
+ }
43
+
44
+ throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`);
45
+ }