tt-help-cli-ycl 1.3.5 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.5",
3
+ "version": "1.3.6",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/auto.js CHANGED
@@ -1,17 +1,36 @@
1
1
  import { getOrCreatePage } from '../lib/browser/page.js';
2
2
 
3
+ const MAX_RETRY_WAIT = 5 * 60 * 1000;
4
+
5
+ async function withRetry(label, fn) {
6
+ let backoff = 1000;
7
+ while (true) {
8
+ try {
9
+ return await fn();
10
+ } catch (err) {
11
+ console.error(`[连接] ${label} 失败: ${err.message},${backoff / 1000}秒后重试...`);
12
+ await new Promise(r => setTimeout(r, backoff));
13
+ if (backoff < MAX_RETRY_WAIT) backoff *= 2;
14
+ }
15
+ }
16
+ }
17
+
3
18
  async function apiPost(url, body) {
4
- const res = await fetch(url, {
5
- method: 'POST',
6
- headers: { 'Content-Type': 'application/json' },
7
- body: JSON.stringify(body),
19
+ return withRetry(`POST ${url}`, async () => {
20
+ const res = await fetch(url, {
21
+ method: 'POST',
22
+ headers: { 'Content-Type': 'application/json' },
23
+ body: JSON.stringify(body),
24
+ });
25
+ return res.json();
8
26
  });
9
- return res.json();
10
27
  }
11
28
 
12
29
  async function apiGet(url) {
13
- const res = await fetch(url);
14
- return res.json();
30
+ return withRetry(`GET ${url}`, async () => {
31
+ const res = await fetch(url);
32
+ return res.json();
33
+ });
15
34
  }
16
35
 
17
36
  export async function handleAuto(options) {
@@ -32,44 +51,54 @@ export async function handleAuto(options) {
32
51
  maxFollowers: autoMaxFollowers,
33
52
  };
34
53
 
35
- try {
36
- await apiGet(`${serverUrl}/api/stats`);
37
- } catch {
38
- console.error(`无法连接服务端: ${serverUrl}`);
39
- console.error('请先启动服务端: tt-help watch -o <数据文件>');
40
- process.exit(1);
41
- }
54
+ await apiGet(`${serverUrl}/api/stats`);
42
55
 
43
56
  if (autoUsernames.length > 0) {
44
57
  const { added, skipped } = await apiPost(`${serverUrl}/api/users`, { usernames: autoUsernames });
45
58
  console.error(`种子用户: ${added} 个新增, ${skipped} 个已存在`);
46
59
  }
47
60
 
61
+ console.error(`服务器: ${serverUrl}(断开会自动重连)`);
62
+
48
63
  const { ensureBrowserReady, processUser } = await import('../scraper/auto-core.mjs');
49
64
  const browser = await ensureBrowserReady();
50
65
 
51
- try {
52
- const page = await getOrCreatePage(browser);
66
+ const page = await getOrCreatePage(browser);
53
67
 
54
- let processedCount = 0;
55
- let errorCount = 0;
68
+ let processedCount = 0;
69
+ let errorCount = 0;
56
70
 
57
- while (true) {
58
- const job = await apiGet(`${serverUrl}/api/job`);
59
- if (!job.hasJob) break;
71
+ while (true) {
72
+ const job = await apiGet(`${serverUrl}/api/job`);
73
+ if (!job.hasJob) break;
60
74
 
61
- const username = job.user.uniqueId;
62
- processedCount++;
63
- console.error(`\n[${processedCount}] 处理 @${username}...`);
75
+ const username = job.user.uniqueId;
76
+ processedCount++;
77
+ let proxyRetry = 0;
78
+
79
+ while (true) {
80
+ console.error(`\n[${processedCount}] 处理 @${username}...${proxyRetry > 0 ? ` (代理重试 ${proxyRetry})` : ''}`);
64
81
 
65
82
  const result = await processUser(page, username, { ...runOptions, browser }, console.error);
66
83
 
67
- if (result.restricted || result.error) {
68
- if (result.error) errorCount++;
84
+ if (result.restricted) {
69
85
  await apiPost(`${serverUrl}/api/job/${username}`, result);
86
+ break;
87
+ }
88
+
89
+ if (result.error && result.error.includes('代理错误')) {
90
+ proxyRetry++;
91
+ console.error(` [代理错误] ${result.error},等待 10s 后重试...`);
92
+ await new Promise(r => setTimeout(r, 10000));
70
93
  continue;
71
94
  }
72
95
 
96
+ if (result.error) {
97
+ errorCount++;
98
+ await apiPost(`${serverUrl}/api/job/${username}`, result);
99
+ break;
100
+ }
101
+
73
102
  const payload = {
74
103
  userInfo: result.userInfo || {},
75
104
  discoveredVideoAuthors: result.discoveredVideoAuthors || [],
@@ -80,15 +109,13 @@ export async function handleAuto(options) {
80
109
  };
81
110
  await apiPost(`${serverUrl}/api/job/${username}`, payload);
82
111
  console.error(' 已提交');
112
+ break;
83
113
  }
84
-
85
- const stats = await apiGet(`${serverUrl}/api/stats`);
86
- console.error(`\n完成: ${processedCount} 个用户处理, ${errorCount} 个出错`);
87
- console.error(` 总用户: ${stats.totalUsers}, 已完成: ${stats.processedUsers}, 待处理: ${stats.pendingUsers}, 错误: ${stats.errorUsers}`);
88
- } catch (err) {
89
- console.error(`自动抓取失败: ${err.message}`);
90
- process.exit(1);
91
- } finally {
92
- await browser.close().catch(() => {});
93
114
  }
115
+
116
+ const stats = await apiGet(`${serverUrl}/api/stats`);
117
+ console.error(`\n完成: ${processedCount} 个用户处理, ${errorCount} 个出错`);
118
+ console.error(` 总用户: ${stats.totalUsers}, 已完成: ${stats.processedUsers}, 待处理: ${stats.pendingUsers}, 错误: ${stats.errorUsers}`);
119
+
120
+ await browser.close().catch(() => {});
94
121
  }
@@ -1,18 +1,37 @@
1
1
  import { getOrCreatePage } from '../lib/browser/page.js';
2
2
  import { delay, getDelayConfig, setDelayConfig } from '../scraper/modules/page-helpers.mjs';
3
3
 
4
+ const MAX_RETRY_WAIT = 5 * 60 * 1000;
5
+
6
+ async function withRetry(label, fn) {
7
+ let backoff = 1000;
8
+ while (true) {
9
+ try {
10
+ return await fn();
11
+ } catch (err) {
12
+ console.error(`[连接] ${label} 失败: ${err.message},${backoff / 1000}秒后重试...`);
13
+ await new Promise(r => setTimeout(r, backoff));
14
+ if (backoff < MAX_RETRY_WAIT) backoff *= 2;
15
+ }
16
+ }
17
+ }
18
+
4
19
  async function apiPost(url, body) {
5
- const res = await fetch(url, {
6
- method: 'POST',
7
- headers: { 'Content-Type': 'application/json' },
8
- body: JSON.stringify(body),
20
+ return withRetry(`POST ${url}`, async () => {
21
+ const res = await fetch(url, {
22
+ method: 'POST',
23
+ headers: { 'Content-Type': 'application/json' },
24
+ body: JSON.stringify(body),
25
+ });
26
+ return res.json();
9
27
  });
10
- return res.json();
11
28
  }
12
29
 
13
30
  async function apiGet(url) {
14
- const res = await fetch(url);
15
- return res.json();
31
+ return withRetry(`GET ${url}`, async () => {
32
+ const res = await fetch(url);
33
+ return res.json();
34
+ });
16
35
  }
17
36
 
18
37
  export async function handleExplore(options) {
@@ -24,12 +43,7 @@ export async function handleExplore(options) {
24
43
 
25
44
  setDelayConfig(explorePreset);
26
45
 
27
- try {
28
- await apiGet(`${serverUrl}/api/stats`);
29
- } catch {
30
- console.error(`无法连接服务端: ${serverUrl},退出`);
31
- process.exit(1);
32
- }
46
+ await apiGet(`${serverUrl}/api/stats`);
33
47
 
34
48
  if (exploreUsernames && exploreUsernames.length > 0) {
35
49
  const { added, skipped } = await apiPost(`${serverUrl}/api/users`, { usernames: exploreUsernames });
@@ -39,24 +53,27 @@ export async function handleExplore(options) {
39
53
  console.error(`\n国家筛选: ${exploreLocation}`);
40
54
  console.error(`评论: ${exploreMaxComments}, 猜你喜欢: ${exploreMaxGuess}`);
41
55
  console.error(`关注/粉丝: ${exploreEnableFollow ? '启用' : '禁用'}`);
56
+ console.error(`服务器: ${serverUrl}(断开会自动重连)`);
42
57
  if (exploreMaxUsers > 0) console.error(`上限: ${exploreMaxUsers} 个用户`);
43
58
 
44
59
  const { ensureBrowserReady, processExplore } = await import('../scraper/explore-core.mjs');
45
60
  const browser = await ensureBrowserReady();
46
61
 
47
- try {
48
- const page = await getOrCreatePage(browser);
62
+ const page = await getOrCreatePage(browser);
49
63
 
50
- let processedCount = 0;
51
- let errorCount = 0;
64
+ let processedCount = 0;
65
+ let errorCount = 0;
52
66
 
53
- while (true) {
54
- const job = await apiGet(`${serverUrl}/api/job`);
55
- if (!job.hasJob) break;
67
+ while (true) {
68
+ const job = await apiGet(`${serverUrl}/api/job`);
69
+ if (!job.hasJob) break;
56
70
 
57
- const username = job.user.uniqueId;
58
- processedCount++;
59
- console.error(`\n[${processedCount}] 探索 @${username}...`);
71
+ const username = job.user.uniqueId;
72
+ processedCount++;
73
+ let proxyRetry = 0;
74
+
75
+ while (true) {
76
+ console.error(`\n[${processedCount}] 探索 @${username}...${proxyRetry > 0 ? ` (代理重试 ${proxyRetry})` : ''}`);
60
77
 
61
78
  const { switchMax } = getDelayConfig();
62
79
  await delay(switchMax, switchMax * 3);
@@ -73,13 +90,20 @@ export async function handleExplore(options) {
73
90
 
74
91
  if (result.restricted) {
75
92
  await apiPost(`${serverUrl}/api/job/${username}`, { restricted: true, userInfo: result.userInfo || {} });
93
+ break;
94
+ }
95
+
96
+ if (result.error && result.error.includes('代理错误')) {
97
+ proxyRetry++;
98
+ console.error(` [代理错误] ${result.error},等待 10s 后重试...`);
99
+ await new Promise(r => setTimeout(r, 10000));
76
100
  continue;
77
101
  }
78
102
 
79
103
  if (result.error) {
80
104
  errorCount++;
81
105
  await apiPost(`${serverUrl}/api/job/${username}`, { error: result.error });
82
- continue;
106
+ break;
83
107
  }
84
108
 
85
109
  const payload = {
@@ -97,20 +121,18 @@ export async function handleExplore(options) {
97
121
  };
98
122
  await apiPost(`${serverUrl}/api/job/${username}`, payload);
99
123
  console.error(' 已提交');
100
-
101
- if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
102
- console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
103
- break;
104
- }
124
+ break;
105
125
  }
106
126
 
107
- const stats = await apiGet(`${serverUrl}/api/stats`);
108
- console.error(`\n完成: ${processedCount} 个用户处理, ${errorCount} 个出错`);
109
- console.error(` 总用户: ${stats.totalUsers}, 已完成: ${stats.processedUsers}, 待处理: ${stats.pendingUsers}, 错误: ${stats.errorUsers}`);
110
- } catch (err) {
111
- console.error(`探索失败: ${err.message}`);
112
- process.exit(1);
113
- } finally {
114
- await browser.close().catch(() => {});
127
+ if (exploreMaxUsers > 0 && processedCount >= exploreMaxUsers) {
128
+ console.error(`\n已达上限 ${exploreMaxUsers} 个用户,停止处理`);
129
+ break;
130
+ }
115
131
  }
132
+
133
+ const stats = await apiGet(`${serverUrl}/api/stats`);
134
+ console.error(`\n完成: ${processedCount} 个用户处理, ${errorCount} 个出错`);
135
+ console.error(` 总用户: ${stats.totalUsers}, 已完成: ${stats.processedUsers}, 待处理: ${stats.pendingUsers}, 错误: ${stats.errorUsers}`);
136
+
137
+ await browser.close().catch(() => {});
116
138
  }
package/src/cli/watch.js CHANGED
@@ -1,4 +1,5 @@
1
- import { writeFileSync, existsSync } from 'fs';
1
+ import { existsSync } from 'fs';
2
+ import { createStore } from '../watch/data-store.mjs';
2
3
  import { startWatchServer, openBrowser } from '../watch/server.mjs';
3
4
 
4
5
  export async function handleWatch(options) {
@@ -16,10 +17,12 @@ export async function handleWatch(options) {
16
17
  process.exit(1);
17
18
  }
18
19
 
19
- const { server, port } = await startWatchServer(outputFile, watchPort);
20
+ const store = createStore(outputFile);
21
+ const { server, port } = await startWatchServer(outputFile, watchPort, store);
20
22
  openBrowser(port);
21
23
 
22
24
  process.once('SIGINT', () => {
25
+ store.stopBackup();
23
26
  server.close();
24
27
  process.exit(0);
25
28
  });
package/src/lib/browser/page.js CHANGED
@@ -78,3 +78,10 @@ export async function getOrCreatePage(browser) {
78
78
  }
79
79
  return page;
80
80
  }
81
+
82
+ export function assertPageUrl(page, expectedPath) {
83
+ const actual = page.url();
84
+ if (!actual.includes(expectedPath)) {
85
+ throw new Error(`[代理错误] 预期访问 ${expectedPath},实际跳转到了 ${actual}`);
86
+ }
87
+ }
package/src/scraper/auto-core.mjs CHANGED
@@ -8,6 +8,7 @@ import {
8
8
  retryWithBackoff,
9
9
  detectPageError,
10
10
  isLoggedIn,
11
+ assertPageUrl,
11
12
  } from './modules/page-helpers.mjs';
12
13
  export { ensureBrowserReady };
13
14
  import {
@@ -66,6 +67,7 @@ async function processUser(page, username, options, log) {
66
67
  await retryWithBackoff(() => page.goto(`https://www.tiktok.com/@${username}`, {
67
68
  waitUntil: 'load', timeout: 30000,
68
69
  }), { log });
70
+ assertPageUrl(page, `@${username}`);
69
71
  await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
70
72
  await delay(1000, 2000);
71
73
 
@@ -6,6 +6,7 @@ import {
6
6
  setDelayConfig,
7
7
  getDelayConfig,
8
8
  retryWithBackoff,
9
+ assertPageUrl,
9
10
  } from './modules/page-helpers.mjs';
10
11
  import { extractCommentAuthors } from './modules/comment-extractor.mjs';
11
12
  import { extractGuessVideos } from './modules/guess-extractor.mjs';
@@ -100,6 +101,7 @@ async function runScrape(options) {
100
101
  }
101
102
 
102
103
  await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: 'load', timeout: 30000 }), { log });
104
+ assertPageUrl(page, videoUrl.split('/video/')[0]);
103
105
  await delay(Math.round(config.switchMax * 0.5), config.switchMax);
104
106
  await closeCommentPanel(page);
105
107
  await delay(Math.round(config.commentMax * 0.5), config.commentMax);
package/src/scraper/explore-core.mjs CHANGED
@@ -6,6 +6,7 @@ import {
6
6
  retryWithBackoff,
7
7
  detectPageError,
8
8
  isLoggedIn,
9
+ assertPageUrl,
9
10
  } from './modules/page-helpers.mjs';
10
11
  export { ensureBrowserReady };
11
12
  import {
@@ -47,6 +48,7 @@ async function processExplore(page, username, options, log) {
47
48
  log(` 访问 @${username} 主页...`);
48
49
  const homeUrl = `https://www.tiktok.com/@${username}`;
49
50
  await retryWithBackoff(() => page.goto(homeUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }), { log });
51
+ assertPageUrl(page, `@${username}`);
50
52
  await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});
51
53
  await delay(1000, 2000);
52
54
 
@@ -107,6 +109,7 @@ async function processExplore(page, username, options, log) {
107
109
 
108
110
  log(` 进入第一个视频: ${videoUrl}`);
109
111
  await retryWithBackoff(() => page.goto(videoUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }), { log });
112
+ assertPageUrl(page, videoUrl.split('/video/')[0]);
110
113
  await delay(1500, 2500);
111
114
 
112
115
  const videoData = await scrapeSingleVideo(page, 0, 0, log, 'NEVER_MATCH');
package/src/scraper/modules/page-helpers.mjs CHANGED
@@ -12,6 +12,7 @@ import {
12
12
  findTikTokPage,
13
13
  getOrCreatePage,
14
14
  isLoggedIn,
15
+ assertPageUrl,
15
16
  } from '../../lib/browser/page.js';
16
17
  import { retryWithBackoff, isRetryableError } from '../../lib/retry.js';
17
18
  import {
@@ -34,6 +35,7 @@ export {
34
35
  findTikTokPage,
35
36
  getOrCreatePage,
36
37
  isLoggedIn,
38
+ assertPageUrl,
37
39
  retryWithBackoff,
38
40
  isRetryableError,
39
41
  extractUserSection,
package/src/watch/data-store.mjs CHANGED
@@ -11,18 +11,47 @@ function inferStatus(u) {
11
11
  export function createStore(filePath) {
12
12
  let data = [];
13
13
 
14
+ let backupTimer = null;
15
+
14
16
  if (filePath) {
15
17
  const resolved = path.resolve(filePath);
18
+ const backupDir = path.join(path.dirname(resolved), '.backup');
19
+ const maxBackups = 3;
20
+
16
21
  if (fs.existsSync(resolved)) {
17
22
  try {
18
- const raw = fs.readFileSync(resolved, 'utf-8');
19
- data = JSON.parse(raw);
20
- if (!Array.isArray(data)) data = [];
23
+ const content = fs.readFileSync(resolved, 'utf-8');
24
+ data = JSON.parse(content);
25
+ if (!Array.isArray(data)) {
26
+ data = [];
27
+ }
21
28
  } catch (e) {
22
29
  console.error(`[data-store] 读取文件失败: ${e.message}`);
23
30
  data = [];
24
31
  }
25
32
  }
33
+
34
+ function runBackup() {
35
+ if (!fs.existsSync(resolved)) return;
36
+ if (!fs.existsSync(backupDir)) fs.mkdirSync(backupDir, { recursive: true });
37
+ const now = new Date();
38
+ const timestamp = now.toISOString().replace(/[:.]/g, '-').slice(0, 13);
39
+ const backupFile = path.join(backupDir, `data-${timestamp}.json`);
40
+ try {
41
+ fs.copyFileSync(resolved, backupFile);
42
+ const files = fs.readdirSync(backupDir)
43
+ .filter(f => f.startsWith('data-') && f.endsWith('.json'))
44
+ .sort()
45
+ .map(f => path.join(backupDir, f));
46
+ while (files.length > maxBackups) {
47
+ fs.unlinkSync(files.shift());
48
+ }
49
+ } catch (e) {
50
+ console.error(`[data-store] 备份失败: ${e.message}`);
51
+ }
52
+ }
53
+
54
+ backupTimer = setInterval(runBackup, 60 * 60 * 1000);
26
55
  }
27
56
 
28
57
  for (const u of data) {
@@ -32,25 +61,17 @@ export function createStore(filePath) {
32
61
  function save() {
33
62
  if (!filePath) return;
34
63
  const resolved = path.resolve(filePath);
35
- try {
36
- if (fs.existsSync(resolved)) {
37
- const raw = fs.readFileSync(resolved, 'utf-8');
38
- const diskData = JSON.parse(raw);
39
- if (Array.isArray(diskData)) {
40
- const memIds = new Set(data.map(u => u.uniqueId));
41
- for (const diskUser of diskData) {
42
- if (!memIds.has(diskUser.uniqueId)) {
43
- if (!diskUser.status) diskUser.status = inferStatus(diskUser);
44
- data.push(diskUser);
45
- }
46
- }
47
- }
48
- }
49
- } catch (e) { console.error(`[data-store] 合并磁盘数据失败: ${e.message}`); }
50
64
  const json = JSON.stringify(data, null, 2);
51
65
  fs.writeFileSync(resolved, json, 'utf-8');
52
66
  }
53
67
 
68
+ function stopBackup() {
69
+ if (backupTimer) {
70
+ clearInterval(backupTimer);
71
+ backupTimer = null;
72
+ }
73
+ }
74
+
54
75
  function getUser(uid) {
55
76
  return data.find(u => u.uniqueId === uid);
56
77
  }
@@ -234,6 +255,7 @@ export function createStore(filePath) {
234
255
  save, getUser, hasUser, addUser,
235
256
  getPendingUsers, getProcessedUsers, getAllUsers,
236
257
  claimNextJob, commitJob, resetJob, togglePin,
258
+ stopBackup,
237
259
  data,
238
260
  };
239
261
  }
package/src/watch/server.mjs CHANGED
@@ -104,9 +104,9 @@ function sendJSON(res, code, data) {
104
104
  res.end(JSON.stringify(data));
105
105
  }
106
106
 
107
- export function startWatchServer(outputFile, port = 3000) {
107
+ export function startWatchServer(outputFile, port = 3000, existingStore) {
108
108
  return new Promise((_resolve, reject) => {
109
- const store = createStore(outputFile);
109
+ const store = existingStore || createStore(outputFile);
110
110
 
111
111
  const server = http.createServer(async (req, res) => {
112
112
  const { path: routePath, params } = parseQuery(req.url);