tt-help-cli-ycl 1.3.77 → 1.3.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.77",
3
+ "version": "1.3.79",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/attach.js CHANGED
@@ -83,6 +83,7 @@ export async function handleAttach(options) {
83
83
  serverUrl,
84
84
  attachCountries,
85
85
  customProxy,
86
+ attachPoolSize,
86
87
  showHelp,
87
88
  } = options;
88
89
  const effectiveProxy = customProxy || configuredProxy;
@@ -110,6 +111,9 @@ export async function handleAttach(options) {
110
111
  attachLog(
111
112
  " --proxy <代理地址> HTTP 代理地址(如 http://127.0.0.1:7890),不配置则从 ~/.tt-help.json 读取",
112
113
  );
114
+ attachLog(
115
+ " --size <N> 浏览器页面池大小(默认: 3,不建议超过 3)",
116
+ );
113
117
  attachLog("");
114
118
  attachLog("说明:");
115
119
  attachLog(
@@ -117,12 +121,16 @@ export async function handleAttach(options) {
117
121
  );
118
122
  attachLog(" 抓取完成后通过 POST /api/user-info-batch 批量回传结果");
119
123
  attachLog(" 浏览器崩溃时自动重启,支持长时间无人值守运行");
124
+ attachLog(
125
+ " --size 控制单个浏览器内并发的 page 数,增大可提升吞吐但可能触发 TikTok 限流",
126
+ );
120
127
  attachLog("");
121
128
  attachLog("示例:");
122
129
  attachLog(" tt-help attach");
123
130
  attachLog(" tt-help attach -p 5 -i 10");
124
131
  attachLog(" tt-help attach -p 3 -i 5 -s http://127.0.0.1:3001");
125
132
  attachLog(" tt-help attach -c PL,DE,FR -p 5");
133
+ attachLog(" tt-help attach -p 5 --size 5");
126
134
  return;
127
135
  }
128
136
 
@@ -135,6 +143,7 @@ export async function handleAttach(options) {
135
143
  );
136
144
 
137
145
  const scraper = new TikTokScraper({
146
+ poolSize: attachPoolSize || 3,
138
147
  proxyServer: effectiveProxy || null,
139
148
  });
140
149
  const shutdown = async (signal) => {
package/src/cli/open.js CHANGED
@@ -94,8 +94,8 @@ export async function handleOpen(parsed) {
94
94
  process.on("SIGINT", async () => {
95
95
  console.error("\n正在关闭浏览器...");
96
96
  try {
97
- console.error(`[SIGINT] 正在终止 Edge 进程 (userDataDir: ${userDataDir})`);
98
- await killEdgeProcesses(userDataDir);
97
+ console.error(`[SIGINT] 正在终止 Edge 进程 (端口: ${port})`);
98
+ await killEdgeProcesses(userDataDir, port);
99
99
  console.error("[SIGINT] Edge 进程已终止,正在关闭浏览器连接...");
100
100
  await browser.close();
101
101
  } catch (e) {
package/src/lib/args.js CHANGED
@@ -534,6 +534,7 @@ function parseAttachArgs(args) {
534
534
  let serverUrl = defaultServer;
535
535
  let countries = [];
536
536
  let customProxy = null;
537
+ let poolSize = 3;
537
538
 
538
539
  for (let i = 0; i < args.length; i++) {
539
540
  const arg = args[i];
@@ -550,6 +551,8 @@ function parseAttachArgs(args) {
550
551
  .filter(Boolean);
551
552
  } else if (arg === "--proxy") {
552
553
  customProxy = args[++i];
554
+ } else if (arg === "--size") {
555
+ poolSize = parseInt(args[++i], 10) || 3;
553
556
  }
554
557
  }
555
558
 
@@ -560,6 +563,7 @@ function parseAttachArgs(args) {
560
563
  serverUrl,
561
564
  attachCountries: countries,
562
565
  customProxy,
566
+ attachPoolSize: poolSize,
563
567
  urls: [],
564
568
  outputFormat: "json",
565
569
  exploreCount: 0,
@@ -70,7 +70,7 @@ function checkEdgeArgs() {
70
70
  });
71
71
  }
72
72
 
73
- function killEdgeProcesses(targetDir) {
73
+ function killEdgeProcesses(targetDir, port) {
74
74
  return new Promise((resolve) => {
75
75
  const platform = os.platform();
76
76
  let command;
@@ -102,20 +102,16 @@ function killEdgeProcesses(targetDir) {
102
102
  'killall -9 "Microsoft Edge" 2>/dev/null; rm -f ~/Library/Caches/Microsoft\\ Edge/Singleton*; true';
103
103
  }
104
104
  } else if (platform === "win32") {
105
- if (targetDir) {
105
+ if (port) {
106
+ // 用端口匹配(纯数字,无编码问题),而不是 userDataDir 路径(中文路径会乱码)
106
107
  try {
107
108
  const ps1Path = path.join(os.tmpdir(), `kill-edge-${Date.now()}.ps1`);
108
- const escapedDir = targetDir.replace(/'/g, "''");
109
109
  const ps1Content =
110
110
  `$procs = Get-CimInstance Win32_Process -Filter "Name='msedge.exe'" 2>$null; ` +
111
- `Write-Output "TOTAL_MSEDGE_PROCS: $($procs.Count)"; ` +
112
- `foreach ($p in $procs) { ` +
113
- ` Write-Output "PROC_PID=$($p.ProcessId) CMD=$($p.CommandLine)"; ` +
114
- `}; ` +
115
111
  `$count = 0; ` +
116
112
  `$pids = @(); ` +
117
113
  `foreach ($p in $procs) { ` +
118
- ` if ($p.CommandLine -and $p.CommandLine -like "*${escapedDir}*") { ` +
114
+ ` if ($p.CommandLine -and $p.CommandLine -like "*--remote-debugging-port=${port}*") { ` +
119
115
  ` $pids += $p.ProcessId; ` +
120
116
  ` Stop-Process -Id $p.ProcessId -Force -ErrorAction SilentlyContinue; ` +
121
117
  ` $count++ ` +
@@ -132,7 +128,9 @@ function killEdgeProcesses(targetDir) {
132
128
  .toString()
133
129
  .trim();
134
130
  console.error(`[killEdgeProcesses] Windows PS result: ${result}`);
135
- try { fs.unlinkSync(ps1Path); } catch {}
131
+ try {
132
+ fs.unlinkSync(ps1Path);
133
+ } catch {}
136
134
  command = "exit 0";
137
135
  } catch (e) {
138
136
  console.error(`[killEdgeProcesses] Windows PS failed: ${e.message}`);
@@ -222,7 +220,7 @@ export async function ensureBrowserReady(options = {}) {
222
220
  const edgeArgsValid = await checkEdgeArgs();
223
221
  if (!edgeArgsValid) {
224
222
  console.error(`Edge 已运行但启动参数不完整,正在重启端口 ${port}...`);
225
- await killEdgeProcesses(userDataDir);
223
+ await killEdgeProcesses(userDataDir, port);
226
224
  await new Promise((r) => setTimeout(r, 3000));
227
225
  needLaunch = true;
228
226
  }
@@ -238,7 +236,7 @@ export async function ensureBrowserReady(options = {}) {
238
236
  const edgeRunning = await isEdgeRunning();
239
237
  if (edgeRunning) {
240
238
  console.error(`Edge 已运行但 CDP 端口 ${port} 未启用,正在重启...`);
241
- await killEdgeProcesses(userDataDir);
239
+ await killEdgeProcesses(userDataDir, port);
242
240
  await new Promise((r) => setTimeout(r, 3000));
243
241
  } else {
244
242
  console.error(`CDP 端口 ${port} 未就绪,正在启动 Edge 浏览器...`);
@@ -266,7 +264,7 @@ export async function switchAccount(oldAccount, newAccount, proxyServer) {
266
264
  console.error(` [账户切换] 等待 30 秒,请完成当前操作后自动切换...`);
267
265
  await new Promise((r) => setTimeout(r, 30000));
268
266
 
269
- await killEdgeProcesses(oldAccount.userDataDir);
267
+ await killEdgeProcesses(oldAccount.userDataDir, oldAccount.port);
270
268
  await new Promise((r) => setTimeout(r, 3000));
271
269
 
272
270
  const newCdpOptions = {};
@@ -31,7 +31,7 @@ export async function relaunchBrowser(cdpOptions, port) {
31
31
  console.error(` [浏览器] 浏览器已关闭,正在重启 (端口 ${port})...`);
32
32
  const targetDir = cdpOptions.userDataDir || DEFAULT_USER_DATA_DIR;
33
33
  // kill 并清理 Singleton 锁文件,确保 Edge 能启动新实例
34
- await killEdgeProcesses(targetDir);
34
+ await killEdgeProcesses(targetDir, port);
35
35
  await new Promise((r) => setTimeout(r, 3000));
36
36
  // 确保端口已释放后再启动
37
37
  let retries = 0;
@@ -43,7 +43,7 @@ export async function relaunchBrowser(cdpOptions, port) {
43
43
  retries++;
44
44
  console.error(` [浏览器] CDP 连接异常,重试 ${retries}/5...`);
45
45
  await new Promise((r) => setTimeout(r, 3000));
46
- await killEdgeProcesses(targetDir);
46
+ await killEdgeProcesses(targetDir, port);
47
47
  await new Promise((r) => setTimeout(r, 5000));
48
48
  continue;
49
49
  }
@@ -143,6 +143,66 @@ function initUserDb(filePath) {
143
143
  if (!existingJobColumns.has("bio_link")) {
144
144
  db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
145
145
  }
146
+ db.exec(`
147
+ CREATE TABLE IF NOT EXISTS jobs_base (
148
+ unique_id TEXT PRIMARY KEY,
149
+ nickname TEXT,
150
+ status TEXT DEFAULT 'pending',
151
+ sources TEXT,
152
+ claimed_by TEXT,
153
+ claimed_at INTEGER,
154
+ error TEXT,
155
+ pinned INTEGER DEFAULT 0,
156
+ no_video INTEGER DEFAULT 0,
157
+ restricted INTEGER DEFAULT 0,
158
+ user_update_count INTEGER DEFAULT 0,
159
+ tt_seller INTEGER,
160
+ verified INTEGER,
161
+ video_count INTEGER DEFAULT 0,
162
+ comment_count INTEGER DEFAULT 0,
163
+ guessed_location TEXT,
164
+ location_created TEXT,
165
+ confirmed_location TEXT,
166
+ modified_at INTEGER,
167
+ follower_count INTEGER DEFAULT 0,
168
+ following_count INTEGER DEFAULT 0,
169
+ heart_count INTEGER DEFAULT 0,
170
+ refresh_time INTEGER,
171
+ processed INTEGER DEFAULT 0,
172
+ processed_at INTEGER,
173
+ created_at INTEGER,
174
+ updated_at INTEGER,
175
+ region TEXT,
176
+ signature TEXT,
177
+ sec_uid TEXT,
178
+ status_code INTEGER,
179
+ latest_video_time INTEGER,
180
+ bio_link TEXT
181
+ )
182
+ `);
183
+
184
+ // 迁移:为已存在的 jobs_base 表补全列
185
+ const existingJobBaseColumns = new Set(
186
+ db
187
+ .prepare("PRAGMA table_info(jobs_base)")
188
+ .all()
189
+ .map((c) => c.name),
190
+ );
191
+ if (!existingJobBaseColumns.has("status_code")) {
192
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN status_code INTEGER`);
193
+ }
194
+ if (!existingJobBaseColumns.has("latest_video_time")) {
195
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN latest_video_time INTEGER`);
196
+ }
197
+ if (!existingJobBaseColumns.has("confirmed_location")) {
198
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN confirmed_location TEXT`);
199
+ }
200
+ if (!existingJobBaseColumns.has("modified_at")) {
201
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN modified_at INTEGER`);
202
+ }
203
+ if (!existingJobBaseColumns.has("bio_link")) {
204
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
205
+ }
146
206
  db.exec(`
147
207
  CREATE TABLE IF NOT EXISTS raw_jobs (
148
208
  unique_id TEXT PRIMARY KEY,
@@ -635,11 +695,16 @@ function getDashboardStatsFromDb(targetLocations = []) {
635
695
  .all(...targetLocations)
636
696
  : [];
637
697
 
698
+ const jobsBaseCount = db
699
+ .prepare("SELECT COUNT(*) as total FROM jobs_base")
700
+ .get().total;
701
+
638
702
  return {
639
703
  totalUsers: aggregateRow.total,
640
704
  rawJobs: getRawJobsCount(),
641
705
  dbTotalUsers: getUserDbCount(),
642
706
  jobsTotal: aggregateRow.total,
707
+ jobsBaseTotal: jobsBaseCount,
643
708
  jobsPending: aggregateRow.pending,
644
709
  processedUsers: aggregateRow.done,
645
710
  pendingUsers: aggregateRow.pending,
@@ -1730,6 +1795,91 @@ function inferStatus(u) {
1730
1795
  return "pending";
1731
1796
  }
1732
1797
 
1798
+ function addJobBaseToDb(user) {
1799
+ if (!db) return;
1800
+ const now = Date.now();
1801
+ db.prepare(
1802
+ `
1803
+ INSERT OR IGNORE INTO jobs_base (
1804
+ unique_id,
1805
+ nickname,
1806
+ status,
1807
+ sources,
1808
+ claimed_by,
1809
+ claimed_at,
1810
+ error,
1811
+ pinned,
1812
+ no_video,
1813
+ restricted,
1814
+ user_update_count,
1815
+ tt_seller,
1816
+ verified,
1817
+ video_count,
1818
+ comment_count,
1819
+ guessed_location,
1820
+ location_created,
1821
+ follower_count,
1822
+ following_count,
1823
+ heart_count,
1824
+ refresh_time,
1825
+ processed,
1826
+ processed_at,
1827
+ created_at,
1828
+ updated_at,
1829
+ region,
1830
+ signature,
1831
+ bio_link,
1832
+ sec_uid
1833
+ )
1834
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1835
+ `,
1836
+ ).run(
1837
+ user.uniqueId,
1838
+ user.nickname || null,
1839
+ user.status || inferStatus(user),
1840
+ JSON.stringify(
1841
+ Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
1842
+ ),
1843
+ user.claimedBy || null,
1844
+ user.claimedAt || null,
1845
+ user.error || null,
1846
+ user.pinned ? 1 : 0,
1847
+ user.noVideo ? 1 : 0,
1848
+ user.restricted ? 1 : 0,
1849
+ user.userUpdateCount || 0,
1850
+ user.ttSeller === undefined ||
1851
+ user.ttSeller === null ||
1852
+ user.ttSeller === ""
1853
+ ? null
1854
+ : user.ttSeller
1855
+ ? 1
1856
+ : 0,
1857
+ user.verified === undefined ||
1858
+ user.verified === null ||
1859
+ user.verified === ""
1860
+ ? null
1861
+ : user.verified
1862
+ ? 1
1863
+ : 0,
1864
+ user.videoCount || 0,
1865
+ user.commentCount || 0,
1866
+ user.guessedLocation || null,
1867
+ user.locationCreated || null,
1868
+ user.followerCount || 0,
1869
+ user.followingCount || 0,
1870
+ user.heartCount || 0,
1871
+ user.refreshTime || null,
1872
+ user.processed ? 1 : 0,
1873
+ user.processedAt || null,
1874
+ user.createdAt || now,
1875
+ user.updatedAt || now,
1876
+ user.region || null,
1877
+ user.signature || null,
1878
+ user.bioLink?.link || user.bioLink?.url || user.bioLink || null,
1879
+ user.secUid || null,
1880
+ );
1881
+ }
1882
+
1733
1883
  function addJob(user) {
1734
1884
  if (!db) {
1735
1885
  addUserToDb(user);
@@ -1951,9 +2101,9 @@ export function createStore(filePath) {
1951
2101
  function addUser(user, append) {
1952
2102
  const memoryIdx = uidIndex.get(user.uniqueId);
1953
2103
  if (db && memoryIdx === undefined) {
1954
- const existingJob = getJobRow(user.uniqueId);
1955
- if (existingJob) {
1956
- return updateJobInfo(user.uniqueId, user, false);
2104
+ // users 表判重(所有发现过的用户合集),而不是 jobs 表
2105
+ if (hasUserInDb(user.uniqueId)) {
2106
+ return;
1957
2107
  }
1958
2108
  addJob(user);
1959
2109
  return;
@@ -2802,7 +2952,8 @@ export function createStore(filePath) {
2802
2952
  const newUsers = [];
2803
2953
  for (const d of unique) {
2804
2954
  if (!hasUser(d.uniqueId)) {
2805
- addJob(d);
2955
+ addUserToDb(d);
2956
+ addJobBaseToDb(d);
2806
2957
  newUsers.push(d.uniqueId);
2807
2958
  }
2808
2959
  }
@@ -3423,37 +3574,37 @@ export function createStore(filePath) {
3423
3574
  });
3424
3575
  txn(updates);
3425
3576
 
3426
- // 在事务外执行移动操作(避免嵌套 transaction 问题)
3577
+ // 批量移动非商家用户到 raw_jobs(优化:一次 SQL 搞定)
3427
3578
  if (moveList.length > 0) {
3428
- const moveTxn = db.transaction((ids) => {
3429
- ids.forEach((uid) => {
3430
- db.prepare(
3431
- `
3432
- INSERT OR REPLACE INTO raw_jobs (
3433
- unique_id, nickname, status, sources, claimed_by, claimed_at,
3434
- error, pinned, no_video, restricted, user_update_count,
3435
- tt_seller, verified, video_count, comment_count,
3436
- guessed_location, location_created, confirmed_location, modified_at,
3437
- follower_count, following_count, heart_count, refresh_time,
3438
- processed, processed_at, created_at, updated_at,
3439
- region, signature, bio_link, sec_uid, status_code, latest_video_time
3440
- )
3441
- SELECT
3442
- unique_id, nickname, status, sources, claimed_by, claimed_at,
3443
- error, pinned, no_video, restricted, user_update_count,
3444
- tt_seller, verified, video_count, comment_count,
3445
- guessed_location, location_created, confirmed_location, modified_at,
3446
- follower_count, following_count, heart_count, refresh_time,
3447
- processed, processed_at, created_at, updated_at,
3448
- region, signature, bio_link, sec_uid, status_code, latest_video_time
3449
- FROM jobs WHERE unique_id = ?
3450
- `,
3451
- ).run(uid);
3452
-
3453
- db.prepare("DELETE FROM jobs WHERE unique_id = ?").run(uid);
3454
- });
3455
- });
3456
- moveTxn(moveList);
3579
+ const placeholders = moveList.map(() => "?").join(",");
3580
+ // 批量 INSERT 到 raw_jobs
3581
+ db.prepare(
3582
+ `
3583
+ INSERT OR REPLACE INTO raw_jobs (
3584
+ unique_id, nickname, status, sources, claimed_by, claimed_at,
3585
+ error, pinned, no_video, restricted, user_update_count,
3586
+ tt_seller, verified, video_count, comment_count,
3587
+ guessed_location, location_created, confirmed_location, modified_at,
3588
+ follower_count, following_count, heart_count, refresh_time,
3589
+ processed, processed_at, created_at, updated_at,
3590
+ region, signature, bio_link, sec_uid, status_code, latest_video_time
3591
+ )
3592
+ SELECT
3593
+ unique_id, nickname, status, sources, claimed_by, claimed_at,
3594
+ error, pinned, no_video, restricted, user_update_count,
3595
+ tt_seller, verified, video_count, comment_count,
3596
+ guessed_location, location_created, confirmed_location, modified_at,
3597
+ follower_count, following_count, heart_count, refresh_time,
3598
+ processed, processed_at, created_at, updated_at,
3599
+ region, signature, bio_link, sec_uid, status_code, latest_video_time
3600
+ FROM jobs WHERE unique_id IN (${placeholders})
3601
+ `,
3602
+ ).run(...moveList);
3603
+
3604
+ // 批量 DELETE jobs
3605
+ db.prepare(`DELETE FROM jobs WHERE unique_id IN (${placeholders})`).run(
3606
+ ...moveList,
3607
+ );
3457
3608
  }
3458
3609
 
3459
3610
  // 清理内部标记
@@ -159,6 +159,9 @@ function renderStats() {
159
159
  flashEl("statProcessing", d.processingUsers || 0);
160
160
  flashEl("statDone", d.processedUsers);
161
161
  flashEl("statPending", d.pendingUsers);
162
+ const statJobsBase = document.getElementById("statJobsBase");
163
+ if (statJobsBase)
164
+ statJobsBase.textContent = `jobs_base: ${formatStatNum(d.jobsBaseTotal || 0)}`;
162
165
  flashEl("statError", d.errorUsers);
163
166
  flashEl("statRestricted", d.restrictedUsers);
164
167
  flashEl("statTarget", d.targetUsers, { full: true });
@@ -35,6 +35,7 @@
35
35
  <div class="stat-card clickable pending-card" id="statPendingCard" onclick="navigateToPending()">
36
36
  <div class="label">待处理</div>
37
37
  <div class="value pending" id="statPending">0</div>
38
+ <div class="value-sub" id="statJobsBase">jobs_base: 0</div>
38
39
  </div>
39
40
  <div class="stat-card">
40
41
  <div class="label">错误</div>
@@ -217,6 +217,17 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
217
217
  }
218
218
 
219
219
  // 调试接口:直接查询数据库原始数据
220
+ // 临时 API:执行 VACUUM 优化数据库
221
+ if (req.method === "POST" && routePath === "/api/db-vacuum") {
222
+ try {
223
+ store.rawQuery("VACUUM");
224
+ sendJSON(res, 200, { message: "VACUUM completed" });
225
+ } catch (e) {
226
+ sendJSON(res, 400, { error: e.message });
227
+ }
228
+ return;
229
+ }
230
+
220
231
  if (req.method === "GET" && routePath === "/api/db-query") {
221
232
  const sql = params.sql || "SELECT * FROM jobs LIMIT 10";
222
233
  const limit = Math.min(parseInt(params.limit) || 100, 1000);