tt-help-cli-ycl 1.3.78 → 1.3.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tt-help-cli-ycl",
3
- "version": "1.3.78",
3
+ "version": "1.3.79",
4
4
  "description": "TikTok user & video data scraper - extract ttSeller, verified, locationCreated from HTML source",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli/attach.js CHANGED
@@ -83,6 +83,7 @@ export async function handleAttach(options) {
83
83
  serverUrl,
84
84
  attachCountries,
85
85
  customProxy,
86
+ attachPoolSize,
86
87
  showHelp,
87
88
  } = options;
88
89
  const effectiveProxy = customProxy || configuredProxy;
@@ -110,6 +111,9 @@ export async function handleAttach(options) {
110
111
  attachLog(
111
112
  " --proxy <代理地址> HTTP 代理地址(如 http://127.0.0.1:7890),不配置则从 ~/.tt-help.json 读取",
112
113
  );
114
+ attachLog(
115
+ " --size <N> 浏览器页面池大小(默认: 3,不建议超过 3)",
116
+ );
113
117
  attachLog("");
114
118
  attachLog("说明:");
115
119
  attachLog(
@@ -117,12 +121,16 @@ export async function handleAttach(options) {
117
121
  );
118
122
  attachLog(" 抓取完成后通过 POST /api/user-info-batch 批量回传结果");
119
123
  attachLog(" 浏览器崩溃时自动重启,支持长时间无人值守运行");
124
+ attachLog(
125
+ " --size 控制单个浏览器内并发的 page 数,增大可提升吞吐但可能触发 TikTok 限流",
126
+ );
120
127
  attachLog("");
121
128
  attachLog("示例:");
122
129
  attachLog(" tt-help attach");
123
130
  attachLog(" tt-help attach -p 5 -i 10");
124
131
  attachLog(" tt-help attach -p 3 -i 5 -s http://127.0.0.1:3001");
125
132
  attachLog(" tt-help attach -c PL,DE,FR -p 5");
133
+ attachLog(" tt-help attach -p 5 --size 5");
126
134
  return;
127
135
  }
128
136
 
@@ -135,6 +143,7 @@ export async function handleAttach(options) {
135
143
  );
136
144
 
137
145
  const scraper = new TikTokScraper({
146
+ poolSize: attachPoolSize || 3,
138
147
  proxyServer: effectiveProxy || null,
139
148
  });
140
149
  const shutdown = async (signal) => {
package/src/lib/args.js CHANGED
@@ -534,6 +534,7 @@ function parseAttachArgs(args) {
534
534
  let serverUrl = defaultServer;
535
535
  let countries = [];
536
536
  let customProxy = null;
537
+ let poolSize = 3;
537
538
 
538
539
  for (let i = 0; i < args.length; i++) {
539
540
  const arg = args[i];
@@ -550,6 +551,8 @@ function parseAttachArgs(args) {
550
551
  .filter(Boolean);
551
552
  } else if (arg === "--proxy") {
552
553
  customProxy = args[++i];
554
+ } else if (arg === "--size") {
555
+ poolSize = parseInt(args[++i], 10) || 3;
553
556
  }
554
557
  }
555
558
 
@@ -560,6 +563,7 @@ function parseAttachArgs(args) {
560
563
  serverUrl,
561
564
  attachCountries: countries,
562
565
  customProxy,
566
+ attachPoolSize: poolSize,
563
567
  urls: [],
564
568
  outputFormat: "json",
565
569
  exploreCount: 0,
@@ -143,6 +143,66 @@ function initUserDb(filePath) {
143
143
  if (!existingJobColumns.has("bio_link")) {
144
144
  db.exec(`ALTER TABLE jobs ADD COLUMN bio_link TEXT`);
145
145
  }
146
+ db.exec(`
147
+ CREATE TABLE IF NOT EXISTS jobs_base (
148
+ unique_id TEXT PRIMARY KEY,
149
+ nickname TEXT,
150
+ status TEXT DEFAULT 'pending',
151
+ sources TEXT,
152
+ claimed_by TEXT,
153
+ claimed_at INTEGER,
154
+ error TEXT,
155
+ pinned INTEGER DEFAULT 0,
156
+ no_video INTEGER DEFAULT 0,
157
+ restricted INTEGER DEFAULT 0,
158
+ user_update_count INTEGER DEFAULT 0,
159
+ tt_seller INTEGER,
160
+ verified INTEGER,
161
+ video_count INTEGER DEFAULT 0,
162
+ comment_count INTEGER DEFAULT 0,
163
+ guessed_location TEXT,
164
+ location_created TEXT,
165
+ confirmed_location TEXT,
166
+ modified_at INTEGER,
167
+ follower_count INTEGER DEFAULT 0,
168
+ following_count INTEGER DEFAULT 0,
169
+ heart_count INTEGER DEFAULT 0,
170
+ refresh_time INTEGER,
171
+ processed INTEGER DEFAULT 0,
172
+ processed_at INTEGER,
173
+ created_at INTEGER,
174
+ updated_at INTEGER,
175
+ region TEXT,
176
+ signature TEXT,
177
+ sec_uid TEXT,
178
+ status_code INTEGER,
179
+ latest_video_time INTEGER,
180
+ bio_link TEXT
181
+ )
182
+ `);
183
+
184
+ // 迁移:为已存在的 jobs_base 表补全列
185
+ const existingJobBaseColumns = new Set(
186
+ db
187
+ .prepare("PRAGMA table_info(jobs_base)")
188
+ .all()
189
+ .map((c) => c.name),
190
+ );
191
+ if (!existingJobBaseColumns.has("status_code")) {
192
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN status_code INTEGER`);
193
+ }
194
+ if (!existingJobBaseColumns.has("latest_video_time")) {
195
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN latest_video_time INTEGER`);
196
+ }
197
+ if (!existingJobBaseColumns.has("confirmed_location")) {
198
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN confirmed_location TEXT`);
199
+ }
200
+ if (!existingJobBaseColumns.has("modified_at")) {
201
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN modified_at INTEGER`);
202
+ }
203
+ if (!existingJobBaseColumns.has("bio_link")) {
204
+ db.exec(`ALTER TABLE jobs_base ADD COLUMN bio_link TEXT`);
205
+ }
146
206
  db.exec(`
147
207
  CREATE TABLE IF NOT EXISTS raw_jobs (
148
208
  unique_id TEXT PRIMARY KEY,
@@ -635,11 +695,16 @@ function getDashboardStatsFromDb(targetLocations = []) {
635
695
  .all(...targetLocations)
636
696
  : [];
637
697
 
698
+ const jobsBaseCount = db
699
+ .prepare("SELECT COUNT(*) as total FROM jobs_base")
700
+ .get().total;
701
+
638
702
  return {
639
703
  totalUsers: aggregateRow.total,
640
704
  rawJobs: getRawJobsCount(),
641
705
  dbTotalUsers: getUserDbCount(),
642
706
  jobsTotal: aggregateRow.total,
707
+ jobsBaseTotal: jobsBaseCount,
643
708
  jobsPending: aggregateRow.pending,
644
709
  processedUsers: aggregateRow.done,
645
710
  pendingUsers: aggregateRow.pending,
@@ -1730,6 +1795,91 @@ function inferStatus(u) {
1730
1795
  return "pending";
1731
1796
  }
1732
1797
 
1798
/**
 * Insert a user into the `jobs_base` table, leaving any existing row alone.
 *
 * The statement is `INSERT OR IGNORE`, so a row whose `unique_id` already
 * exists is not overwritten. Does nothing when no database handle is open.
 *
 * @param {object} user - User record in camelCase form (`uniqueId`,
 *   `nickname`, `sources`, `ttSeller`, `bioLink`, ...). Only `uniqueId` is
 *   used without a fallback; every other field falls back to `null`, `0` or
 *   the current timestamp.
 * @returns {void}
 */
function addJobBaseToDb(user) {
  if (!db) return;

  const now = Date.now();

  // Tri-state flag: "unknown" (undefined / null / "") is stored as NULL,
  // anything else is collapsed to 1 or 0.
  const toTriState = (value) => {
    if (value === undefined || value === null || value === "") return null;
    return value ? 1 : 0;
  };

  // Normalise bioLink to a string or null before binding it.
  // A plain string must be handled first: every JS string inherits the
  // legacy String.prototype.link method, so `bioLink?.link` on a string is a
  // truthy *function*, not undefined. An object without `link`/`url` must
  // likewise not be passed through as a raw object.
  const rawBioLink = user.bioLink;
  let bioLink = null;
  if (typeof rawBioLink === "string") {
    bioLink = rawBioLink || null;
  } else if (rawBioLink !== null && typeof rawBioLink === "object") {
    const candidate = rawBioLink.link || rawBioLink.url;
    bioLink = typeof candidate === "string" && candidate ? candidate : null;
  }

  // Sources are stored as a JSON array with duplicates removed.
  const sources = JSON.stringify(
    Array.isArray(user.sources) ? [...new Set(user.sources)] : [],
  );

  db.prepare(
    `
    INSERT OR IGNORE INTO jobs_base (
      unique_id,
      nickname,
      status,
      sources,
      claimed_by,
      claimed_at,
      error,
      pinned,
      no_video,
      restricted,
      user_update_count,
      tt_seller,
      verified,
      video_count,
      comment_count,
      guessed_location,
      location_created,
      follower_count,
      following_count,
      heart_count,
      refresh_time,
      processed,
      processed_at,
      created_at,
      updated_at,
      region,
      signature,
      bio_link,
      sec_uid
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `,
  ).run(
    user.uniqueId,
    user.nickname || null,
    user.status || inferStatus(user),
    sources,
    user.claimedBy || null,
    user.claimedAt || null,
    user.error || null,
    user.pinned ? 1 : 0,
    user.noVideo ? 1 : 0,
    user.restricted ? 1 : 0,
    user.userUpdateCount || 0,
    toTriState(user.ttSeller),
    toTriState(user.verified),
    user.videoCount || 0,
    user.commentCount || 0,
    user.guessedLocation || null,
    user.locationCreated || null,
    user.followerCount || 0,
    user.followingCount || 0,
    user.heartCount || 0,
    user.refreshTime || null,
    user.processed ? 1 : 0,
    user.processedAt || null,
    user.createdAt || now,
    user.updatedAt || now,
    user.region || null,
    user.signature || null,
    bioLink,
    user.secUid || null,
  );
}
1882
+
1733
1883
  function addJob(user) {
1734
1884
  if (!db) {
1735
1885
  addUserToDb(user);
@@ -1951,9 +2101,9 @@ export function createStore(filePath) {
1951
2101
  function addUser(user, append) {
1952
2102
  const memoryIdx = uidIndex.get(user.uniqueId);
1953
2103
  if (db && memoryIdx === undefined) {
1954
- const existingJob = getJobRow(user.uniqueId);
1955
- if (existingJob) {
1956
- return updateJobInfo(user.uniqueId, user, false);
2104
+ // users 表判重(所有发现过的用户合集),而不是 jobs 表
2105
+ if (hasUserInDb(user.uniqueId)) {
2106
+ return;
1957
2107
  }
1958
2108
  addJob(user);
1959
2109
  return;
@@ -2802,7 +2952,8 @@ export function createStore(filePath) {
2802
2952
  const newUsers = [];
2803
2953
  for (const d of unique) {
2804
2954
  if (!hasUser(d.uniqueId)) {
2805
- addJob(d);
2955
+ addUserToDb(d);
2956
+ addJobBaseToDb(d);
2806
2957
  newUsers.push(d.uniqueId);
2807
2958
  }
2808
2959
  }
@@ -3423,37 +3574,37 @@ export function createStore(filePath) {
3423
3574
  });
3424
3575
  txn(updates);
3425
3576
 
3426
- // 在事务外执行移动操作(避免嵌套 transaction 问题)
3577
+ // 批量移动非商家用户到 raw_jobs(优化:一次 SQL 搞定)
3427
3578
  if (moveList.length > 0) {
3428
- const moveTxn = db.transaction((ids) => {
3429
- ids.forEach((uid) => {
3430
- db.prepare(
3431
- `
3432
- INSERT OR REPLACE INTO raw_jobs (
3433
- unique_id, nickname, status, sources, claimed_by, claimed_at,
3434
- error, pinned, no_video, restricted, user_update_count,
3435
- tt_seller, verified, video_count, comment_count,
3436
- guessed_location, location_created, confirmed_location, modified_at,
3437
- follower_count, following_count, heart_count, refresh_time,
3438
- processed, processed_at, created_at, updated_at,
3439
- region, signature, bio_link, sec_uid, status_code, latest_video_time
3440
- )
3441
- SELECT
3442
- unique_id, nickname, status, sources, claimed_by, claimed_at,
3443
- error, pinned, no_video, restricted, user_update_count,
3444
- tt_seller, verified, video_count, comment_count,
3445
- guessed_location, location_created, confirmed_location, modified_at,
3446
- follower_count, following_count, heart_count, refresh_time,
3447
- processed, processed_at, created_at, updated_at,
3448
- region, signature, bio_link, sec_uid, status_code, latest_video_time
3449
- FROM jobs WHERE unique_id = ?
3450
- `,
3451
- ).run(uid);
3452
-
3453
- db.prepare("DELETE FROM jobs WHERE unique_id = ?").run(uid);
3454
- });
3455
- });
3456
- moveTxn(moveList);
3579
+ const placeholders = moveList.map(() => "?").join(",");
3580
+ // 批量 INSERT 到 raw_jobs
3581
+ db.prepare(
3582
+ `
3583
+ INSERT OR REPLACE INTO raw_jobs (
3584
+ unique_id, nickname, status, sources, claimed_by, claimed_at,
3585
+ error, pinned, no_video, restricted, user_update_count,
3586
+ tt_seller, verified, video_count, comment_count,
3587
+ guessed_location, location_created, confirmed_location, modified_at,
3588
+ follower_count, following_count, heart_count, refresh_time,
3589
+ processed, processed_at, created_at, updated_at,
3590
+ region, signature, bio_link, sec_uid, status_code, latest_video_time
3591
+ )
3592
+ SELECT
3593
+ unique_id, nickname, status, sources, claimed_by, claimed_at,
3594
+ error, pinned, no_video, restricted, user_update_count,
3595
+ tt_seller, verified, video_count, comment_count,
3596
+ guessed_location, location_created, confirmed_location, modified_at,
3597
+ follower_count, following_count, heart_count, refresh_time,
3598
+ processed, processed_at, created_at, updated_at,
3599
+ region, signature, bio_link, sec_uid, status_code, latest_video_time
3600
+ FROM jobs WHERE unique_id IN (${placeholders})
3601
+ `,
3602
+ ).run(...moveList);
3603
+
3604
+ // 批量 DELETE jobs
3605
+ db.prepare(`DELETE FROM jobs WHERE unique_id IN (${placeholders})`).run(
3606
+ ...moveList,
3607
+ );
3457
3608
  }
3458
3609
 
3459
3610
  // 清理内部标记
@@ -159,6 +159,9 @@ function renderStats() {
159
159
  flashEl("statProcessing", d.processingUsers || 0);
160
160
  flashEl("statDone", d.processedUsers);
161
161
  flashEl("statPending", d.pendingUsers);
162
+ const statJobsBase = document.getElementById("statJobsBase");
163
+ if (statJobsBase)
164
+ statJobsBase.textContent = `jobs_base: ${formatStatNum(d.jobsBaseTotal || 0)}`;
162
165
  flashEl("statError", d.errorUsers);
163
166
  flashEl("statRestricted", d.restrictedUsers);
164
167
  flashEl("statTarget", d.targetUsers, { full: true });
@@ -35,6 +35,7 @@
35
35
  <div class="stat-card clickable pending-card" id="statPendingCard" onclick="navigateToPending()">
36
36
  <div class="label">待处理</div>
37
37
  <div class="value pending" id="statPending">0</div>
38
+ <div class="value-sub" id="statJobsBase">jobs_base: 0</div>
38
39
  </div>
39
40
  <div class="stat-card">
40
41
  <div class="label">错误</div>
@@ -217,6 +217,17 @@ export function startWatchServer(dataAnchor, port = 3000, existingStore) {
217
217
  }
218
218
 
219
219
  // 调试接口:直接查询数据库原始数据
220
+ // 临时 API:执行 VACUUM 优化数据库
221
+ if (req.method === "POST" && routePath === "/api/db-vacuum") {
222
+ try {
223
+ store.rawQuery("VACUUM");
224
+ sendJSON(res, 200, { message: "VACUUM completed" });
225
+ } catch (e) {
226
+ sendJSON(res, 400, { error: e.message });
227
+ }
228
+ return;
229
+ }
230
+
220
231
  if (req.method === "GET" && routePath === "/api/db-query") {
221
232
  const sql = params.sql || "SELECT * FROM jobs LIMIT 10";
222
233
  const limit = Math.min(parseInt(params.limit) || 100, 1000);