tt-help-cli-ycl 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/auto-core.mjs +174 -0
  3. package/src/cli/auto.js +94 -0
  4. package/src/cli/explore.js +117 -0
  5. package/src/cli/progress.js +111 -0
  6. package/src/cli/scrape.js +47 -0
  7. package/src/cli/utils.js +18 -0
  8. package/src/cli/videos.js +41 -0
  9. package/src/cli/watch.js +28 -0
  10. package/src/data-store.mjs +213 -0
  11. package/src/{explore-core.cjs → explore-core.mjs} +148 -157
  12. package/src/{get-user-videos-core.cjs → get-user-videos-core.mjs} +6 -23
  13. package/src/lib/args.js +19 -38
  14. package/src/lib/auto-browser.mjs +5 -12
  15. package/src/lib/browser/anti-detect.js +23 -0
  16. package/src/lib/browser/cdp.js +142 -0
  17. package/src/lib/browser/launch.js +43 -0
  18. package/src/lib/browser/page.js +62 -0
  19. package/src/lib/constants.js +13 -95
  20. package/src/lib/delay.js +54 -0
  21. package/src/lib/explore.js +16 -123
  22. package/src/lib/fetcher.js +3 -18
  23. package/src/lib/get-user-videos-browser.mjs +1 -6
  24. package/src/lib/io.js +8 -30
  25. package/src/lib/parser.js +1 -1
  26. package/src/lib/retry.js +44 -0
  27. package/src/lib/scrape-browser.mjs +1 -6
  28. package/src/lib/scrape.js +5 -4
  29. package/src/lib/url.js +52 -0
  30. package/src/main.mjs +59 -822
  31. package/src/scraper/{core.cjs → core.mjs} +25 -57
  32. package/src/scraper/modules/{comment-extractor.cjs → comment-extractor.mjs} +23 -15
  33. package/src/scraper/modules/follow-extractor.mjs +121 -0
  34. package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +3 -5
  35. package/src/scraper/modules/page-error-detector.mjs +68 -0
  36. package/src/scraper/modules/page-helpers.mjs +44 -0
  37. package/src/scraper/modules/scroll-collector.mjs +189 -0
  38. package/src/watch/public/index.html +139 -64
  39. package/src/watch/server.mjs +234 -153
  40. package/src/auto-core.cjs +0 -367
  41. package/src/data-store.cjs +0 -69
  42. package/src/get-user-videos.cjs +0 -59
  43. package/src/scraper/index.cjs +0 -97
  44. package/src/scraper/modules/follow-extractor.cjs +0 -112
  45. package/src/scraper/modules/page-helpers.cjs +0 -422
  46. package/src/scraper/modules/scroll-collector.cjs +0 -173
  47. package/src/scraper/modules/video-scanner.cjs +0 -43
package/src/lib/explore.js CHANGED
@@ -1,7 +1,9 @@
1
1
  import { chromium } from 'playwright';
2
- import { existsSync, accessSync } from 'fs';
3
2
  import { browser, saveBrowser, configPath } from './constants.js';
4
- import scrollCollector from '../scraper/modules/scroll-collector.cjs';
3
+ import { detectBrowser } from './browser/launch.js';
4
+ import { getAntiDetectScript } from './browser/anti-detect.js';
5
+ import { retryWithBackoff } from './retry.js';
6
+ import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
5
7
 
6
8
  const EXPLORE_URL = 'https://www.tiktok.com/explore';
7
9
 
@@ -9,85 +11,12 @@ function sleep(ms) {
9
11
  return new Promise(r => setTimeout(r, ms));
10
12
  }
11
13
 
12
- function isRetryableError(error) {
13
- if (!error) return false;
14
- const msg = (error.message || error.toString() || '').toLowerCase();
15
- const patterns = ['interrupted', 'net::', 'econn', 'etimedout', 'enotfound', 'eai_again', 'esocketreset', 'connection.*refused', 'connection.*reset', 'failed.*navigate', 'target.*closed', 'crash'];
16
- return patterns.some(p => new RegExp(p, 'i').test(msg));
17
- }
18
-
19
- async function retryGoto(page, url, options, { maxRetries = 3, baseDelay = 3000 } = {}) {
20
- let lastError;
21
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
22
- try {
23
- return await page.goto(url, options);
24
- } catch (error) {
25
- lastError = error;
26
- if (attempt >= maxRetries || !isRetryableError(error)) {
27
- throw error;
28
- }
29
- const jitter = Math.random() * 500;
30
- const waitTime = baseDelay * Math.pow(2, attempt) + jitter;
31
- console.log(` [重试] ${attempt + 1}/${maxRetries},${Math.round(waitTime)}ms 后重试...`);
32
- await sleep(waitTime);
33
- }
34
- }
35
- throw lastError;
36
- }
37
-
38
- function detectBrowser() {
39
- const isMac = process.platform === 'darwin';
40
- const isWin = process.platform === 'win32';
41
- const isLinux = process.platform === 'linux';
42
-
43
- const paths = [];
44
-
45
- if (isMac) {
46
- paths.push(
47
- '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
48
- '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary',
49
- '/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
50
- '/Applications/Brave Browser.app/Contents/MacOS/Brave Browser',
51
- );
52
- } else if (isWin) {
53
- const localAppData = process.env.LOCALAPPDATA || '';
54
- const programFiles = process.env.PROGRAMFILES || '';
55
- const programFilesX86 = process.env['PROGRAMFILES(X86)'] || '';
56
- paths.push(
57
- `${programFiles}\\Google\\Chrome\\Application\\chrome.exe`,
58
- `${programFilesX86}\\Google\\Chrome\\Application\\chrome.exe`,
59
- `${localAppData}\\Google\\Chrome\\Application\\chrome.exe`,
60
- `${programFiles}\\Microsoft\\Edge\\Application\\msedge.exe`,
61
- `${programFilesX86}\\Microsoft\\Edge\\Application\\msedge.exe`,
62
- );
63
- } else if (isLinux) {
64
- paths.push(
65
- '/usr/bin/google-chrome',
66
- '/usr/bin/google-chrome-stable',
67
- '/usr/bin/chromium-browser',
68
- '/usr/bin/chromium',
69
- '/snap/bin/chromium',
70
- '/usr/bin/microsoft-edge',
71
- );
72
- }
73
-
74
- for (const p of paths) {
75
- try {
76
- accessSync(p);
77
- return p;
78
- } catch {
79
- // 文件不存在或无权限
80
- }
81
- }
82
- return null;
83
- }
84
-
85
14
  export async function fetchExplore(count = 100) {
86
15
  let browserPath = browser;
87
16
  let browserSource = '配置';
88
17
 
89
18
  if (!browserPath) {
90
- console.log(` [0/6] 未配置浏览器,正在自动探测...`);
19
+ console.log(' [0/6] 未配置浏览器,正在自动探测...');
91
20
  const detected = detectBrowser();
92
21
  if (detected) {
93
22
  browserPath = detected;
@@ -101,9 +30,6 @@ export async function fetchExplore(count = 100) {
101
30
  }
102
31
  }
103
32
 
104
- let browserLaunched = false;
105
- let instance;
106
-
107
33
  const launchOptions = {
108
34
  headless: true,
109
35
  args: [
@@ -119,9 +45,9 @@ export async function fetchExplore(count = 100) {
119
45
  launchOptions.executablePath = browserPath;
120
46
  }
121
47
 
48
+ let instance;
122
49
  try {
123
50
  instance = await chromium.launch(launchOptions);
124
- browserLaunched = true;
125
51
  } catch (err) {
126
52
  if (browserPath) {
127
53
  console.log(` [0/6] 浏览器启动失败 (${err.message}),回退到 Playwright Chromium...`);
@@ -130,11 +56,6 @@ export async function fetchExplore(count = 100) {
130
56
  headless: true,
131
57
  args: launchOptions.args,
132
58
  });
133
- browserLaunched = true;
134
- }
135
-
136
- if (!browserLaunched) {
137
- throw new Error('无法启动浏览器,请确保已安装 Chrome/Edge 或运行 "npx playwright install chromium"');
138
59
  }
139
60
 
140
61
  try {
@@ -144,49 +65,21 @@ export async function fetchExplore(count = 100) {
144
65
  locale: 'en-US',
145
66
  });
146
67
 
147
- // 注入反检测脚本
148
- await context.addInitScript(() => {
149
- // 重写 navigator.webdriver
150
- Object.defineProperty(navigator, 'webdriver', { get: () => false });
151
-
152
- // 伪造 window.chrome
153
- if (!window.chrome) {
154
- window.chrome = { runtime: {} };
155
- }
156
-
157
- // 覆写 permissions query
158
- const originalQuery = window.navigator.permissions.query;
159
- window.navigator.permissions.query = (params) =>
160
- params.name === 'notifications'
161
- ? Promise.resolve({ state: Notification.permission })
162
- : originalQuery(params);
163
-
164
- // 覆写 languages
165
- Object.defineProperty(navigator, 'languages', {
166
- get: () => ['en-US', 'en'],
167
- });
168
-
169
- // 覆写 plugins
170
- Object.defineProperty(navigator, 'plugins', {
171
- get: () => [1, 2, 3, 4, 5],
172
- });
173
- });
68
+ await context.addInitScript(getAntiDetectScript());
174
69
 
175
70
  const page = await context.newPage();
176
- await retryGoto(page, EXPLORE_URL, { waitUntil: 'load', timeout: 30000 });
177
- console.log(` [1/6] 页面已加载`);
71
+ await retryWithBackoff(() => page.goto(EXPLORE_URL, { waitUntil: 'load', timeout: 30000 }));
72
+ console.log(' [1/6] 页面已加载');
178
73
 
179
74
  await sleep(5000);
180
75
 
181
- const allUrls = await scrollCollector.scrollAndCollect(page, {
76
+ const allUrls = await scrollAndCollect(page, {
182
77
  container: null,
183
- collectFn: () => {
184
- return {
185
- items: Array.from(document.querySelectorAll('a'))
186
- .filter(a => /\/video\/\d{16,20}/.test(a.href))
187
- .map(a => a.href),
188
- };
189
- },
78
+ collectFn: () => ({
79
+ items: Array.from(document.querySelectorAll('a'))
80
+ .filter(a => /\/video\/\d{16,20}/.test(a.href))
81
+ .map(a => a.href),
82
+ }),
190
83
  maxItems: count * 2,
191
84
  delayRange: [1500, 2500],
192
85
  staleThreshold: 5,
@@ -210,7 +103,7 @@ export async function fetchExplore(count = 100) {
210
103
  const videoId = url.match(/video\/(\d{16,20})$/)?.[1];
211
104
  if (videoId && !seen.has(videoId)) {
212
105
  seen.add(videoId);
213
- const user = url.match(/\/@([^\/]+)/)?.[1];
106
+ const user = url.match(/\/@([^/]+)/)?.[1];
214
107
  if (user) {
215
108
  results.push({ user, id: videoId, url });
216
109
  }
package/src/lib/fetcher.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { fetch, ProxyAgent } from 'undici';
2
2
  import { DEFAULT_PROXY } from './constants.js';
3
+ import { isProfileUrl } from './url.js';
3
4
 
4
5
  const HEADERS = {
5
6
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
@@ -15,6 +16,8 @@ const HEADERS = {
15
16
  'Cache-Control': 'max-age=0',
16
17
  };
17
18
 
19
+ export { isProfileUrl } from './url.js';
20
+
18
21
  export async function fetchHtml(url, proxyUrl) {
19
22
  const p = proxyUrl || DEFAULT_PROXY;
20
23
  const agent = new ProxyAgent(p);
@@ -40,21 +43,3 @@ export async function fetchHtml(url, proxyUrl) {
40
43
 
41
44
  throw new Error(`请求 ${url} 失败(已重试 3 次),代理 ${p} 不可用`);
42
45
  }
43
-
44
- export function makeProfileUrl(handle) {
45
- if (handle.startsWith('http')) return handle;
46
- return `https://www.tiktok.com/${handle}`;
47
- }
48
-
49
- export function isProfileUrl(url) {
50
- return /\/@[\w-]+(?:$|[?#])/.test(url);
51
- }
52
-
53
- export function isVideoUrl(url) {
54
- return /\/video\/\d+/.test(url);
55
- }
56
-
57
- export function extractProfileHandle(url) {
58
- const m = url.match(/https:\/\/www\.tiktok\.com\/(@[\w-]+)/);
59
- return m ? m[1] : null;
60
- }
package/src/lib/get-user-videos-browser.mjs CHANGED
@@ -1,6 +1 @@
1
- import { createRequire } from 'module';
2
-
3
- const require = createRequire(import.meta.url);
4
- const core = require('../get-user-videos-core.cjs');
5
-
6
- export const runGetUserVideos = core.runGetUserVideos;
1
+ export { runGetUserVideos } from '../get-user-videos-core.mjs';
package/src/lib/io.js CHANGED
@@ -1,19 +1,7 @@
1
- import { writeFileSync, readFileSync } from 'fs';
1
+ import { extractDisplayPath } from './url.js';
2
2
 
3
3
  let lastBarCount = 0;
4
4
 
5
- export function writeOutput(data, outputFile) {
6
- const output = JSON.stringify(data, null, 2);
7
- const target = outputFile || 'tiktok_data.json';
8
- writeFileSync(target, output, 'utf-8');
9
- console.log(`结果已写入: ${target}`);
10
- }
11
-
12
- export function readUrlFile(filePath) {
13
- const content = readFileSync(filePath, 'utf-8');
14
- return content.split(/\r?\n/).map(l => l.trim()).filter(l => l.startsWith('http'));
15
- }
16
-
17
5
  export function createProgressBar(current, total, maxWidth = 30) {
18
6
  const filled = Math.round((current / total) * maxWidth);
19
7
  return '█'.repeat(filled).padEnd(maxWidth);
@@ -23,16 +11,6 @@ export function calculateConcurrency(total) {
23
11
  return Math.min(5, Math.max(1, Math.floor(total / 10)), total);
24
12
  }
25
13
 
26
- export function extractUrlDisplay(url) {
27
- try {
28
- const pathname = new URL(url).pathname;
29
- const parts = pathname.split('/').filter(Boolean);
30
- return parts.slice(-2).join('/');
31
- } catch {
32
- return url;
33
- }
34
- }
35
-
36
14
  export function createMultiProgressBars(count) {
37
15
  return Array.from({ length: count }, () => ({
38
16
  current: 0,
@@ -44,26 +22,26 @@ export function createMultiProgressBars(count) {
44
22
 
45
23
  export function renderMultiProgressBars(bars, maxWidth = 30) {
46
24
  const activeBars = bars.filter(bar => bar.total > 0);
47
-
25
+
48
26
  if (activeBars.length === 0) return;
49
-
27
+
50
28
  const lines = activeBars.map((bar) => {
51
29
  const prog = createProgressBar(bar.current, bar.total, maxWidth);
52
30
  const icon = bar.status === 'done' ? '✓' :
53
31
  bar.status === 'error' ? '' : '⟳';
54
- const urlDisplay = bar.url ? extractUrlDisplay(bar.url) : '';
32
+ const urlDisplay = bar.url ? extractDisplayPath(bar.url) : '';
55
33
  return ` [${prog}] ${bar.current}/${bar.total} ${icon} ${urlDisplay}`;
56
34
  });
57
-
35
+
58
36
  const output = lines.join('\n');
59
-
37
+
60
38
  if (lastBarCount > 0) {
61
39
  process.stdout.write(`\x1b[${lastBarCount}A`);
62
40
  }
63
-
41
+
64
42
  process.stdout.write('\x1b[0J');
65
43
  process.stdout.write(output + '\n');
66
-
44
+
67
45
  lastBarCount = activeBars.length;
68
46
  }
69
47
 
package/src/lib/parser.js CHANGED
@@ -1,4 +1,4 @@
1
- import { USER_SECTION_SIZE } from './constants.js';
1
+ export const USER_SECTION_SIZE = 12000;
2
2
 
3
3
  export function extractUserSection(html) {
4
4
  const idx = html.indexOf('"uniqueId"');
package/src/lib/retry.js ADDED
@@ -0,0 +1,44 @@
1
+ import { delay } from './delay.js';
2
+
3
+ const RETRYABLE_PATTERNS = [
4
+ 'interrupted',
5
+ 'Navigation.*interrupted',
6
+ 'net::',
7
+ 'ECONN',
8
+ 'ETIMEDOUT',
9
+ 'ENOTFOUND',
10
+ 'EAI_AGAIN',
11
+ 'ESOCKETRESET',
12
+ 'connection.*refused',
13
+ 'connection.*reset',
14
+ 'failed.*navigate',
15
+ 'target.*closed',
16
+ 'crash',
17
+ ];
18
+
19
+ export function isRetryableError(error) {
20
+ if (!error) return false;
21
+ const msg = (error.message || error.toString() || '').toLowerCase();
22
+ return RETRYABLE_PATTERNS.some(p => new RegExp(p, 'i').test(msg));
23
+ }
24
+
25
+ export async function retryWithBackoff(fn, { maxRetries = 3, baseDelay = 3000, log } = {}) {
26
+ let lastError;
27
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
28
+ try {
29
+ return await fn();
30
+ } catch (error) {
31
+ lastError = error;
32
+ if (attempt >= maxRetries || !isRetryableError(error)) {
33
+ throw error;
34
+ }
35
+ const jitter = Math.random() * 2000;
36
+ const waitTime = baseDelay * Math.pow(2, attempt) + jitter;
37
+ if (log) {
38
+ log(` [重试] ${attempt + 1}/${maxRetries},${Math.round(waitTime / 1000)}s 后重试...`);
39
+ }
40
+ await delay(Math.round(waitTime), Math.round(waitTime));
41
+ }
42
+ }
43
+ throw lastError;
44
+ }
package/src/lib/scrape-browser.mjs CHANGED
@@ -1,6 +1 @@
1
- import { createRequire } from 'module';
2
-
3
- const require = createRequire(import.meta.url);
4
- const core = require('../scraper/core.cjs');
5
-
6
- export const runScrape = core.runScrape;
1
+ export { runScrape } from '../scraper/core.mjs';
package/src/lib/scrape.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { extractUserSection, parseUserSection, extractLocationCreated } from './parser.js';
2
- import { fetchHtml, makeProfileUrl, isProfileUrl, isVideoUrl, extractProfileHandle } from './fetcher.js';
2
+ import { fetchHtml, isProfileUrl } from './fetcher.js';
3
+ import { toProfileUrl, isVideoUrl, extractUniqueId } from './url.js';
3
4
 
4
5
  export async function extractUserData(profileUrl, proxyUrl) {
5
6
  const profileHtml = await fetchHtml(profileUrl, proxyUrl);
@@ -17,16 +18,16 @@ export async function extractVideoLocation(videoUrl, proxyUrl) {
17
18
 
18
19
  export async function processUrl(url, proxyUrl) {
19
20
  if (isProfileUrl(url)) {
20
- const profileUrl = makeProfileUrl(url);
21
+ const profileUrl = toProfileUrl(url);
21
22
  const profileData = await extractUserData(profileUrl, proxyUrl);
22
23
  return [profileData];
23
24
  }
24
25
 
25
26
  if (isVideoUrl(url)) {
26
- const profileHandle = extractProfileHandle(url);
27
+ const profileHandle = extractUniqueId(url);
27
28
  if (!profileHandle) throw new Error(`无法从视频URL提取用户主页: ${url}`);
28
29
 
29
- const profileUrl = makeProfileUrl(profileHandle);
30
+ const profileUrl = toProfileUrl(profileHandle);
30
31
  const [profileData, locationCreated] = await Promise.all([
31
32
  extractUserData(profileUrl, proxyUrl),
32
33
  extractVideoLocation(url, proxyUrl),
package/src/lib/url.js ADDED
@@ -0,0 +1,52 @@
1
+ const BASE_URL = 'https://www.tiktok.com';
2
+
3
+ export function extractUniqueId(url) {
4
+ const m = url.match(/\/@([^/]+)/);
5
+ return m ? m[1] : null;
6
+ }
7
+
8
+ export function extractVideoId(url) {
9
+ const m = url.match(/\/video\/(\d+)/);
10
+ return m ? m[1] : null;
11
+ }
12
+
13
+ export function normalizeUsername(input) {
14
+ return (input || '').replace(/^@/, '');
15
+ }
16
+
17
+ export function toProfileUrl(handle) {
18
+ const clean = normalizeUsername(handle);
19
+ return `${BASE_URL}/@${clean}`;
20
+ }
21
+
22
+ export function toVideoUrl(handle, videoId) {
23
+ const clean = normalizeUsername(handle);
24
+ return `${BASE_URL}/@${clean}/video/${videoId}`;
25
+ }
26
+
27
+ export function ensureAbsoluteUrl(href) {
28
+ if (href.startsWith('http')) return href;
29
+ return `${BASE_URL}${href}`;
30
+ }
31
+
32
+ export function isProfileUrl(url) {
33
+ return /\/@[\w-]+(?:$|[?#])/.test(url);
34
+ }
35
+
36
+ export function isVideoUrl(url) {
37
+ return /\/video\/\d+/.test(url);
38
+ }
39
+
40
+ export function extractDisplayPath(url) {
41
+ try {
42
+ const parts = new URL(url).pathname.split('/').filter(Boolean);
43
+ return parts.slice(-2).join('/');
44
+ } catch {
45
+ return url;
46
+ }
47
+ }
48
+
49
+ export function extractAuthorFromVideoUrl(url) {
50
+ const m = url.match(/@([^/]+)\/video/);
51
+ return m ? '@' + m[1] : null;
52
+ }