tt-help-cli-ycl 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +17 -17
  2. package/cli.js +9 -9
  3. package/package.json +44 -44
  4. package/src/cli/auto.js +94 -0
  5. package/src/cli/explore.js +117 -0
  6. package/src/cli/progress.js +111 -0
  7. package/src/cli/scrape.js +47 -0
  8. package/src/cli/utils.js +18 -0
  9. package/src/cli/videos.js +41 -0
  10. package/src/cli/watch.js +28 -0
  11. package/src/lib/args.js +386 -397
  12. package/src/lib/browser/anti-detect.js +23 -0
  13. package/src/lib/browser/cdp.js +142 -0
  14. package/src/lib/browser/launch.js +43 -0
  15. package/src/lib/browser/page.js +80 -0
  16. package/src/lib/constants.js +85 -168
  17. package/src/lib/delay.js +54 -0
  18. package/src/lib/explore-fetch.js +118 -0
  19. package/src/lib/fetcher.js +45 -60
  20. package/src/lib/filter.js +66 -66
  21. package/src/lib/io.js +54 -76
  22. package/src/lib/output.js +80 -80
  23. package/src/lib/parser.js +47 -47
  24. package/src/lib/retry.js +44 -0
  25. package/src/lib/scrape.js +40 -39
  26. package/src/lib/url.js +52 -0
  27. package/src/main.mjs +199 -962
  28. package/src/results/user-videos-bar.lar.lar.moeta.json +37 -0
  29. package/src/scraper/auto-core.mjs +183 -0
  30. package/src/scraper/{core.cjs → core.mjs} +188 -214
  31. package/src/{explore-core.cjs → scraper/explore-core.mjs} +44 -42
  32. package/src/scraper/modules/captcha-handler.mjs +114 -0
  33. package/src/scraper/modules/comment-extractor.mjs +69 -0
  34. package/src/scraper/modules/follow-extractor.mjs +121 -0
  35. package/src/scraper/modules/{guess-extractor.cjs → guess-extractor.mjs} +51 -53
  36. package/src/scraper/modules/page-error-detector.mjs +70 -0
  37. package/src/scraper/modules/page-helpers.mjs +46 -0
  38. package/src/scraper/modules/scroll-collector.mjs +189 -0
  39. package/src/{get-user-videos-core.cjs → videos/core.mjs} +126 -143
  40. package/src/watch/data-store.mjs +239 -0
  41. package/src/watch/public/index.html +446 -271
  42. package/src/watch/server.mjs +257 -153
  43. package/src/auto-core.cjs +0 -367
  44. package/src/data-store.cjs +0 -69
  45. package/src/get-user-videos.cjs +0 -59
  46. package/src/lib/auto-browser.mjs +0 -13
  47. package/src/lib/explore.js +0 -225
  48. package/src/lib/get-user-videos-browser.mjs +0 -6
  49. package/src/lib/scrape-browser.mjs +0 -6
  50. package/src/scraper/index.cjs +0 -97
  51. package/src/scraper/modules/comment-extractor.cjs +0 -49
  52. package/src/scraper/modules/follow-extractor.cjs +0 -112
  53. package/src/scraper/modules/page-helpers.cjs +0 -422
  54. package/src/scraper/modules/scroll-collector.cjs +0 -173
  55. package/src/scraper/modules/video-scanner.cjs +0 -43
  56. package/src/test-auto-follow.cjs +0 -109
  57. package/src/test-extractors.cjs +0 -75
  58. package/src/test-follow.cjs +0 -41
@@ -0,0 +1,189 @@
1
+ import { delay } from "../../lib/delay.js";
2
+ import { detectPageError } from "./page-error-detector.mjs";
3
+
4
/**
 * Runs a stringified collector function through `page.evaluate` and returns
 * whatever that collector returns.
 *
 * The first argument handed to the collector is resolved as follows:
 *   - no `container` selector, or the selector matches nothing -> `window`
 *   - selector matches and `findScrollable` is falsy -> the matched element
 *   - selector matches and `findScrollable` is truthy -> the first node, walking
 *     from the match upwards and stopping before `document.body`, whose
 *     scrollHeight exceeds its clientHeight by more than 10; `document.body`
 *     when no such node exists
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {object} ctx
 * @param {string} [ctx.container] - CSS selector of the container element.
 * @param {boolean} [ctx.findScrollable] - Look for a scrollable ancestor.
 * @param {string} ctx.fnStr - Source text of a function `(el, args) => result`.
 * @param {*} [ctx.extraArgs] - Passed to the collector as its second argument.
 * @returns {Promise<*>} The collector's return value.
 */
async function doCollect(
  page,
  { container, findScrollable, fnStr, extraArgs },
) {
  // Kept free of outer-scope references: it is passed to page.evaluate and
  // presumably executes in the browser context rather than in this module.
  const runInPage = (payload) => {
    const resolveTarget = () => {
      if (!payload.containerSelector) return window;

      const matched = document.querySelector(payload.containerSelector);
      if (!matched) return window;
      if (!payload.findScrollableFlag) return matched;

      for (
        let node = matched;
        node && node !== document.body;
        node = node.parentElement
      ) {
        if (node.scrollHeight > node.clientHeight + 10) return node;
      }
      return document.body;
    };

    // Resolve the element first, then rebuild the collector from its source.
    const target = resolveTarget();
    const collector = eval("(" + payload.fn + ")");
    return collector(target, payload.args);
  };

  return page.evaluate(runInPage, {
    fn: fnStr,
    containerSelector: container,
    findScrollableFlag: findScrollable,
    args: extraArgs,
  });
}
42
+
43
// CSS selectors treated as "the page is still loading" indicators: class names
// containing loading/spinner/skeleton (lower-case and capitalised variants),
// plus any element flagged aria-busy="true".
const LOADING_SELECTORS = [
  ...["loading", "spinner", "skeleton"].flatMap((word) => [
    `[class*="${word}"]`,
    `[class*="${word[0].toUpperCase()}${word.slice(1)}"]`,
  ]),
  '[aria-busy="true"]',
];
52
+
53
/**
 * Polls the page until it no longer looks busy, giving up after 5 seconds.
 *
 * The page counts as busy while `document.readyState` is not "complete", or
 * while the first element matching any LOADING_SELECTORS entry has a non-null
 * `offsetParent`. Between polls the function awaits `delay(300, 600)`.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @returns {Promise<void>} Resolves when the page is idle or the budget is spent.
 */
async function waitForLoading(page) {
  const deadline = Date.now() + 5000;

  // Passed to page.evaluate, so it only touches its argument and page globals.
  const pageIsBusy = (selectors) => {
    if (document.readyState !== "complete") return true;
    return selectors.some((selector) => {
      const node = document.querySelector(selector);
      return Boolean(node) && node.offsetParent !== null;
    });
  };

  while (Date.now() < deadline) {
    const busy = await page.evaluate(pageIsBusy, LOADING_SELECTORS);
    if (!busy) return;
    await delay(300, 600);
  }
}
69
+
70
/**
 * Collects items from a page by running `collectFn` in the page, scrolling,
 * and repeating until a stop condition is met.
 *
 * Round 0 collects without scrolling. Every later round scrolls
 * (threePhaseScroll), sleeps for `delayRange`, waits for loading indicators
 * (waitForLoading) and collects again. Collection stops when the accumulated
 * item count reaches `maxItems`, when `staleThreshold` consecutive rounds
 * yield no new items, or after `maxRounds` rounds in total.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {object} options
 * @param {string} [options.container] - CSS selector of the list container.
 * @param {boolean} [options.findScrollable=false] - Use the nearest scrollable
 *   ancestor of the container instead of the container itself.
 * @param {Function|string} options.collectFn - `(el, args) => ({ items })`;
 *   a function is converted with `toString()` before being sent to the page.
 * @param {*} [options.extraArgs] - Second argument passed to `collectFn`.
 * @param {number[]} [options.delayRange=[800, 1500]] - Min/max passed to
 *   `delay` after each scroll.
 * @param {?number} [options.maxItems] - Stop once at least this many items
 *   have been accumulated. `undefined` or `null` means no cap. The returned
 *   array is not truncated and may hold more than `maxItems` entries.
 * @param {number} [options.maxRounds=200] - Upper bound for the round counter.
 * @param {number} [options.staleThreshold=3] - Consecutive empty rounds that
 *   end the collection.
 * @param {Function} [options.uniqueKey] - `(item) => key`; when given, items
 *   whose key was already seen are dropped.
 * @param {Function} [options.onRound] - Called as
 *   `onRound(round, newItems, allItems)` after every round.
 * @returns {Promise<Array>} All accumulated items, or `[]` when
 *   `detectPageError(page)` reports a truthy value up front.
 * @throws {Error} When `collectFn` is missing.
 */
export async function scrollAndCollect(page, options) {
  const {
    container,
    findScrollable = false,
    collectFn,
    extraArgs,
    delayRange = [800, 1500],
    maxItems,
    maxRounds = 200,
    staleThreshold = 3,
    uniqueKey,
    onRound,
  } = options;

  if (!collectFn) throw new Error("collectFn is required");

  const fnStr =
    typeof collectFn === "function" ? collectFn.toString() : collectFn;
  const allItems = [];
  const seenKeys = uniqueKey ? new Set() : null;
  let staleCount = 0;

  // `!= null` on purpose: a null cap must mean "unlimited". With a strict
  // `!== undefined` test, `allItems.length >= null` compares against 0 and is
  // always true, which ended the collection right after round 0.
  const hasItemCap = maxItems != null;

  const processItems = (result) => {
    // Tolerate a collector that returns nothing or omits `items`.
    const raw = (result && result.items) || [];
    const newItems = uniqueKey
      ? raw.filter((item) => {
          const key = uniqueKey(item);
          if (seenKeys.has(key)) return false;
          seenKeys.add(key);
          return true;
        })
      : raw;
    allItems.push(...newItems);
    return newItems;
  };

  const isDone = (newItems) => {
    if (hasItemCap && allItems.length >= maxItems) return true;
    if (newItems.length === 0) {
      staleCount++;
      if (staleCount >= staleThreshold) return true;
    } else {
      staleCount = 0;
    }
    return false;
  };

  const collectCtx = { container, findScrollable, fnStr, extraArgs };

  // One collection pass; resolves to true when a stop condition is met.
  const collectRound = async (round) => {
    await waitForLoading(page);
    const result = await doCollect(page, collectCtx);
    const newItems = processItems(result);
    if (onRound) onRound(round, newItems, allItems);
    return isDone(newItems);
  };

  const pageError = await detectPageError(page);
  if (pageError) return [];

  // Round 0 reads what is already rendered, before any scrolling.
  if (await collectRound(0)) return allItems;

  for (let round = 1; round < maxRounds; round++) {
    await threePhaseScroll(page, { container, findScrollable });
    await delay(delayRange[0], delayRange[1]);
    if (await collectRound(round)) break;
  }

  return allItems;
}
144
+
145
/**
 * Performs one scroll gesture in the page: jump down, nudge back up a little,
 * then jump down again, with short random pauses in between.
 *
 * Target resolution:
 *   - no `container`, or the selector matches nothing -> the window
 *   - selector matches and `findScrollable` is falsy -> the matched element
 *   - selector matches and `findScrollable` is truthy -> the first node, walking
 *     from the match upwards and stopping before `document.body`, whose
 *     scrollHeight exceeds its clientHeight by more than 10; `document.body`
 *     when none is found
 *
 * When the target is `document.body`, the viewport is scrolled as well.
 * In standards mode, assigning `body.scrollTop` only has an effect if <body>
 * is itself a scroll container, so the body-only version left the page
 * unscrolled whenever the fallback was taken.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {object} opts
 * @param {string} [opts.container] - CSS selector of the container element.
 * @param {boolean} [opts.findScrollable] - Look for a scrollable ancestor.
 * @returns {Promise<void>}
 */
async function threePhaseScroll(page, { container, findScrollable }) {
  await page.evaluate(
    async (opts) => {
      let el;
      if (!opts.container) {
        el = window;
      } else {
        el = document.querySelector(opts.container);
        if (!el) {
          el = window;
        } else if (opts.findScrollable) {
          let current = el;
          let found = false;
          while (current && current !== document.body) {
            if (current.scrollHeight > current.clientHeight + 10) {
              el = current;
              found = true;
              break;
            }
            current = current.parentElement;
          }
          if (!found) el = document.body;
        }
      }

      const randDelay = (min, max) =>
        new Promise((r) => setTimeout(r, min + Math.random() * (max - min)));

      // The element's own scroll box is driven for every element target
      // (original behaviour); the viewport is driven for the window and for
      // the <body> target, where the element assignment may be a no-op.
      const scrollsElement = el !== window;
      const scrollsViewport = el === window || el === document.body;

      const jumpDown = () => {
        if (scrollsElement) el.scrollTop = el.scrollHeight;
        if (scrollsViewport) window.scrollBy(0, window.innerHeight);
      };
      const nudgeUp = () => {
        if (scrollsElement) el.scrollTop -= 100 + Math.random() * 100;
        if (scrollsViewport) window.scrollBy(0, -200);
      };

      jumpDown();
      await randDelay(400, 800);
      nudgeUp();
      await randDelay(200, 400);
      jumpDown();
    },
    { container, findScrollable },
  );
}
@@ -1,143 +1,126 @@
1
- const { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } = require('./scraper/modules/page-helpers.cjs');
2
- const { scrollAndCollect } = require('./scraper/modules/scroll-collector.cjs');
3
-
4
/**
 * Extracts profile fields for the user page currently loaded in `page`.
 *
 * `uniqueId` comes from the `/@handle` segment of the page URL; every other
 * field is taken from the first regex match against the serialized document
 * HTML. Fields without a match are simply absent from the result.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<object>} Any of: uniqueId, secUid, nickname, ttSeller
 *   (boolean), verified (boolean), followerCount, videoCount, followingCount,
 *   heartCount (numbers), signature, locationCreated, region.
 */
async function getUserInfo(page) {
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '?' and '#' as well as '/', so a query string or hash on the
    // profile URL does not end up inside the handle.
    const m = window.location.href.match(/\/@([^/?#]+)/);
    if (m) result.uniqueId = m[1];

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];

    // nickname and signature are captured with their JSON escapes intact
    // (\" \\ \n \uXXXX ...). Decode them as a JSON string; if the captured
    // text is not valid JSON string content, use the supplied fallback.
    const decodeJsonString = (raw, fallback) => {
      try {
        return JSON.parse('"' + raw + '"');
      } catch (e) {
        return fallback(raw);
      }
    };

    for (const [key, pat] of Object.entries(patterns)) {
      const match = html.match(pat);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) {
        result[key] = raw === 'true';
      } else if (numKeys.includes(key)) {
        result[key] = parseInt(raw, 10);
      } else if (key === 'signature') {
        // Fallback is the previous hand-rolled unescape.
        result[key] = decodeJsonString(raw, (s) =>
          s.replace(/\\n/g, '\n').replace(/\\\\/g, '\\'));
      } else if (key === 'nickname') {
        // Fallback is the previous behaviour: the raw captured text.
        result[key] = decodeJsonString(raw, (s) => s);
      } else {
        result[key] = raw;
      }
    }

    return result;
  });
}
42
-
43
/**
 * Scrolls a user's profile page and gathers links to that user's videos.
 *
 * @param {object} page - Page object forwarded to `scrollAndCollect`.
 * @param {string} username - Handle used to build the `/@<handle>/video/`
 *   substring that anchor hrefs must contain.
 * @param {number} maxVideos - Forwarded to `scrollAndCollect` as `maxItems`.
 * @param {Function} log - Receives progress messages.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Videos keyed by
 *   id, in first-seen order.
 */
async function collectVideos(page, username, maxVideos, log) {
  const allLinks = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    // Presumably executed in the page, so it must not reference anything from
    // this module's scope. It scans every anchor in the document.
    collectFn: (container, args) => {
      const pattern = '/@' + args.handle + '/video/';
      return {
        items: Array.from(document.querySelectorAll('a'))
          .filter(el => (el.getAttribute('href') || '').includes(pattern))
          .map(el => {
            const href = el.getAttribute('href') || '';
            const idMatch = href.match(/\/video\/(\d+)/);
            return { id: idMatch ? idMatch[1] : null, href };
          })
          .filter(v => v.id),
      };
    },
    // Each round re-reads every anchor already on the page. Without a key,
    // repeats count towards maxItems and no round ever looks empty, so the
    // stale check cannot fire.
    // NOTE(review): assumes this version of scrollAndCollect honours
    // `uniqueKey` — confirm. If it is ignored, behaviour is unchanged and the
    // Map below still de-duplicates the final result.
    uniqueKey: v => v.id,
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: (round, items, allItems) => {
      const uniqueCount = new Set(allItems.map(v => v.id)).size;
      if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
        log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
      }
    },
  });

  // De-duplicate by video id, keeping the first occurrence.
  const uniqueVideos = new Map();
  allLinks.forEach(v => {
    if (!uniqueVideos.has(v.id)) {
      uniqueVideos.set(v.id, v);
    }
  });

  log(`收集完成: ${uniqueVideos.size} 个视频`);
  return uniqueVideos;
}
83
-
84
/**
 * Opens a user's TikTok profile, reads the profile info and collects up to
 * `maxVideos` video links.
 *
 * On success the browser is handed back to the caller, who then owns it. On
 * any failure after the browser has been obtained, the browser is closed
 * (best effort) before the error is rethrown, so a failed run does not leave
 * it dangling.
 *
 * @param {object} options
 * @param {string} options.username - Handle, interpolated into the profile URL.
 * @param {number} [options.maxVideos=5] - Maximum number of videos returned.
 * @param {Function} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 */
async function runGetUserVideos(options) {
  const {
    username,
    maxVideos = 5,
    log = console.error,
  } = options;

  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // Best effort: a missing video list is not fatal, collection simply finds nothing.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: allVideos.slice(0, maxVideos).map(v => ({
        id: v.id,
        // Relative hrefs are resolved against the TikTok origin.
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // The caller never receives `browser` on failure, so release it here.
    await browser.close().catch(() => {});
    throw e;
  }
}
133
-
134
/**
 * Reports whether the page body text contains one of the known login or
 * audience-restriction phrases.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<boolean>} true when any marker phrase is present.
 */
async function isPageRestricted(page) {
  return page.evaluate(() => {
    const markers = ['登录 TikTok', '观众管理功能', 'Login to TikTok'];
    const text = document.body.innerText;
    return markers.some((marker) => text.includes(marker));
  });
}
142
-
143
- module.exports = { getUserInfo, collectVideos, runGetUserVideos, isPageRestricted };
1
+ import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.mjs';
2
+ import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
3
+
4
/**
 * Extracts profile fields for the user page currently loaded in `page`.
 *
 * `uniqueId` comes from the `/@handle` segment of the page URL; every other
 * field is taken from the first regex match against the serialized document
 * HTML. Fields without a match are simply absent from the result.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<object>} Any of: uniqueId, secUid, nickname, ttSeller
 *   (boolean), verified (boolean), followerCount, videoCount, followingCount,
 *   heartCount (numbers), signature, locationCreated, region.
 */
async function getUserInfo(page) {
  return await page.evaluate(() => {
    const html = document.documentElement.outerHTML;
    const result = {};

    // Stop at '?' and '#' as well as '/', so a query string or hash on the
    // profile URL does not end up inside the handle.
    const m = window.location.href.match(/\/@([^/?#]+)/);
    if (m) result.uniqueId = m[1];

    const patterns = {
      secUid: /"secUid":"([^"]+)"/,
      nickname: /"nickname":"((?:[^"\\]|\\.)*)"/,
      ttSeller: /"ttSeller":\s*(true|false)/,
      verified: /"verified":\s*(true|false)/,
      followerCount: /"followerCount":(\d+)/,
      videoCount: /"videoCount":(\d+)/,
      followingCount: /"followingCount":(\d+)/,
      heartCount: /"heartCount":(\d+)/,
      signature: /"signature":"((?:[^"\\]|\\.)*)"/,
      locationCreated: /"locationCreated":"([^"]*)/,
      region: /"region":"([^"]*)/,
    };

    const boolKeys = ['ttSeller', 'verified'];
    const numKeys = ['followerCount', 'videoCount', 'followingCount', 'heartCount'];

    // nickname and signature are captured with their JSON escapes intact
    // (\" \\ \n \uXXXX ...). Decode them as a JSON string; if the captured
    // text is not valid JSON string content, use the supplied fallback.
    const decodeJsonString = (raw, fallback) => {
      try {
        return JSON.parse('"' + raw + '"');
      } catch (e) {
        return fallback(raw);
      }
    };

    for (const [key, pat] of Object.entries(patterns)) {
      const match = html.match(pat);
      if (!match) continue;

      const raw = match[1];
      if (boolKeys.includes(key)) {
        result[key] = raw === 'true';
      } else if (numKeys.includes(key)) {
        result[key] = parseInt(raw, 10);
      } else if (key === 'signature') {
        // Fallback is the previous hand-rolled unescape.
        result[key] = decodeJsonString(raw, (s) =>
          s.replace(/\\n/g, '\n').replace(/\\\\/g, '\\'));
      } else if (key === 'nickname') {
        // Fallback is the previous behaviour: the raw captured text.
        result[key] = decodeJsonString(raw, (s) => s);
      } else {
        result[key] = raw;
      }
    }

    return result;
  });
}
42
+
43
/**
 * Scrolls a user's profile page and gathers links to that user's videos.
 *
 * @param {object} page - Page object forwarded to `scrollAndCollect`.
 * @param {string} username - Handle used to build the `/@<handle>/video/`
 *   substring that anchor hrefs must contain.
 * @param {number} maxVideos - Forwarded to `scrollAndCollect` as `maxItems`.
 * @param {Function} log - Receives progress messages.
 * @returns {Promise<Map<string, {id: string, href: string}>>} Videos keyed by
 *   id, in first-seen order.
 */
async function collectVideos(page, username, maxVideos, log) {
  const allLinks = await scrollAndCollect(page, {
    container: '[class*="ColumnListContainer"]',
    extraArgs: { handle: username },
    // Stringified by scrollAndCollect and rebuilt in the page, so it must not
    // reference anything from this module's scope. It scans every anchor in
    // the document.
    collectFn: (container, args) => {
      const pattern = '/@' + args.handle + '/video/';
      return {
        items: Array.from(document.querySelectorAll('a'))
          .filter(el => (el.getAttribute('href') || '').includes(pattern))
          .map(el => {
            const href = el.getAttribute('href') || '';
            const idMatch = href.match(/\/video\/(\d+)/);
            return { id: idMatch ? idMatch[1] : null, href };
          })
          .filter(v => v.id),
      };
    },
    // Each round re-reads every anchor already on the page. Without a key,
    // repeats count towards maxItems and no round ever looks empty, so the
    // stale check cannot fire. With it, only unseen ids count as new items.
    uniqueKey: v => v.id,
    maxItems: maxVideos,
    delayRange: [2000, 3000],
    staleThreshold: 5,
    maxRounds: 500,
    onRound: (round, items, allItems) => {
      const uniqueCount = new Set(allItems.map(v => v.id)).size;
      if (uniqueCount > 0 && (uniqueCount % 10 === 0 || items.length > 0)) {
        log(`滚动 ${round + 1}: ${uniqueCount} 个视频 (本轮 ${items.length} 条)`);
      }
    },
  });

  // Final de-duplication by video id, keeping the first occurrence.
  const uniqueVideos = new Map();
  allLinks.forEach(v => {
    if (!uniqueVideos.has(v.id)) uniqueVideos.set(v.id, v);
  });

  log(`收集完成: ${uniqueVideos.size} 个视频`);
  return uniqueVideos;
}
80
+
81
/**
 * Opens a user's TikTok profile, reads the profile info and collects up to
 * `maxVideos` video links.
 *
 * On success the browser is handed back to the caller, who then owns it. On
 * any failure after the browser has been obtained, the browser is closed
 * (best effort) before the error is rethrown, so a failed run does not leave
 * it dangling.
 *
 * @param {object} options
 * @param {string} options.username - Handle, interpolated into the profile URL.
 * @param {number} [options.maxVideos=5] - Maximum number of videos returned.
 * @param {Function} [options.log=console.error] - Progress logger.
 * @returns {Promise<{output: {user: object, totalVideos: number,
 *   videos: Array<{id: string, url: string}>}, browser: object}>}
 */
async function runGetUserVideos(options) {
  const { username, maxVideos = 5, log = console.error } = options;
  const url = `https://www.tiktok.com/@${username}`;

  log(`用户: @${username}`);
  log(`URL: ${url}`);
  log(`最大视频数: ${maxVideos}\n`);

  log('连接浏览器...');
  const browser = await ensureBrowserReady();

  try {
    const page = await ensureTikTokPage(browser, url);

    await retryWithBackoff(() => page.goto(url, { waitUntil: 'load', timeout: 30000 }), { log });
    await delay(3000, 5000);
    // Best effort: a missing video list is not fatal, collection simply finds nothing.
    await page.waitForSelector('[class*="DivVideoList"]', { timeout: 10000 }).catch(() => {});

    log('获取用户信息...');
    const userInfo = await getUserInfo(page);
    log('用户信息: ' + JSON.stringify(userInfo, null, 2));

    log('\n开始滚动收集视频...');
    const videos = await collectVideos(page, username, maxVideos, log);
    const allVideos = Array.from(videos.values());

    log(`\n总计: ${allVideos.length} 个视频`);

    const output = {
      user: userInfo,
      totalVideos: Math.min(allVideos.length, maxVideos),
      videos: allVideos.slice(0, maxVideos).map(v => ({
        id: v.id,
        // Relative hrefs are resolved against the TikTok origin.
        url: v.href.startsWith('http') ? v.href : `https://www.tiktok.com${v.href}`,
      })),
    };

    return { output, browser };
  } catch (e) {
    // The caller never receives `browser` on failure, so release it here.
    await browser.close().catch(() => {});
    throw e;
  }
}
125
+
126
+ export { getUserInfo, collectVideos, runGetUserVideos };