tt-help-cli-ycl 1.3.11 → 1.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/cli.js +1 -1
  2. package/package.json +4 -5
  3. package/src/cli/auto.js +1 -1
  4. package/src/cli/config.js +116 -0
  5. package/src/cli/explore-default.js +83 -0
  6. package/src/cli/explore.js +16 -4
  7. package/src/cli/scrape.js +1 -1
  8. package/src/cli/videos.js +1 -1
  9. package/src/cli/watch.js +4 -4
  10. package/src/lib/args.js +12 -1
  11. package/src/lib/browser/cdp.js +152 -142
  12. package/src/lib/constants.js +0 -4
  13. package/src/lib/explore-fetch.js +1 -1
  14. package/src/{scraper/modules/page-error-detector.mjs → lib/page-error-detector.js} +70 -70
  15. package/src/{scraper/modules/scroll-collector.mjs → lib/scroll-collector.js} +231 -189
  16. package/src/main.js +46 -0
  17. package/src/scraper/{auto-core.mjs → auto-core.js} +5 -5
  18. package/src/scraper/{core.mjs → core.js} +3 -3
  19. package/src/scraper/{explore-core.mjs → explore-core.js} +7 -7
  20. package/src/scraper/modules/{comment-extractor.mjs → comment-extractor.js} +3 -3
  21. package/src/scraper/modules/{follow-extractor.mjs → follow-extractor.js} +2 -2
  22. package/src/scraper/modules/{guess-extractor.mjs → guess-extractor.js} +2 -2
  23. package/src/scraper/modules/page-error-detector.js +1 -0
  24. package/src/scraper/modules/{page-helpers.mjs → page-helpers.js} +1 -1
  25. package/src/scraper/modules/scroll-collector.js +8 -0
  26. package/src/videos/{core.mjs → core.js} +2 -2
  27. package/src/watch/{data-store.mjs → data-store.js} +38 -10
  28. package/src/watch/public/index.html +13 -2
  29. package/src/watch/{server.mjs → server.js} +16 -5
  30. package/src/main.mjs +0 -234
  31. package/src/test-auto-follow.cjs +0 -109
  32. package/src/test-extractors.cjs +0 -75
  33. package/src/test-follow.cjs +0 -41
  34. /package/{bat → scripts}/run-explore.bat +0 -0
  35. /package/{bat → scripts}/run-explore.ps1 +0 -0
  36. /package/{bat → scripts}/run-explore.sh +0 -0
  37. /package/src/scraper/modules/{captcha-handler.mjs → captcha-handler.js} +0 -0
@@ -1,189 +1,231 @@
1
- import { delay } from "../../lib/delay.js";
2
- import { detectPageError } from "./page-error-detector.mjs";
3
-
4
- async function doCollect(
5
- page,
6
- { container, findScrollable, fnStr, extraArgs },
7
- ) {
8
- return page.evaluate(
9
- ({ fn: fnStr, containerSelector, findScrollableFlag, args }) => {
10
- let el;
11
- if (!containerSelector) {
12
- el = window;
13
- } else {
14
- el = document.querySelector(containerSelector);
15
- if (!el) {
16
- el = window;
17
- } else if (findScrollableFlag) {
18
- let current = el;
19
- let found = false;
20
- while (current && current !== document.body) {
21
- if (current.scrollHeight > current.clientHeight + 10) {
22
- el = current;
23
- found = true;
24
- break;
25
- }
26
- current = current.parentElement;
27
- }
28
- if (!found) el = document.body;
29
- }
30
- }
31
- const fn = eval("(" + fnStr + ")");
32
- return fn(el, args);
33
- },
34
- {
35
- fn: fnStr,
36
- containerSelector: container,
37
- findScrollableFlag: findScrollable,
38
- args: extraArgs,
39
- },
40
- );
41
- }
42
-
43
- const LOADING_SELECTORS = [
44
- '[class*="loading"]',
45
- '[class*="Loading"]',
46
- '[class*="spinner"]',
47
- '[class*="Spinner"]',
48
- '[class*="skeleton"]',
49
- '[class*="Skeleton"]',
50
- '[aria-busy="true"]',
51
- ];
52
-
53
- async function waitForLoading(page) {
54
- const maxWait = 5000;
55
- const startTime = Date.now();
56
- while (Date.now() - startTime < maxWait) {
57
- const isLoading = await page.evaluate((sels) => {
58
- if (document.readyState !== "complete") return true;
59
- for (const sel of sels) {
60
- const el = document.querySelector(sel);
61
- if (el && el.offsetParent !== null) return true;
62
- }
63
- return false;
64
- }, LOADING_SELECTORS);
65
- if (!isLoading) return;
66
- await delay(300, 600);
67
- }
68
- }
69
-
70
- export async function scrollAndCollect(page, options) {
71
- const {
72
- container,
73
- findScrollable = false,
74
- collectFn,
75
- extraArgs,
76
- delayRange = [800, 1500],
77
- maxItems,
78
- maxRounds = 200,
79
- staleThreshold = 3,
80
- uniqueKey,
81
- onRound,
82
- } = options;
83
-
84
- if (!collectFn) throw new Error("collectFn is required");
85
-
86
- const fnStr =
87
- typeof collectFn === "function" ? collectFn.toString() : collectFn;
88
- const allItems = [];
89
- const seenKeys = uniqueKey ? new Set() : null;
90
- let staleCount = 0;
91
-
92
- const processItems = (result) => {
93
- const raw = result.items || [];
94
- const newItems = uniqueKey
95
- ? raw.filter((item) => {
96
- const key = uniqueKey(item);
97
- if (seenKeys.has(key)) return false;
98
- seenKeys.add(key);
99
- return true;
100
- })
101
- : raw;
102
- allItems.push(...newItems);
103
- return newItems;
104
- };
105
-
106
- const isDone = (newItems) => {
107
- if (maxItems !== undefined && allItems.length >= maxItems) return true;
108
- if (newItems.length === 0) {
109
- staleCount++;
110
- if (staleCount >= staleThreshold) return true;
111
- } else {
112
- staleCount = 0;
113
- }
114
- return false;
115
- };
116
-
117
- const collectCtx = { container, findScrollable, fnStr, extraArgs };
118
-
119
- const pageError = await detectPageError(page);
120
-
121
- if (pageError) return [];
122
-
123
- await waitForLoading(page);
124
- let result = await doCollect(page, collectCtx);
125
- let newItems = processItems(result);
126
- if (onRound) onRound(0, newItems, allItems);
127
- if (isDone(newItems)) return allItems;
128
-
129
- for (let round = 1; round < maxRounds; round++) {
130
- await threePhaseScroll(page, { container, findScrollable });
131
- await delay(delayRange[0], delayRange[1]);
132
- await waitForLoading(page);
133
-
134
- result = await doCollect(page, collectCtx);
135
- newItems = processItems(result);
136
-
137
- if (onRound) onRound(round, newItems, allItems);
138
-
139
- if (isDone(newItems)) break;
140
- }
141
-
142
- return allItems;
143
- }
144
-
145
- async function threePhaseScroll(page, { container, findScrollable }) {
146
- await page.evaluate(
147
- async (opts) => {
148
- let el;
149
- if (!opts.container) {
150
- el = window;
151
- } else {
152
- el = document.querySelector(opts.container);
153
- if (!el) {
154
- el = window;
155
- } else if (opts.findScrollable) {
156
- let current = el;
157
- let found = false;
158
- while (current && current !== document.body) {
159
- if (current.scrollHeight > current.clientHeight + 10) {
160
- el = current;
161
- found = true;
162
- break;
163
- }
164
- current = current.parentElement;
165
- }
166
- if (!found) el = document.body;
167
- }
168
- }
169
-
170
- const randDelay = (min, max) =>
171
- new Promise((r) => setTimeout(r, min + Math.random() * (max - min)));
172
-
173
- if (el === window) {
174
- window.scrollBy(0, window.innerHeight);
175
- await randDelay(400, 800);
176
- window.scrollBy(0, -200);
177
- await randDelay(200, 400);
178
- window.scrollBy(0, window.innerHeight);
179
- } else {
180
- el.scrollTop = el.scrollHeight;
181
- await randDelay(400, 800);
182
- el.scrollTop -= 100 + Math.random() * 100;
183
- await randDelay(200, 400);
184
- el.scrollTop = el.scrollHeight;
185
- }
186
- },
187
- { container, findScrollable },
188
- );
189
- }
1
+ import { delay } from './delay.js';
2
+ import { detectPageError } from './page-error-detector.js';
3
+
4
+ async function doCollect(
5
+ page,
6
+ { container, findScrollable, fnStr, extraArgs },
7
+ ) {
8
+ return page.evaluate(
9
+ ({ fn: fnStr, containerSelector, findScrollableFlag, args }) => {
10
+ let el;
11
+ if (!containerSelector) {
12
+ el = window;
13
+ } else {
14
+ el = document.querySelector(containerSelector);
15
+ if (!el) {
16
+ el = window;
17
+ } else if (findScrollableFlag) {
18
+ let current = el;
19
+ let found = false;
20
+ while (current && current !== document.body) {
21
+ if (current.scrollHeight > current.clientHeight + 10) {
22
+ el = current;
23
+ found = true;
24
+ break;
25
+ }
26
+ current = current.parentElement;
27
+ }
28
+ if (!found) el = document.body;
29
+ }
30
+ }
31
+
32
+ const fn = eval("(" + fnStr + ")");
33
+ return fn(el, args);
34
+ },
35
+ { fn: fnStr, containerSelector: container, findScrollableFlag: findScrollable, args: extraArgs },
36
+ );
37
+ }
38
+
39
+ const LOADING_SELECTORS = [
40
+ '[class*="loading"]',
41
+ '[class*="Loading"]',
42
+ '[class*="spinner"]',
43
+ '[class*="Spinner"]',
44
+ '[class*="skeleton"]',
45
+ '[class*="Skeleton"]',
46
+ '[aria-busy="true"]',
47
+ ];
48
+
49
+ async function waitForLoading(page) {
50
+ const maxWait = 5000;
51
+ const startTime = Date.now();
52
+ while (Date.now() - startTime < maxWait) {
53
+ const isLoading = await page.evaluate((sels) => {
54
+ if (document.readyState !== "complete") return true;
55
+ for (const sel of sels) {
56
+ const el = document.querySelector(sel);
57
+ if (el && el.offsetParent !== null) return true;
58
+ }
59
+ return false;
60
+ }, LOADING_SELECTORS);
61
+ if (!isLoading) return;
62
+ await delay(300, 600);
63
+ }
64
+ }
65
+
66
+ async function collectWithoutScroll(page, options) {
67
+ const {
68
+ container,
69
+ findScrollable = false,
70
+ collectFn,
71
+ extraArgs,
72
+ } = options;
73
+
74
+ if (!collectFn) throw new Error("collectFn is required");
75
+
76
+ const fnStr =
77
+ typeof collectFn === "function" ? collectFn.toString() : collectFn;
78
+
79
+ const result = await doCollect(page, {
80
+ container,
81
+ findScrollable,
82
+ fnStr,
83
+ extraArgs,
84
+ });
85
+
86
+ return result.items || [];
87
+ }
88
+
89
+ async function scrollToBottom(page, options = {}) {
90
+ const {
91
+ container,
92
+ findScrollable = false,
93
+ delayRange = [800, 1500],
94
+ maxScroll = 200,
95
+ } = options;
96
+
97
+ for (let i = 0; i < maxScroll; i++) {
98
+ await threePhaseScroll(page, { container, findScrollable });
99
+ await delay(delayRange[0], delayRange[1]);
100
+ }
101
+ }
102
+
103
+ async function scrollAndCollect(page, options) {
104
+ const {
105
+ container,
106
+ findScrollable = false,
107
+ collectFn,
108
+ extraArgs,
109
+ delayRange = [800, 1500],
110
+ maxItems,
111
+ maxRounds = 200,
112
+ staleThreshold = 3,
113
+ uniqueKey,
114
+ onRound,
115
+ } = options;
116
+
117
+ if (!collectFn) throw new Error("collectFn is required");
118
+
119
+ const fnStr =
120
+ typeof collectFn === "function" ? collectFn.toString() : collectFn;
121
+ const allItems = [];
122
+ const seenKeys = uniqueKey ? new Set() : null;
123
+ let staleCount = 0;
124
+
125
+ const processItems = (result) => {
126
+ const raw = result.items || [];
127
+ const newItems = uniqueKey
128
+ ? raw.filter((item) => {
129
+ const key = uniqueKey(item);
130
+ if (seenKeys.has(key)) return false;
131
+ seenKeys.add(key);
132
+ return true;
133
+ })
134
+ : raw;
135
+ allItems.push(...newItems);
136
+ return newItems;
137
+ };
138
+
139
+ const isDone = (newItems) => {
140
+ if (maxItems !== undefined && allItems.length >= maxItems) return true;
141
+ if (newItems.length === 0) {
142
+ staleCount++;
143
+ if (staleCount >= staleThreshold) return true;
144
+ } else {
145
+ staleCount = 0;
146
+ }
147
+ return false;
148
+ };
149
+
150
+ const collectCtx = { container, findScrollable, fnStr, extraArgs };
151
+
152
+ const pageError = await detectPageError(page);
153
+
154
+ if (pageError) return [];
155
+
156
+ await waitForLoading(page);
157
+ let result = await doCollect(page, collectCtx);
158
+ let newItems = processItems(result);
159
+ if (onRound) onRound(0, newItems, allItems);
160
+ if (isDone(newItems)) return allItems;
161
+
162
+ for (let round = 1; round < maxRounds; round++) {
163
+ await threePhaseScroll(page, { container, findScrollable });
164
+ await delay(delayRange[0], delayRange[1]);
165
+ await waitForLoading(page);
166
+
167
+ result = await doCollect(page, collectCtx);
168
+ newItems = processItems(result);
169
+
170
+ if (onRound) onRound(round, newItems, allItems);
171
+
172
+ if (isDone(newItems)) break;
173
+ }
174
+
175
+ return allItems;
176
+ }
177
+
178
+ async function threePhaseScroll(page, { container, findScrollable }) {
179
+ await page.evaluate(
180
+ async (opts) => {
181
+ let el;
182
+ if (!opts.container) {
183
+ el = window;
184
+ } else {
185
+ el = document.querySelector(opts.container);
186
+ if (!el) {
187
+ el = window;
188
+ } else if (opts.findScrollable) {
189
+ let current = el;
190
+ let found = false;
191
+ while (current && current !== document.body) {
192
+ if (current.scrollHeight > current.clientHeight + 10) {
193
+ el = current;
194
+ found = true;
195
+ break;
196
+ }
197
+ current = current.parentElement;
198
+ }
199
+ if (!found) el = document.body;
200
+ }
201
+ }
202
+
203
+ const randDelay = (min, max) =>
204
+ new Promise((r) => setTimeout(r, min + Math.random() * (max - min)));
205
+
206
+ if (el === window) {
207
+ window.scrollBy(0, window.innerHeight);
208
+ await randDelay(400, 800);
209
+ window.scrollBy(0, -200);
210
+ await randDelay(200, 400);
211
+ window.scrollBy(0, window.innerHeight);
212
+ } else {
213
+ el.scrollTop = el.scrollHeight;
214
+ await randDelay(400, 800);
215
+ el.scrollTop -= 100 + Math.random() * 100;
216
+ await randDelay(200, 400);
217
+ el.scrollTop = el.scrollHeight;
218
+ }
219
+ },
220
+ { container, findScrollable },
221
+ );
222
+ }
223
+
224
+ export {
225
+ scrollAndCollect,
226
+ scrollToBottom,
227
+ collectWithoutScroll,
228
+ doCollect,
229
+ waitForLoading,
230
+ threePhaseScroll,
231
+ };
package/src/main.js ADDED
@@ -0,0 +1,46 @@
1
+ import { parseArgs } from './lib/args.js';
2
+ import { proxy, HELP_TEXT, getConfigText } from './lib/constants.js';
3
+ import { parseFilter } from './lib/filter.js';
4
+ import { handleScrape } from './cli/scrape.js';
5
+ import { handleVideos } from './cli/videos.js';
6
+ import { handleAuto } from './cli/auto.js';
7
+ import { handleExplore } from './cli/explore.js';
8
+ import { handleWatch } from './cli/watch.js';
9
+ import { handleConfig, showConfig, showUsage, version } from './cli/config.js';
10
+ import { runExploreDefault, runScrapeDefault } from './cli/explore-default.js';
11
+
12
+ async function main() {
13
+ const parsed = parseArgs();
14
+
15
+ switch (parsed.subcommand) {
16
+ case 'scrape': return handleScrape(parsed);
17
+ case 'videos': return handleVideos(parsed);
18
+ case 'auto': return handleAuto(parsed);
19
+ case 'explore':return handleExplore(parsed);
20
+ case 'watch': return handleWatch(parsed);
21
+ }
22
+
23
+ const { urls, outputFile, outputFormat, exploreCount, showConfig: showCfg, showHelp, showVersion, customProxy, configAction, configKey, configValue, pipeMode, filterStr } = parsed;
24
+ const proxyUrl = customProxy || proxy;
25
+ const filter = parseFilter(filterStr);
26
+
27
+ if (showVersion) {
28
+ console.log(version);
29
+ process.exit(0);
30
+ }
31
+ if (showHelp) return showUsage();
32
+ if (configAction) return handleConfig(configAction, configKey, configValue);
33
+ if (showCfg) return showConfig(urls, outputFile);
34
+ if (urls.length === 0 && exploreCount === 0) return showUsage();
35
+
36
+ if (exploreCount > 0) {
37
+ await runExploreDefault(exploreCount, urls, proxyUrl, outputFile, outputFormat, pipeMode, filter);
38
+ } else {
39
+ await runScrapeDefault(urls, proxyUrl, outputFile, outputFormat, filter);
40
+ }
41
+ }
42
+
43
+ main().catch(err => {
44
+ console.error(`错误: ${err.message}`);
45
+ process.exit(1);
46
+ });
@@ -9,15 +9,15 @@ import {
9
9
  detectPageError,
10
10
  isLoggedIn,
11
11
  assertPageUrl,
12
- } from './modules/page-helpers.mjs';
13
- import { detectCaptcha } from './modules/captcha-handler.mjs';
12
+ } from './modules/page-helpers.js';
13
+ import { detectCaptcha } from './modules/captcha-handler.js';
14
14
  export { ensureBrowserReady };
15
15
  import {
16
16
  getUserInfo,
17
17
  collectVideos,
18
- } from '../videos/core.mjs';
19
- import { runScrape } from './core.mjs';
20
- import { extractFollowAndFollowers } from './modules/follow-extractor.mjs';
18
+ } from '../videos/core.js';
19
+ import { runScrape } from './core.js';
20
+ import { extractFollowAndFollowers } from './modules/follow-extractor.js';
21
21
 
22
22
  function mergeUserInfo(existing, incoming, source) {
23
23
  const merged = { ...existing };
@@ -7,9 +7,9 @@ import {
7
7
  getDelayConfig,
8
8
  retryWithBackoff,
9
9
  assertPageUrl,
10
- } from './modules/page-helpers.mjs';
11
- import { extractCommentAuthors } from './modules/comment-extractor.mjs';
12
- import { extractGuessVideos } from './modules/guess-extractor.mjs';
10
+ } from './modules/page-helpers.js';
11
+ import { extractCommentAuthors } from './modules/comment-extractor.js';
12
+ import { extractGuessVideos } from './modules/guess-extractor.js';
13
13
 
14
14
  async function scrapeSingleVideo(page, maxComments, maxGuess, log, location = 'ES') {
15
15
  const config = getDelayConfig();
@@ -7,17 +7,17 @@ import {
7
7
  detectPageError,
8
8
  isLoggedIn,
9
9
  assertPageUrl,
10
- } from './modules/page-helpers.mjs';
11
- import { detectCaptcha } from './modules/captcha-handler.mjs';
10
+ } from './modules/page-helpers.js';
11
+ import { detectCaptcha } from './modules/captcha-handler.js';
12
12
  export { ensureBrowserReady };
13
13
  import {
14
14
  getUserInfo,
15
15
  collectVideos,
16
- } from '../videos/core.mjs';
17
- import { scrapeSingleVideo } from './core.mjs';
18
- import { extractFollowAndFollowers } from './modules/follow-extractor.mjs';
19
- import { extractCommentAuthors } from './modules/comment-extractor.mjs';
20
- import { extractGuessVideos } from './modules/guess-extractor.mjs';
16
+ } from '../videos/core.js';
17
+ import { scrapeSingleVideo } from './core.js';
18
+ import { extractFollowAndFollowers } from './modules/follow-extractor.js';
19
+ import { extractCommentAuthors } from './modules/comment-extractor.js';
20
+ import { extractGuessVideos } from './modules/guess-extractor.js';
21
21
 
22
22
  async function processExplore(page, username, options, log) {
23
23
  const {
@@ -1,6 +1,6 @@
1
- import { delay, getDelayConfig, closeCommentPanel } from "./page-helpers.mjs";
2
- import { scrollAndCollect } from "./scroll-collector.mjs";
3
- import { waitAndGetCaptcha } from "./captcha-handler.mjs";
1
+ import { delay, getDelayConfig, closeCommentPanel } from "./page-helpers.js";
2
+ import { scrollAndCollect } from "./scroll-collector.js";
3
+ import { waitAndGetCaptcha } from "./captcha-handler.js";
4
4
 
5
5
  async function openCommentPanel(page) {
6
6
  const tabs = page.locator('[class*="tabbar-item"]');
@@ -1,5 +1,5 @@
1
- import { delay, getDelayConfig } from "./page-helpers.mjs";
2
- import { scrollAndCollect } from "./scroll-collector.mjs";
1
+ import { delay, getDelayConfig } from "./page-helpers.js";
2
+ import { scrollAndCollect } from "./scroll-collector.js";
3
3
 
4
4
  const FILTER_WORDS = ["主页", "已关注", "粉丝", "推荐"];
5
5
 
@@ -1,5 +1,5 @@
1
- import { delay, getDelayConfig, closeCommentPanel } from './page-helpers.mjs';
2
- import { scrollAndCollect } from './scroll-collector.mjs';
1
+ import { delay, getDelayConfig, closeCommentPanel } from './page-helpers.js';
2
+ import { scrollAndCollect } from './scroll-collector.js';
3
3
 
4
4
  async function openGuessTab(page) {
5
5
  const tabs = page.locator('[class*="tabbar-item"]');
@@ -0,0 +1 @@
1
+ export { detectPageError } from '../../lib/page-error-detector.js';
@@ -21,7 +21,7 @@ import {
21
21
  extractLocationCreated,
22
22
  USER_SECTION_SIZE,
23
23
  } from '../../lib/parser.js';
24
- import { detectPageError } from './page-error-detector.mjs';
24
+ import { detectPageError } from './page-error-detector.js';
25
25
 
26
26
  export {
27
27
  delay,
@@ -0,0 +1,8 @@
1
+ export {
2
+ scrollAndCollect,
3
+ scrollToBottom,
4
+ collectWithoutScroll,
5
+ doCollect,
6
+ waitForLoading,
7
+ threePhaseScroll,
8
+ } from '../../lib/scroll-collector.js';
@@ -1,5 +1,5 @@
1
- import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.mjs';
2
- import { scrollAndCollect } from '../scraper/modules/scroll-collector.mjs';
1
+ import { delay, ensureBrowserReady, ensureTikTokPage, retryWithBackoff } from '../scraper/modules/page-helpers.js';
2
+ import { scrollAndCollect } from '../scraper/modules/scroll-collector.js';
3
3
 
4
4
  async function getUserInfo(page) {
5
5
  return await page.evaluate(() => {