@web-auto/webauto 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/apps/desktop-console/default-settings.json +2 -2
  2. package/apps/desktop-console/dist/main/index.mjs +915 -85
  3. package/apps/desktop-console/dist/main/preload.mjs +7 -0
  4. package/apps/desktop-console/dist/renderer/index.html +622 -50
  5. package/apps/desktop-console/dist/renderer/index.js +2415 -470
  6. package/apps/desktop-console/dist/renderer/run.mts +6 -5
  7. package/apps/desktop-console/entry/ui-cli.mjs +672 -0
  8. package/apps/desktop-console/entry/ui-console.mjs +416 -29
  9. package/apps/webauto/entry/account.mjs +89 -53
  10. package/apps/webauto/entry/browser-status.mjs +7 -10
  11. package/apps/webauto/entry/lib/account-detect.mjs +254 -28
  12. package/apps/webauto/entry/lib/account-store.mjs +219 -30
  13. package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
  14. package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
  15. package/apps/webauto/entry/lib/profilepool.mjs +14 -5
  16. package/apps/webauto/entry/lib/quota-status.mjs +23 -0
  17. package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
  18. package/apps/webauto/entry/profilepool.mjs +106 -17
  19. package/apps/webauto/entry/schedule.mjs +612 -0
  20. package/apps/webauto/entry/weibo-unified.mjs +134 -0
  21. package/apps/webauto/entry/xhs-install.mjs +236 -29
  22. package/apps/webauto/entry/xhs-status.mjs +5 -2
  23. package/apps/webauto/entry/xhs-unified.mjs +631 -98
  24. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
  25. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
  26. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
  27. package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
  28. package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
  29. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
  30. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
  31. package/bin/camoufox-cli.mjs +61 -0
  32. package/bin/webauto.mjs +301 -54
  33. package/dist/modules/camo-backend/src/index.js +49 -1
  34. package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
  35. package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
  36. package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
  37. package/dist/modules/collection-manager/bloom-filter.js +91 -0
  38. package/dist/modules/collection-manager/date-utils.js +275 -0
  39. package/dist/modules/collection-manager/index.js +258 -0
  40. package/dist/modules/collection-manager/storage.js +195 -0
  41. package/dist/modules/collection-manager/types.js +47 -0
  42. package/dist/modules/logging/src/index.js +1 -1
  43. package/dist/modules/process-registry/index.js +230 -0
  44. package/dist/modules/rate-limiter/index.js +242 -0
  45. package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
  46. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
  47. package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
  48. package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
  49. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
  50. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
  51. package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
  52. package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
  53. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
  54. package/dist/modules/workflow/config/workflowRegistry.js +2 -0
  55. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
  56. package/dist/modules/workflow/src/runner.js +6 -0
  57. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
  58. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
  59. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
  60. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
  61. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
  62. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
  63. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
  64. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
  65. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
  66. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
  67. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  68. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
  69. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  70. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
  71. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
  72. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
  73. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
  74. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
  75. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
  76. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
  77. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  78. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
  79. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  80. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
  81. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  82. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
  83. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  84. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
  85. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  86. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
  87. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  88. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
  89. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  90. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
  91. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  92. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
  93. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  94. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
  95. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  96. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
  97. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  98. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
  99. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  100. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
  101. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  102. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
  103. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  104. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
  105. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  106. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
  107. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  108. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
  109. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  110. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
  111. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  112. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
  113. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  114. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
  115. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  116. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
  117. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  118. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
  119. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  120. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
  121. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
  122. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
  123. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  124. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
  125. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
  126. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
  127. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  128. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
  129. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  130. package/dist/services/shared/serviceProcessLogger.js +1 -1
  131. package/dist/services/unified-api/server.js +105 -11
  132. package/modules/camo-backend/src/index.ts +46 -1
  133. package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
  134. package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
  135. package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
  136. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
  137. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
  138. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
  139. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
  140. package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
  141. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
  142. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
  143. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
  144. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
  145. package/modules/collection-manager/bloom-filter.ts +112 -0
  146. package/modules/collection-manager/date-utils.ts +316 -0
  147. package/modules/collection-manager/index.ts +309 -0
  148. package/modules/collection-manager/package.json +10 -0
  149. package/modules/collection-manager/storage.ts +174 -0
  150. package/modules/collection-manager/types.ts +156 -0
  151. package/modules/logging/src/index.ts +1 -1
  152. package/modules/process-registry/index.ts +284 -0
  153. package/modules/rate-limiter/index.ts +322 -0
  154. package/modules/state/src/paths.ts +9 -1
  155. package/modules/task-scheduler/index.ts +293 -0
  156. package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
  157. package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
  158. package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
  159. package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
  160. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
  161. package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
  162. package/modules/workflow/config/workflowRegistry.ts +2 -0
  163. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
  164. package/modules/workflow/src/runner.ts +6 -0
  165. package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
  166. package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
  167. package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
  168. package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
  169. package/package.json +13 -4
  170. package/scripts/postinstall-resources.mjs +62 -0
  171. package/scripts/test/run-coverage.mjs +76 -0
  172. package/scripts/weibo/search.ts +49 -0
  173. package/services/shared/serviceProcessLogger.ts +1 -1
  174. package/services/unified-api/server.ts +98 -12
@@ -0,0 +1,165 @@
1
+ import path from 'node:path';
2
+ import os from 'node:os';
3
+ import fs from 'node:fs/promises';
4
/**
 * Convert an arbitrary label into a string usable as one path segment.
 *
 * The value is stringified (falsy values become '') and trimmed; every run of
 * the characters \ / : " * ? < > | is then collapsed into a single underscore.
 *
 * @param name     Raw label (any value).
 * @param fallback Returned when nothing usable remains. Defaults to 'unknown'.
 * @returns The cleaned segment, or `fallback`.
 */
function sanitizeForPath(name, fallback = 'unknown') {
    const trimmed = String(name || '').trim();
    if (trimmed.length === 0) {
        return fallback;
    }
    const safe = trimmed.replace(/[\\/:"*?<>|]+/g, '_').trim();
    return safe.length > 0 ? safe : fallback;
}
11
/**
 * Coerce an unknown value into a list of non-empty, trimmed strings.
 *
 * @param input Any value; anything that is not an array yields [].
 * @returns Each element stringified (falsy elements become '') and trimmed,
 *          with empty results dropped.
 */
function asStringArray(input) {
    if (!Array.isArray(input)) {
        return [];
    }
    const result = [];
    for (const entry of input) {
        const text = String(entry || '').trim();
        if (text) {
            result.push(text);
        }
    }
    return result;
}
18
/**
 * Build the list of still-pending note entries from raw collected rows.
 *
 * Rows that are not objects, have no noteId, are already completed, or repeat
 * an earlier noteId are skipped. Each surviving row is reduced to
 * `{ noteId, safeUrl }` plus `searchUrl` (when non-empty) and `timestamp`
 * (when it converts to a finite number).
 *
 * @param input            Raw `collectedUrls` value; a non-array yields [].
 * @param completedNoteIds Set-like object (`has`) of finished note ids.
 * @returns Normalized pending entries in their original order.
 */
function normalizeCollectedUrls(input, completedNoteIds) {
    if (!Array.isArray(input)) {
        return [];
    }
    const pending = [];
    const emittedIds = new Set();
    input.forEach((entry) => {
        const isRecord = Boolean(entry) && typeof entry === 'object';
        if (!isRecord) {
            return;
        }
        const noteId = String(entry.noteId || '').trim();
        const skip = noteId === '' || completedNoteIds.has(noteId) || emittedIds.has(noteId);
        if (skip) {
            return;
        }
        emittedIds.add(noteId);
        const searchUrl = String(entry.searchUrl || '').trim();
        const timestamp = Number(entry.timestamp);
        pending.push({
            noteId,
            safeUrl: String(entry.safeUrl || '').trim(),
            ...(searchUrl ? { searchUrl } : {}),
            ...(Number.isFinite(timestamp) ? { timestamp } : {}),
        });
    });
    return pending;
}
43
/**
 * Locate the `.collect-state.json` file for an env/keyword pair.
 *
 * The sanitized location `<root>/xiaohongshu/<env>/<keyword>/` is probed
 * first. If the unsanitized (legacy) location differs, it is probed second.
 *
 * @param input Object with `downloadRoot`, `env` and `keyword`.
 * @returns The first candidate path that `fs.access` accepts, or null.
 */
async function resolveStatePath(input) {
    const stateFileName = '.collect-state.json';
    const downloadRoot = resolveDownloadRoot(input.downloadRoot);
    const buildPath = (envSegment, keywordSegment) => path.join(downloadRoot, 'xiaohongshu', envSegment, keywordSegment, stateFileName);
    const preferred = buildPath(sanitizeForPath(input.env, 'debug'), sanitizeForPath(input.keyword, 'unknown'));
    const legacy = buildPath(String(input.env || '').trim(), String(input.keyword || '').trim());
    const lookupOrder = legacy === preferred ? [preferred] : [preferred, legacy];
    for (const candidate of lookupOrder) {
        // Any access failure just means "try the next candidate".
        const reachable = await fs.access(candidate).then(() => true, () => false);
        if (reachable) {
            return candidate;
        }
    }
    return null;
}
66
/**
 * Load the collect-state file and return the notes not yet completed.
 *
 * @param input Object with `keyword`, `env` and `downloadRoot`.
 * @returns Normalized pending entries. Returns [] when no state file exists or
 *          when reading/parsing fails (a warning is logged in the latter case).
 */
async function getXhsPendingItems(input) {
    const statePath = await resolveStatePath(input);
    if (!statePath) {
        return [];
    }
    try {
        const raw = await fs.readFile(statePath, 'utf8');
        const parsed = JSON.parse(raw);
        const completedIds = asStringArray(parsed?.detailCollection?.completedNoteIds);
        const collected = parsed?.listCollection?.collectedUrls;
        return normalizeCollectedUrls(collected, new Set(completedIds));
    }
    catch (error) {
        // Classify the failure for the log line; anything without a known code is
        // reported as an invalid state file (e.g. malformed JSON).
        const code = String(error?.code || '');
        let kind = 'invalid_state';
        if (code === 'ENOENT') {
            kind = 'missing_state';
        }
        else if (code === 'EACCES') {
            kind = 'access_denied';
        }
        console.warn(`[xhs.sharding] failed to load pending items (${kind}) from ${statePath}: ${error?.message || String(error)}`);
        return [];
    }
}
83
/**
 * FNV-1a 32-bit hash over the UTF-16 code units of a string.
 *
 * @param input Value to hash; falsy values are hashed as the empty string.
 * @returns Unsigned 32-bit hash.
 */
export function fnv1a32(input) {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 16777619;
    const text = String(input || '');
    let hash = FNV_OFFSET_BASIS;
    let pos = 0;
    while (pos < text.length) {
        // XOR in the code unit, then multiply by the prime modulo 2^32.
        hash = Math.imul(hash ^ text.charCodeAt(pos), FNV_PRIME) >>> 0;
        pos += 1;
    }
    return hash >>> 0;
}
94
/**
 * Validate and normalize a shard specification.
 *
 * `count` is floored and raised to at least 1; `index` is floored and raised to
 * at least 0. The spec is rejected (null) when either is not finite, when
 * `count` is 1 or less, or when `index` falls outside `[0, count)`.
 *
 * @param spec Object with `index`, `count` and optional `by`; may be falsy.
 * @returns `{ index, count, by }` with `by` defaulting to 'noteId-hash', or null.
 */
export function normalizeShard(spec) {
    if (!spec) {
        return null;
    }
    const count = Math.max(1, Math.floor(Number(spec.count)));
    const index = Math.max(0, Math.floor(Number(spec.index)));
    const usable = Number.isFinite(count) && Number.isFinite(index) && count > 1 && index < count;
    return usable ? { index, count, by: spec.by || 'noteId-hash' } : null;
}
107
/**
 * Keep only the items whose noteId hashes into the given shard.
 *
 * @param items Candidate items; a non-array is treated as an empty list.
 * @param shard `{ index, count }`. A null/undefined shard (what
 *              `normalizeShard` returns for "no sharding") or a `count` of 1 or
 *              less leaves the list unfiltered.
 * @returns Items with a non-empty noteId where
 *          `fnv1a32(noteId) % shard.count === shard.index`.
 */
export function shardFilterByNoteIdHash(items, shard) {
    const list = Array.isArray(items) ? items : [];
    // Guard the "no sharding" case instead of throwing on `shard.count`.
    if (!shard || shard.count <= 1)
        return list;
    return list.filter((it) => {
        const id = String(it?.noteId || '').trim();
        // Items without an id cannot be assigned to any shard.
        if (!id)
            return false;
        return fnv1a32(id) % shard.count === shard.index;
    });
}
118
/**
 * Keep only the items whose position falls into the given shard.
 *
 * @param items Candidate items; a non-array is treated as an empty list.
 * @param shard `{ index, count }`. A null/undefined shard (what
 *              `normalizeShard` returns for "no sharding") or a `count` of 1 or
 *              less leaves the list unfiltered.
 * @returns Items at positions where `position % shard.count === shard.index`.
 */
export function shardFilterByIndexMod(items, shard) {
    const list = Array.isArray(items) ? items : [];
    // Guard the "no sharding" case instead of throwing on `shard.count`.
    if (!shard || shard.count <= 1)
        return list;
    return list.filter((_, idx) => idx % shard.count === shard.index);
}
124
/**
 * Determine the download root directory.
 *
 * Precedence: the explicit argument, then `WEBAUTO_DOWNLOAD_ROOT` /
 * `WEBAUTO_DOWNLOAD_DIR`, then `<home>/.webauto/download`, where home comes
 * from `HOME`, `USERPROFILE` or `os.homedir()`.
 *
 * @param customRoot Optional explicit root; blank values are ignored.
 * @returns A resolved path for explicit/env values, otherwise the home-based
 *          default.
 */
export function resolveDownloadRoot(customRoot) {
    const override = [customRoot, process.env.WEBAUTO_DOWNLOAD_ROOT || process.env.WEBAUTO_DOWNLOAD_DIR]
        .map((value) => String(value || '').trim())
        .find(Boolean);
    if (override) {
        return path.resolve(override);
    }
    const homeDir = process.env.HOME || process.env.USERPROFILE || os.homedir();
    return path.join(homeDir, '.webauto', 'download');
}
134
/**
 * Split the currently pending notes into contiguous chunks, one per profile.
 *
 * The chunk size is `ceil(pending / profiles)`, so every pending note is
 * assigned. Trailing profiles may receive fewer notes, or none.
 *
 * @param input Object with `keyword`, `env`, `downloadRoot` and `validProfiles`.
 * @returns One `{ profileId, assignedNoteIds, totalPending }` entry per profile,
 *          or [] when there are no profiles.
 */
export async function buildDynamicShardPlan(input) {
    const { keyword, env, downloadRoot, validProfiles } = input;
    if (!validProfiles.length) {
        return [];
    }
    // Only notes that are not completed yet take part in the plan.
    const pending = await getXhsPendingItems({ keyword, env, downloadRoot });
    const totalPending = pending.length;
    if (totalPending === 0) {
        return validProfiles.map((profileId) => ({ profileId, assignedNoteIds: [], totalPending: 0 }));
    }
    const chunkSize = Math.ceil(totalPending / validProfiles.length);
    return Array.from(validProfiles, (profileId, slot) => {
        const from = slot * chunkSize;
        // slice() clamps the end index, so the last chunk may be short or empty.
        const assignedNoteIds = pending.slice(from, from + chunkSize).map((entry) => entry.noteId);
        return { profileId, assignedNoteIds, totalPending };
    });
}
160
/**
 * Return the pending entries whose noteId appears in `input.noteIds`.
 *
 * @param input Object with `keyword`, `env`, `downloadRoot` and `noteIds`.
 * @returns The matching pending entries, in pending order.
 */
export async function getPendingItemsByNoteIds(input) {
    const { keyword, env, downloadRoot } = input;
    const pending = await getXhsPendingItems({ keyword, env, downloadRoot });
    const wanted = new Set(input.noteIds);
    return pending.filter(({ noteId }) => wanted.has(noteId));
}
165
+ //# sourceMappingURL=sharding.js.map
@@ -0,0 +1,33 @@
1
/**
 * Shape of one extracted comment record. Every field is an optional string.
 * NOTE(review): which fields are populated, and their formats, are decided by
 * the extraction operation, which is not visible from this declaration.
 */
export type XhsExtractedComment = {
    user_id?: string;
    user_name?: string;
    text?: string;
    timestamp?: string;
    like_status?: string;
};
8
/** Highlights and clicks the comment button of the detail page, then waits briefly. */
export declare function ensureCommentsOpened(sessionId: string, apiUrl: string): Promise<void>;
/** Resolves to true when an end-of-comments marker or an empty-comments placeholder is detected. */
export declare function isCommentEnd(sessionId: string, apiUrl: string): Promise<boolean>;
/**
 * Back-and-forth scroll detection (risk-control friendly):
 * - scroll down with no change -> scroll up a little -> scroll down again
 * - if nothing changes after maxRounds round trips, conclude that the bottom
 *   was reached (or that the comment area cannot load any further)
 */
export declare function checkBottomWithBackAndForth(sessionId: string, apiUrl: string, maxRounds?: number): Promise<{
    reachedBottom: boolean;
    reason: string;
}>;
/** Extracts the currently visible comments; each record carries a `domIndex` field. */
export declare function extractVisibleComments(sessionId: string, apiUrl: string, maxItems: number): Promise<Array<XhsExtractedComment & {
    domIndex: number;
}>>;
/** Requests a highlight of the comment row at `index`; `channel` is optional. */
export declare function highlightCommentRow(sessionId: string, index: number, apiUrl: string, channel?: string): Promise<any>;
/** Requests a scroll of the comment section by an optional `distance`. */
export declare function scrollComments(sessionId: string, apiUrl: string, distance?: number): Promise<any>;
/**
 * Clicks visible reply-expand buttons over one or more passes and reports
 * counters: clicks issued, passes run, targets still remaining, and targets
 * detected in total.
 */
export declare function expandAllVisibleReplyButtons(sessionId: string, apiUrl: string, options?: {
    maxPasses?: number;
    maxClicksPerPass?: number;
}): Promise<{
    clicked: number;
    passes: number;
    remaining: number;
    detected: number;
}>;
33
+ //# sourceMappingURL=xhsComments.d.ts.map
@@ -0,0 +1,270 @@
1
+ import { controllerAction, delay } from '../../utils/controllerAction.js';
2
/**
 * Best-effort open of the comment area: highlight the comment button, click
 * it, then wait 1200 ms. Failures of either operation are ignored.
 *
 * @param sessionId Session to operate on.
 * @param apiUrl    Passed through to `controllerAction`.
 */
export async function ensureCommentsOpened(sessionId, apiUrl) {
    const buttonId = 'xiaohongshu_detail.comment_button';
    const highlightRequest = {
        containerId: buttonId,
        operationId: 'highlight',
        sessionId,
        timeoutMs: 15000,
        config: { duration: 1800, channel: 'xhs-comments' },
    };
    const clickRequest = { containerId: buttonId, operationId: 'click', sessionId, timeoutMs: 15000 };
    for (const request of [highlightRequest, clickRequest]) {
        // Neither step is allowed to abort the flow.
        await controllerAction('container:operation', request, apiUrl).catch(() => null);
    }
    await delay(1200);
}
13
/**
 * Detect whether the comment list has ended.
 *
 * Two signals are checked in order:
 *  1. the `end_marker` container extract succeeds and yields at least one item;
 *  2. an in-page probe finds an empty-comments placeholder whose text contains
 *     "这是一片荒地" / "荒地".
 * Errors from either probe are treated as "not ended".
 *
 * @param sessionId Session to operate on (also used as the browser profile).
 * @param apiUrl    Passed through to `controllerAction`.
 * @returns true when either signal is present, otherwise false.
 */
export async function isCommentEnd(sessionId, apiUrl) {
    const end = await controllerAction('container:operation', { containerId: 'xiaohongshu_detail.comment_section.end_marker', operationId: 'extract', sessionId, timeoutMs: 12000 }, apiUrl).catch(() => null);
    if (end && end?.success === true) {
        const extracted = Array.isArray(end?.extracted) ? end.extracted : [];
        if (extracted.length > 0)
            return true;
    }
    // Empty-comments marker: "这是一片荒地"
    try {
        const res = await controllerAction('browser:execute', {
            profile: sessionId,
            timeoutMs: 12000,
            // The backslash is doubled on purpose: inside a template literal a bare
            // "\s" cooks to the letter "s", which would turn the whitespace regex
            // into /s+/g in the injected script.
            script: `(() => {
        const candidates = [
          'p.no-comments-text',
          '.no-comments-text',
          '[class*="no-comments"]',
          '[class*="no-comment"]',
          '[class*="empty-comment"]',
        ];
        for (const sel of candidates) {
          const el = document.querySelector(sel);
          const t = (el?.textContent || '').replace(/\\s+/g, ' ').trim();
          if (t && (t.includes('这是一片荒地') || t.includes('荒地'))) return true;
        }
        return false;
      })()`,
        }, apiUrl);
        if (res?.result === true)
            return true;
    }
    catch {
        // Probe failure is not evidence of the end; fall through to false.
    }
    return false;
}
49
/**
 * Take a cheap fingerprint of the comments currently inside the viewport.
 *
 * The in-page probe counts the visible `.comment-item` elements and joins the
 * whitespace-normalized text of the first six that have text, truncated to
 * 200 characters.
 *
 * @param sessionId Used as the browser profile for the probe.
 * @param apiUrl    Passed through to `controllerAction`.
 * @returns `{ count, head }`; `{ count: -1, head: '' }` when the probe throws.
 */
async function getVisibleCommentSignature(sessionId, apiUrl) {
    try {
        const res = await controllerAction('browser:execute', {
            profile: sessionId,
            timeoutMs: 12000,
            // The backslash is doubled on purpose: inside a template literal a bare
            // "\s" cooks to the letter "s", which would make the injected regex
            // replace runs of "s" instead of whitespace.
            script: `(() => {
        const isVisible = (el) => {
          const r = el.getBoundingClientRect();
          return r.width > 0 && r.height > 0 && r.bottom > 0 && r.top < window.innerHeight;
        };
        const items = Array.from(document.querySelectorAll('.comment-item')).filter(isVisible);
        const texts = items
          .map((el) => {
            const t = el.querySelector('.content')?.textContent || el.querySelector('p')?.textContent || '';
            return t.replace(/\\s+/g, ' ').trim();
          })
          .filter(Boolean)
          .slice(0, 6);
        return { count: items.length, head: texts.join('|').slice(0, 200) };
      })()`,
        }, apiUrl);
        return {
            count: Number(res?.result?.count ?? -1),
            head: String(res?.result?.head ?? ''),
        };
    }
    catch {
        return { count: -1, head: '' };
    }
}
79
/**
 * Back-and-forth scroll detection (risk-control friendly).
 *
 * Each round scrolls down and compares the visible-comment signature with the
 * previous one. If nothing changed, it scrolls back a little, scrolls down
 * again and compares once more. After `maxRounds` rounds without any change
 * the bottom is considered reached (or the comment area cannot load further).
 *
 * NOTE(review): the "scroll back" step passes a negative distance (-240) to
 * `scrollComments`; confirm that helper honours negative values.
 *
 * @param sessionId Session to operate on.
 * @param apiUrl    Passed through to the helpers.
 * @param maxRounds Maximum number of rounds. Defaults to 3.
 * @returns `{ reachedBottom, reason }`.
 */
export async function checkBottomWithBackAndForth(sessionId, apiUrl, maxRounds = 3) {
    const snapshot = () => getVisibleCommentSignature(sessionId, apiUrl);
    const differs = (a, b) => a.count !== b.count || a.head !== b.head;
    const scrollThenWait = async (distance, waitMs) => {
        await scrollComments(sessionId, apiUrl, distance);
        await delay(waitMs);
    };
    let baseline = await snapshot();
    let round = 0;
    while (round < maxRounds) {
        await scrollThenWait(420, 800);
        const afterDown = await snapshot();
        if (differs(afterDown, baseline)) {
            return { reachedBottom: false, reason: 'down_changed' };
        }
        // Bounce: go back a bit and retry downward so a stuck list gets a nudge.
        await scrollThenWait(-240, 500);
        await scrollThenWait(240, 700);
        const afterBounce = await snapshot();
        if (differs(afterBounce, baseline)) {
            return { reachedBottom: false, reason: 'bounce_changed' };
        }
        baseline = afterBounce;
        round += 1;
    }
    return { reachedBottom: true, reason: 'no_change_after_back_and_forth' };
}
106
/**
 * Extract the visible comments and tag each one with a DOM index.
 *
 * The comments come from the `comment_item` container extract (`max_items`
 * clamped to 1..80). A second in-page probe lists the indices of the
 * `.comment-item` elements that are fully inside the viewport; the n-th
 * extracted comment receives the n-th such index, falling back to its own
 * position when no index is available.
 *
 * The index probe is auxiliary: if it fails or returns an unexpected shape, the
 * already-extracted comments are still returned with positional indices.
 *
 * @param sessionId Session to operate on (also used as the browser profile).
 * @param apiUrl    Passed through to `controllerAction`.
 * @param maxItems  Upper bound on extracted comments.
 * @returns Extracted comments, each with an added `domIndex`; [] when the
 *          extract operation does not report success.
 */
export async function extractVisibleComments(sessionId, apiUrl, maxItems) {
    const res = await controllerAction('container:operation', {
        containerId: 'xiaohongshu_detail.comment_section.comment_item',
        operationId: 'extract',
        sessionId,
        timeoutMs: 15000,
        config: { max_items: Math.max(1, Math.min(80, Math.floor(maxItems))), visibleOnly: true },
    }, apiUrl);
    if (!res?.success)
        return [];
    const extracted = Array.isArray(res?.extracted) ? res.extracted : [];
    // Nothing to annotate: skip the extra round trip.
    if (extracted.length === 0)
        return [];
    // Map extracted items to their DOM index (position in the querySelectorAll result).
    const containerRes = await controllerAction('browser:execute', {
        profile: sessionId,
        timeoutMs: 12000,
        script: `(() => {
      const items = Array.from(document.querySelectorAll('.comment-item'));
      return items.map((el, idx) => {
        const rect = el.getBoundingClientRect();
        return {
          domIndex: idx,
          visible: rect.top >= 0 && rect.bottom <= window.innerHeight && rect.height > 0
        };
      }).filter(x => x.visible);
    })()`,
    }, apiUrl).catch(() => null);
    const rows = Array.isArray(containerRes?.result) ? containerRes.result : [];
    const visibleIndices = rows.map((row) => {
        const n = Number(row?.domIndex);
        return Number.isInteger(n) && n >= 0 ? n : undefined;
    });
    return extracted.map((item, idx) => ({ ...item, domIndex: visibleIndices[idx] ?? idx }));
}
135
/**
 * Ask the `comment_item` container to highlight one comment row.
 *
 * @param sessionId Session to operate on.
 * @param index     Row index forwarded in the operation config.
 * @param apiUrl    Passed through to `controllerAction`.
 * @param channel   Highlight channel. Defaults to 'xhs-comment-row'.
 * @returns Whatever `controllerAction` resolves to.
 */
export async function highlightCommentRow(sessionId, index, apiUrl, channel = 'xhs-comment-row') {
    const highlightConfig = {
        index,
        target: 'self',
        style: '6px solid #ff00ff',
        duration: 8000,
        channel,
        visibleOnly: true,
    };
    const request = {
        containerId: 'xiaohongshu_detail.comment_section.comment_item',
        operationId: 'highlight',
        sessionId,
        config: highlightConfig,
    };
    return controllerAction('container:operation', request, apiUrl);
}
150
/**
 * Scroll the comment section.
 *
 * The sign of `distance` selects the direction: negative scrolls up, anything
 * else scrolls down. The magnitude is floored and clamped to 60..800. Before
 * this, a negative distance was clamped to 60 and still sent as 'down', so the
 * "scroll back" step of the back-and-forth bottom check moved the wrong way.
 *
 * NOTE(review): assumes the container `scroll` operation accepts
 * `direction: 'up'` — confirm against the operation implementation.
 *
 * @param sessionId Session to operate on.
 * @param apiUrl    Passed through to `controllerAction`.
 * @param distance  Signed scroll distance. Defaults to 650.
 * @returns Whatever `controllerAction` resolves to.
 */
export async function scrollComments(sessionId, apiUrl, distance = 650) {
    const signed = Math.floor(distance);
    const direction = signed < 0 ? 'up' : 'down';
    const magnitude = Math.max(60, Math.min(800, Math.abs(signed)));
    return controllerAction('container:operation', {
        containerId: 'xiaohongshu_detail.comment_section',
        operationId: 'scroll',
        sessionId,
        timeoutMs: 15000,
        config: { direction, distance: magnitude },
    }, apiUrl);
}
159
/**
 * Click every visible "expand replies" control, in several passes.
 *
 * Each pass probes the page for visible expand controls (text containing
 * "展开" plus one of "回复"/"评论"/"更多", and not "收起"/"折叠"), picks a click
 * point inside each that is not covered by a link, and clicks up to
 * `maxClicksPerPass` of them through the `show_more_button` container. The loop
 * stops when no targets are found, when a pass produced no successful click,
 * or after `maxPasses` passes.
 *
 * A click only counts as successful when the controller returned a response
 * that does not report `success: false`. A rejected/failed call is swallowed
 * into `null` and is not counted; previously it was counted, which defeated
 * the "no successful click -> stop" rule.
 *
 * @param sessionId Session to operate on (also used as the browser profile).
 * @param apiUrl    Passed through to `controllerAction`.
 * @param options   `maxPasses` (default 6, clamped 1..12) and
 *                  `maxClicksPerPass` (default 12, clamped 1..30).
 * @returns `{ clicked, passes, remaining, detected }`: successful clicks,
 *          passes run, targets still visible afterwards, and the sum of
 *          targets seen across passes.
 */
export async function expandAllVisibleReplyButtons(sessionId, apiUrl, options = {}) {
    const maxPasses = Math.max(1, Math.min(12, Number(options.maxPasses || 6)));
    const maxClicksPerPass = Math.max(1, Math.min(30, Number(options.maxClicksPerPass || 12)));
    // Returns the click points of all currently actionable expand controls,
    // ordered top-to-bottom then left-to-right. Probe failures yield [].
    const probeTargets = async () => {
        const probe = await controllerAction('browser:execute', {
            profile: sessionId,
            timeoutMs: 12000,
            // The backslash in the whitespace regex is doubled on purpose: inside a
            // template literal a bare "\s" cooks to the letter "s".
            script: `(() => {
        const root =
          document.querySelector('.comments-el') ||
          document.querySelector('.comment-list') ||
          document.querySelector('.comments-container') ||
          document.querySelector('[class*="comment-section"]') ||
          document.body;

        const selector = '.show-more, .reply-expand, [class*="show-more"], [class*="expand"]';
        const nodes = Array.from(root.querySelectorAll(selector));
        const viewportH = window.innerHeight || 0;
        const viewportW = window.innerWidth || 0;

        const targets = [];

        const isVisible = (el) => {
          const r = el.getBoundingClientRect();
          return r.width > 0 && r.height > 0 && r.bottom > 0 && r.top < viewportH && r.right > 0 && r.left < viewportW;
        };

        for (const el of nodes) {
          if (!(el instanceof HTMLElement)) continue;
          if (!isVisible(el)) continue;

          const text = (el.textContent || '').replace(/\\s+/g, ' ').trim();
          if (!text || !text.includes('展开')) continue;
          if (text.includes('收起') || text.includes('折叠')) continue;
          if (!(text.includes('回复') || text.includes('评论') || text.includes('更多'))) continue;

          const r = el.getBoundingClientRect();
          const points = [
            { x: Math.round(r.left + r.width * 0.72), y: Math.round(r.top + r.height * 0.55) },
            { x: Math.round(r.left + r.width * 0.55), y: Math.round(r.top + r.height * 0.55) },
            { x: Math.round(r.left + r.width * 0.85), y: Math.round(r.top + r.height * 0.5) },
          ];

          let picked = null;
          for (const p of points) {
            if (!Number.isFinite(p.x) || !Number.isFinite(p.y)) continue;
            if (p.x < 8 || p.y < 8 || p.x > viewportW - 8 || p.y > viewportH - 8) continue;
            const hit = document.elementFromPoint(p.x, p.y);
            if (!hit || !(hit instanceof Element)) continue;
            if (!(hit === el || el.contains(hit) || hit.contains(el))) continue;
            if (hit.closest && hit.closest('a[href]')) continue;
            picked = p;
            break;
          }

          if (!picked) continue;
          targets.push({ x: picked.x, y: picked.y, text });
        }

        targets.sort((a, b) => (a.y - b.y) || (a.x - b.x));
        return { targets };
      })()`,
        }, apiUrl).catch(() => null);
        // Accept both `{ result: { targets } }` and `{ targets }` response shapes.
        const raw = Array.isArray(probe?.result?.targets)
            ? probe.result.targets
            : Array.isArray(probe?.targets)
                ? probe.targets
                : [];
        return raw
            .map((t) => ({
            x: Math.round(Number(t?.x)),
            y: Math.round(Number(t?.y)),
            text: String(t?.text || ''),
        }))
            .filter((t) => Number.isFinite(t.x) && Number.isFinite(t.y));
    };
    let totalClicked = 0;
    let totalDetected = 0;
    let passes = 0;
    for (let pass = 0; pass < maxPasses; pass += 1) {
        const targets = await probeTargets();
        if (!targets.length)
            break;
        totalDetected += targets.length;
        let clickedThisPass = 0;
        const toClick = targets.slice(0, maxClicksPerPass);
        for (const t of toClick) {
            const clicked = await controllerAction('container:operation', {
                containerId: 'xiaohongshu_detail.comment_section.show_more_button',
                operationId: 'click',
                sessionId,
                timeoutMs: 15000,
                config: {
                    x: t.x,
                    y: t.y,
                },
            }, apiUrl).catch(() => null);
            // A swallowed failure (null) must not count as a click.
            if (clicked && clicked.success !== false)
                clickedThisPass += 1;
            await delay(220);
        }
        passes += 1;
        totalClicked += clickedThisPass;
        // No successful click in this pass means target is currently not actionable.
        if (clickedThisPass <= 0)
            break;
        await delay(380);
    }
    const remaining = (await probeTargets()).length;
    return { clicked: totalClicked, passes, remaining, detected: totalDetected };
}
270
+ //# sourceMappingURL=xhsComments.js.map
@@ -0,0 +1,9 @@
1
+ /**
2
+ * 小红书 App 模块入口
3
+ */
4
+ export * as Phase1EnsureServicesBlock from './blocks/Phase1EnsureServicesBlock.js';
5
+ export * as Phase1StartProfileBlock from './blocks/Phase1StartProfileBlock.js';
6
+ export * as Phase1MonitorCookieBlock from './blocks/Phase1MonitorCookieBlock.js';
7
+ export * as Phase2SearchBlock from './blocks/Phase2SearchBlock.js';
8
+ export * as Phase2CollectLinksBlock from './blocks/Phase2CollectLinksBlock.js';
9
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,9 @@
1
+ /**
2
+ * 小红书 App 模块入口
3
+ */
4
+ export * as Phase1EnsureServicesBlock from './blocks/Phase1EnsureServicesBlock.js';
5
+ export * as Phase1StartProfileBlock from './blocks/Phase1StartProfileBlock.js';
6
+ export * as Phase1MonitorCookieBlock from './blocks/Phase1MonitorCookieBlock.js';
7
+ export * as Phase2SearchBlock from './blocks/Phase2SearchBlock.js';
8
+ export * as Phase2CollectLinksBlock from './blocks/Phase2CollectLinksBlock.js';
9
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,50 @@
1
/** Identifier of a page checkpoint; one of the string literals listed below. */
export type XhsCheckpointId = 'home_ready' | 'search_ready' | 'detail_ready' | 'comments_ready' | 'login_guard' | 'risk_control' | 'offsite' | 'unknown';
/** Arguments for {@link detectXhsCheckpoint}. */
export interface DetectCheckpointInput {
    sessionId: string;
    /** NOTE(review): presumably the base URL of the controlling service — confirm. */
    serviceUrl?: string;
}
/** Result of {@link detectXhsCheckpoint}. */
export interface DetectCheckpointOutput {
    success: boolean;
    checkpoint: XhsCheckpointId;
    stage: string;
    url: string;
    rootId?: string | null;
    matchIds?: string[];
    signals: string[];
    /** DOM side signals from DetectPageStateBlock (when available) */
    dom?: {
        hasDetailMask?: boolean;
        hasSearchInput?: boolean;
        readyState?: string;
        title?: string;
    };
    error?: string;
}
/**
 * Detects which checkpoint the session is currently at.
 * NOTE(review): detection rules live in the implementation, not visible here.
 */
export declare function detectXhsCheckpoint(input: DetectCheckpointInput): Promise<DetectCheckpointOutput>;
/** Arguments for {@link ensureXhsCheckpoint}. */
export interface EnsureCheckpointInput {
    sessionId: string;
    /** Checkpoint the caller wants to end up at. */
    target: XhsCheckpointId;
    serviceUrl?: string;
    timeoutMs?: number;
    /** NOTE(review): fallback semantics are defined by the implementation — confirm. */
    allowOneLevelUpFallback?: boolean;
    evidence?: {
        highlightMs?: number;
    };
}
/** Result of {@link ensureXhsCheckpoint}, including a log of the attempted actions. */
export interface EnsureCheckpointOutput {
    success: boolean;
    from: XhsCheckpointId;
    to: XhsCheckpointId;
    reached: XhsCheckpointId;
    url: string;
    stage: string;
    attempts: Array<{
        action: string;
        ok: boolean;
        reason?: string;
    }>;
    signals: string[];
    error?: string;
}
/**
 * Tries to bring the session to the requested checkpoint.
 * NOTE(review): the recovery actions are defined by the implementation, not visible here.
 */
export declare function ensureXhsCheckpoint(input: EnsureCheckpointInput): Promise<EnsureCheckpointOutput>;
50
+ //# sourceMappingURL=checkpoints.d.ts.map