@web-auto/webauto 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/apps/desktop-console/default-settings.json +2 -2
  2. package/apps/desktop-console/dist/main/index.mjs +915 -85
  3. package/apps/desktop-console/dist/main/preload.mjs +7 -0
  4. package/apps/desktop-console/dist/renderer/index.html +622 -50
  5. package/apps/desktop-console/dist/renderer/index.js +2415 -470
  6. package/apps/desktop-console/dist/renderer/run.mts +6 -5
  7. package/apps/desktop-console/entry/ui-cli.mjs +672 -0
  8. package/apps/desktop-console/entry/ui-console.mjs +416 -29
  9. package/apps/webauto/entry/account.mjs +89 -53
  10. package/apps/webauto/entry/browser-status.mjs +7 -10
  11. package/apps/webauto/entry/lib/account-detect.mjs +254 -28
  12. package/apps/webauto/entry/lib/account-store.mjs +219 -30
  13. package/apps/webauto/entry/lib/bus-publish.mjs +63 -0
  14. package/apps/webauto/entry/lib/camo-cli.mjs +93 -0
  15. package/apps/webauto/entry/lib/profilepool.mjs +14 -5
  16. package/apps/webauto/entry/lib/quota-status.mjs +23 -0
  17. package/apps/webauto/entry/lib/schedule-store.mjs +1068 -0
  18. package/apps/webauto/entry/profilepool.mjs +106 -17
  19. package/apps/webauto/entry/schedule.mjs +612 -0
  20. package/apps/webauto/entry/weibo-unified.mjs +134 -0
  21. package/apps/webauto/entry/xhs-install.mjs +236 -29
  22. package/apps/webauto/entry/xhs-status.mjs +5 -2
  23. package/apps/webauto/entry/xhs-unified.mjs +631 -98
  24. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/comment_item/container.json +40 -0
  25. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_expand_button/container.json +38 -0
  26. package/apps/webauto/resources/container-library/weibo/weibo_detail_page/reply_list/container.json +37 -0
  27. package/apps/webauto/resources/container-library/weibo/weibo_search_page/container.json +8 -3
  28. package/apps/webauto/resources/container-library/weibo/weibo_search_page/login_anchor/container.json +30 -0
  29. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_bar/container.json +47 -0
  30. package/apps/webauto/resources/container-library/weibo/weibo_search_page/search_button/container.json +39 -0
  31. package/bin/camoufox-cli.mjs +61 -0
  32. package/bin/webauto.mjs +301 -54
  33. package/dist/modules/camo-backend/src/index.js +49 -1
  34. package/dist/modules/camo-backend/src/internal/BrowserSession.js +572 -3
  35. package/dist/modules/camo-backend/src/internal/SessionManager.js +13 -1
  36. package/dist/modules/camo-backend/src/internal/storage-paths.js +6 -0
  37. package/dist/modules/collection-manager/bloom-filter.js +91 -0
  38. package/dist/modules/collection-manager/date-utils.js +275 -0
  39. package/dist/modules/collection-manager/index.js +258 -0
  40. package/dist/modules/collection-manager/storage.js +195 -0
  41. package/dist/modules/collection-manager/types.js +47 -0
  42. package/dist/modules/logging/src/index.js +1 -1
  43. package/dist/modules/process-registry/index.js +230 -0
  44. package/dist/modules/rate-limiter/index.js +242 -0
  45. package/dist/modules/workflow/blocks/ExecuteWeiboSearchBlock.js +128 -0
  46. package/dist/modules/workflow/blocks/PersistXhsNoteBlock.js +7 -3
  47. package/dist/modules/workflow/blocks/RenderMarkdown.js +4 -1
  48. package/dist/modules/workflow/blocks/WeiboCollectCommentsBlock.js +282 -0
  49. package/dist/modules/workflow/blocks/WeiboCollectFromLinksBlock.js +283 -0
  50. package/dist/modules/workflow/blocks/WeiboCollectSearchLinksBlock.js +208 -0
  51. package/dist/modules/workflow/blocks/WeiboCollectTimelineListBlock.js +128 -0
  52. package/dist/modules/workflow/blocks/WeiboCollectUserPostsListBlock.js +127 -0
  53. package/dist/modules/workflow/blocks/helpers/downloadPaths.js +21 -0
  54. package/dist/modules/workflow/config/workflowRegistry.js +2 -0
  55. package/dist/modules/workflow/definitions/weibo-search-workflow-v1.js +47 -0
  56. package/dist/modules/workflow/src/runner.js +6 -0
  57. package/dist/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +4 -0
  58. package/dist/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +2 -2
  59. package/dist/modules/xiaohongshu/app/src/blocks/helpers/sharding.js +123 -0
  60. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.d.ts +37 -0
  61. package/dist/modules/xiaohongshu/app/src/container-registry/src/index.js +184 -0
  62. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.d.ts +31 -0
  63. package/dist/modules/xiaohongshu/app/src/workflow/blocks/AnchorVerificationBlock.js +71 -0
  64. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.d.ts +48 -0
  65. package/dist/modules/xiaohongshu/app/src/workflow/blocks/DetectPageStateBlock.js +259 -0
  66. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.d.ts +28 -0
  67. package/dist/modules/xiaohongshu/app/src/workflow/blocks/ErrorRecoveryBlock.js +319 -0
  68. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.d.ts +36 -0
  69. package/dist/modules/xiaohongshu/app/src/workflow/blocks/WaitSearchPermitBlock.js +162 -0
  70. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.d.ts +36 -0
  71. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/containerAnchors.js +301 -0
  72. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.d.ts +29 -0
  73. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/operationLogger.js +195 -0
  74. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.d.ts +25 -0
  75. package/dist/modules/xiaohongshu/app/src/workflow/blocks/helpers/searchPageState.js +164 -0
  76. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.d.ts +66 -0
  77. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/MatchCommentsBlock.js +139 -0
  78. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.d.ts +16 -0
  79. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1EnsureServicesBlock.js +36 -0
  80. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.d.ts +27 -0
  81. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1MonitorCookieBlock.js +213 -0
  82. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.d.ts +18 -0
  83. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.js +121 -0
  84. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.d.ts +34 -0
  85. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2CollectLinksBlock.js +1249 -0
  86. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.d.ts +17 -0
  87. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase2SearchBlock.js +703 -0
  88. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.d.ts +15 -0
  89. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseDetailBlock.js +41 -0
  90. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.d.ts +26 -0
  91. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CloseTabsBlock.js +44 -0
  92. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.d.ts +29 -0
  93. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34CollectCommentsBlock.js +150 -0
  94. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.d.ts +38 -0
  95. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ExtractDetailBlock.js +117 -0
  96. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.d.ts +30 -0
  97. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenDetailBlock.js +102 -0
  98. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.d.ts +23 -0
  99. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34OpenTabsBlock.js +109 -0
  100. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.d.ts +32 -0
  101. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.js +117 -0
  102. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.d.ts +35 -0
  103. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ProcessSingleNoteBlock.js +114 -0
  104. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.d.ts +34 -0
  105. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase34ValidateLinksBlock.js +90 -0
  106. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.d.ts +111 -0
  107. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase3InteractBlock.js +1009 -0
  108. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.d.ts +20 -0
  109. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/Phase4MultiTabHarvestBlock.js +233 -0
  110. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.d.ts +48 -0
  111. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/ReplyInteractBlock.js +291 -0
  112. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.d.ts +23 -0
  113. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/XhsDiscoverFallbackBlock.js +240 -0
  114. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.d.ts +55 -0
  115. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatchDsl.js +126 -0
  116. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.d.ts +21 -0
  117. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/commentMatcher.js +99 -0
  118. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.d.ts +5 -0
  119. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/evidence.js +27 -0
  120. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.d.ts +37 -0
  121. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/sharding.js +165 -0
  122. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.d.ts +33 -0
  123. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/blocks/helpers/xhsComments.js +270 -0
  124. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.d.ts +9 -0
  125. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/index.js +9 -0
  126. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.d.ts +50 -0
  127. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/checkpoints.js +222 -0
  128. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.d.ts +10 -0
  129. package/dist/modules/xiaohongshu/app/src/xiaohongshu/app/src/utils/controllerAction.js +43 -0
  130. package/dist/services/shared/serviceProcessLogger.js +1 -1
  131. package/dist/services/unified-api/server.js +105 -11
  132. package/modules/camo-backend/src/index.ts +46 -1
  133. package/modules/camo-backend/src/internal/BrowserSession.ts +619 -3
  134. package/modules/camo-backend/src/internal/SessionManager.ts +12 -1
  135. package/modules/camo-backend/src/internal/storage-paths.ts +5 -0
  136. package/modules/camo-runtime/src/autoscript/action-providers/xhs/comments.mjs +38 -2
  137. package/modules/camo-runtime/src/autoscript/action-providers/xhs/interaction.mjs +47 -2
  138. package/modules/camo-runtime/src/autoscript/action-providers/xhs/search.mjs +94 -11
  139. package/modules/camo-runtime/src/autoscript/action-providers/xhs.mjs +208 -2
  140. package/modules/camo-runtime/src/autoscript/runtime.mjs +7 -1
  141. package/modules/camo-runtime/src/autoscript/xhs-unified-template.mjs +76 -43
  142. package/modules/camo-runtime/src/container/runtime-core/operations/index.mjs +75 -1
  143. package/modules/camo-runtime/src/container/runtime-core/operations/selector-scripts.mjs +71 -4
  144. package/modules/camo-runtime/src/container/runtime-core/operations/tab-pool.mjs +183 -27
  145. package/modules/collection-manager/bloom-filter.ts +112 -0
  146. package/modules/collection-manager/date-utils.ts +316 -0
  147. package/modules/collection-manager/index.ts +309 -0
  148. package/modules/collection-manager/package.json +10 -0
  149. package/modules/collection-manager/storage.ts +174 -0
  150. package/modules/collection-manager/types.ts +156 -0
  151. package/modules/logging/src/index.ts +1 -1
  152. package/modules/process-registry/index.ts +284 -0
  153. package/modules/rate-limiter/index.ts +322 -0
  154. package/modules/state/src/paths.ts +9 -1
  155. package/modules/task-scheduler/index.ts +293 -0
  156. package/modules/workflow/blocks/ExecuteWeiboSearchBlock.ts +167 -0
  157. package/modules/workflow/blocks/PersistXhsNoteBlock.ts +7 -3
  158. package/modules/workflow/blocks/RenderMarkdown.ts +4 -1
  159. package/modules/workflow/blocks/WeiboCollectCommentsBlock.ts +339 -0
  160. package/modules/workflow/blocks/WeiboCollectFromLinksBlock.ts +338 -0
  161. package/modules/workflow/blocks/helpers/downloadPaths.ts +16 -0
  162. package/modules/workflow/config/workflowRegistry.ts +2 -0
  163. package/modules/workflow/definitions/weibo-search-workflow-v1.ts +47 -0
  164. package/modules/workflow/src/runner.ts +6 -0
  165. package/modules/xiaohongshu/app/src/blocks/Phase1StartProfileBlock.ts +1 -1
  166. package/modules/xiaohongshu/app/src/blocks/Phase34PersistDetailBlock.ts +4 -0
  167. package/modules/xiaohongshu/app/src/blocks/Phase3InteractBlock.ts +2 -3
  168. package/modules/xiaohongshu/app/src/blocks/helpers/sharding.ts +152 -0
  169. package/package.json +13 -4
  170. package/scripts/postinstall-resources.mjs +62 -0
  171. package/scripts/test/run-coverage.mjs +76 -0
  172. package/scripts/weibo/search.ts +49 -0
  173. package/services/shared/serviceProcessLogger.ts +1 -1
  174. package/services/unified-api/server.ts +98 -12
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ /**
3
+ * Simple Bloom Filter for memory-efficient deduplication
4
+ * Uses non-cryptographic hash functions for speed
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.BloomFilter = void 0;
8
/**
 * Probabilistic set used for memory-efficient de-duplication.
 *
 * Membership answers can be false positives but never false negatives, as long
 * as the filter is read with the same `size` and `hashCount` it was written
 * with. That is why `import` and `fromJSON` verify the bitmap length instead of
 * silently accepting a bitmap that belongs to differently-sized filter.
 */
class BloomFilter {
    /** Packed bit array: bit `b` lives in byte `b >> 3` at position `b & 7`. */
    bitmap;
    /** Number of addressable bits. */
    size;
    /** Number of hash probes performed per item. */
    hashCount;
    /** Number of `add` calls made on this instance (not the number of distinct items). */
    count = 0;
    /**
     * @param {number} expectedItems - Planned number of distinct items; must be > 0.
     * @param {number} falsePositiveRate - Target false-positive probability, strictly between 0 and 1.
     * @throws {RangeError} When either argument is outside its valid range.
     */
    constructor(expectedItems = 100000, falsePositiveRate = 0.01) {
        // Without this guard an item count of 0 produced hashCount = NaN, which made
        // mightContain() answer "true" for every item.
        if (!Number.isFinite(expectedItems) || expectedItems <= 0) {
            throw new RangeError(`expectedItems must be a positive number, got ${expectedItems}`);
        }
        if (!(falsePositiveRate > 0 && falsePositiveRate < 1)) {
            throw new RangeError(`falsePositiveRate must be between 0 and 1 (exclusive), got ${falsePositiveRate}`);
        }
        // Standard sizing: m = -n * ln(p) / (ln 2)^2 and k = (m / n) * ln 2.
        this.size = Math.ceil(-expectedItems * Math.log(falsePositiveRate) / Math.pow(Math.LN2, 2));
        this.hashCount = Math.ceil((this.size / expectedItems) * Math.LN2);
        this.bitmap = new Uint8Array(Math.ceil(this.size / 8));
    }
    /**
     * Seeded FNV-1a variant mapped onto the bit range.
     *
     * The arithmetic is intentionally unchanged (including `Math.abs`) so that
     * bitmaps persisted by earlier versions keep addressing the same bits.
     *
     * @param {string} str - Item to hash; non-strings are converted with `String()`.
     * @param {number} seed - Probe index, mixed into the FNV offset basis.
     * @returns {number} Bit index in `[0, size)`.
     */
    hash(str, seed) {
        const text = String(str);
        let hash = 2166136261 ^ seed;
        for (let i = 0; i < text.length; i++) {
            hash ^= text.charCodeAt(i);
            hash = Math.imul(hash, 16777619);
        }
        return Math.abs(hash) % this.size;
    }
    /**
     * Record an item by setting all of its probe bits.
     * @param {string} item
     */
    add(item) {
        for (let probe = 0; probe < this.hashCount; probe++) {
            const bit = this.hash(item, probe);
            this.bitmap[bit >> 3] |= 1 << (bit & 7);
        }
        this.count++;
    }
    /**
     * @param {string} item
     * @returns {boolean} `false` when the item was definitely never added, `true` when it probably was.
     */
    mightContain(item) {
        for (let probe = 0; probe < this.hashCount; probe++) {
            const bit = this.hash(item, probe);
            if ((this.bitmap[bit >> 3] & (1 << (bit & 7))) === 0) {
                return false;
            }
        }
        return true;
    }
    /** @returns {number} Number of `add` calls made on this instance. */
    getCount() {
        return this.count;
    }
    /**
     * Export the raw bitmap as a base64 string for persistence.
     * Only the bits are exported; `size`, `hashCount` and `count` are not.
     * @returns {string}
     */
    export() {
        return Buffer.from(this.bitmap).toString('base64');
    }
    /**
     * Rebuild a filter from a bitmap produced by `export()`.
     *
     * The sizing arguments must be the ones the exporting filter was built with;
     * otherwise every lookup would probe the wrong bits.
     *
     * @param {string} base64 - Output of `export()`.
     * @param {number} expectedItems - Same value the exporting filter was constructed with.
     * @param {number} falsePositiveRate - Same value the exporting filter was constructed with.
     * @returns {BloomFilter} Filter with the restored bits; its `count` starts at 0.
     * @throws {RangeError} When the decoded bitmap length does not match the requested sizing.
     */
    static import(base64, expectedItems = 100000, falsePositiveRate = 0.01) {
        const filter = new BloomFilter(expectedItems, falsePositiveRate);
        const bytes = new Uint8Array(Buffer.from(base64, 'base64'));
        if (bytes.length !== filter.bitmap.length) {
            throw new RangeError(`Bloom filter bitmap is ${bytes.length} bytes but the requested sizing needs ${filter.bitmap.length}`);
        }
        filter.bitmap = bytes;
        return filter;
    }
    /**
     * Serialize the bitmap together with the parameters needed to read it back.
     * @returns {{bitmap: string, size: number, hashCount: number, count: number}}
     */
    toJSON() {
        return {
            bitmap: this.export(),
            size: this.size,
            hashCount: this.hashCount,
            count: this.count
        };
    }
    /**
     * Rebuild a filter from the output of `toJSON()`.
     *
     * @param {{bitmap: string, size: number, hashCount: number, count?: number}} json
     * @returns {BloomFilter}
     * @throws {TypeError} When a required field is missing or has the wrong type.
     * @throws {RangeError} When the bitmap length does not match `size`.
     */
    static fromJSON(json) {
        const { bitmap, size, hashCount, count } = json ?? {};
        const isPositiveInt = (value) => Number.isInteger(value) && value > 0;
        if (typeof bitmap !== 'string' || !isPositiveInt(size) || !isPositiveInt(hashCount)) {
            throw new TypeError('Invalid serialized Bloom filter: expected { bitmap: string, size: int > 0, hashCount: int > 0 }');
        }
        const bytes = new Uint8Array(Buffer.from(bitmap, 'base64'));
        if (bytes.length !== Math.ceil(size / 8)) {
            throw new RangeError(`Bloom filter bitmap is ${bytes.length} bytes but size ${size} needs ${Math.ceil(size / 8)}`);
        }
        // Smallest valid placeholder; every field is overwritten below.
        const filter = new BloomFilter(1, 0.5);
        filter.bitmap = bytes;
        filter.size = size;
        filter.hashCount = hashCount;
        filter.count = Number.isInteger(count) && count >= 0 ? count : 0;
        return filter;
    }
}
90
+ exports.BloomFilter = BloomFilter;
91
+ //# sourceMappingURL=bloom-filter.js.map
@@ -0,0 +1,275 @@
1
+ "use strict";
2
+ /**
3
+ * Date extraction utilities for social media posts
4
+ * Handles various time formats from platforms like Weibo, Xiaohongshu
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.parsePlatformDate = parsePlatformDate;
8
+ exports.getCurrentTimestamp = getCurrentTimestamp;
9
+ exports.extractWeiboPostDate = extractWeiboPostDate;
10
/**
 * Parse a platform time label (relative or absolute) into a calendar date.
 *
 * Supported formats, tried in this order:
 * - "刚刚" / "刚刚来自..."                 → now
 * - "30秒前" / "5分钟前" / "2小时前"       → relative to now
 * - "今天 08:30" / "今天08:30"             → today
 * - "昨天 14:20" / "昨天14:20"             → yesterday
 * - "前天 10:00"                           → two days ago
 * - "3天前"                                → N days ago (date only)
 * - "2025-12-01" / "2025年12月01日"        → exact date, optional "HH:MM"
 * - "01-15" / "1月15日" / "12-01 15:30"    → this year, or the previous year
 *                                            when that would lie in the future
 *
 * An explicit year is always kept as written. Month, day, hour and minute
 * values that are out of range yield `null` (or a date-only result when only
 * the clock part of a dated label is invalid) instead of rolling over into a
 * different date.
 *
 * @param {string} text - Raw label text.
 * @param {{now?: Date, timezone?: string}} [options] - Reference instant
 *   (default: current time) and IANA timezone whose wall clock is used
 *   (default: "Asia/Shanghai").
 * @returns {{date: string, time: string, fullText: string} | null}
 *   `date` is YYYY-MM-DD, `time` is HH:MM or '' when unknown; `null` when the
 *   text is not a string, is empty, or contains no recognizable date.
 */
function parsePlatformDate(text, options = {}) {
    if (typeof text !== 'string')
        return null;
    const trimmed = text.trim();
    if (!trimmed)
        return null;
    const { now = new Date(), timezone = 'Asia/Shanghai' } = options ?? {};
    const instant = now instanceof Date ? now : new Date(now);
    if (Number.isNaN(instant.getTime()))
        return null;
    // Wall-clock time in the target timezone, held in a Date whose *local* fields
    // carry those values. Built from formatToParts rather than by re-parsing a
    // locale-formatted string, which is not a guaranteed-parsable format.
    const zoneParts = new Intl.DateTimeFormat('en-US', {
        timeZone: timezone,
        hourCycle: 'h23',
        year: 'numeric',
        month: 'numeric',
        day: 'numeric',
        hour: 'numeric',
        minute: 'numeric',
        second: 'numeric'
    }).formatToParts(instant);
    const zoneField = (type) => Number(zoneParts.find((part) => part.type === type)?.value);
    const currentDate = new Date(zoneField('year'), zoneField('month') - 1, zoneField('day'), zoneField('hour'), zoneField('minute'), zoneField('second'));
    const currentYear = currentDate.getFullYear();
    const toInt = (digits) => Number.parseInt(digits, 10);
    const withTime = (value) => ({
        date: formatDate(value),
        time: formatTime(value),
        fullText: formatDateTime(value)
    });
    const dateOnly = (value) => ({
        date: formatDate(value),
        time: '',
        fullText: formatDate(value)
    });
    const isValidClock = (hour, minute) => hour <= 23 && minute <= 59;
    // Day 0 of the following month is the last day of `month`.
    const isValidDay = (year, month, day) => month >= 1 && month <= 12 && day >= 1 && day <= new Date(year, month, 0).getDate();
    // Optional "HH:MM" anywhere in the label; null when absent or out of range.
    const readClock = () => {
        const clockMatch = trimmed.match(/(\d{1,2}):(\d{2})/);
        if (!clockMatch)
            return null;
        const hour = toInt(clockMatch[1]);
        const minute = toInt(clockMatch[2]);
        return isValidClock(hour, minute) ? { hour, minute } : null;
    };
    // "刚刚" / "刚刚来自..."
    if (trimmed.includes('刚刚')) {
        return withTime(currentDate);
    }
    // "X秒前" / "X分钟前" / "X小时前"
    const relativeMatch = trimmed.match(/(\d+)\s*(秒|分钟|小时)前/);
    if (relativeMatch) {
        const amount = toInt(relativeMatch[1]);
        const result = new Date(currentDate);
        switch (relativeMatch[2]) {
            case '秒':
                result.setSeconds(result.getSeconds() - amount);
                break;
            case '分钟':
                result.setMinutes(result.getMinutes() - amount);
                break;
            default:
                result.setHours(result.getHours() - amount);
        }
        return withTime(result);
    }
    // "今天 08:30", "昨天 14:20", "前天 10:00" (whitespace optional)
    const dayWords = [['今天', 0], ['昨天', 1], ['前天', 2]];
    for (const [word, daysBack] of dayWords) {
        const wordMatch = trimmed.match(new RegExp(`${word}\\s*(\\d{1,2}):(\\d{2})`));
        if (!wordMatch)
            continue;
        const hour = toInt(wordMatch[1]);
        const minute = toInt(wordMatch[2]);
        if (!isValidClock(hour, minute))
            return null;
        const result = new Date(currentDate);
        result.setDate(result.getDate() - daysBack);
        result.setHours(hour, minute, 0, 0);
        return withTime(result);
    }
    // "2天前" / "3天前"
    const daysAgoMatch = trimmed.match(/(\d+)\s*天前/);
    if (daysAgoMatch) {
        const result = new Date(currentDate);
        result.setDate(result.getDate() - toInt(daysAgoMatch[1]));
        return dateOnly(result);
    }
    // "2025-12-01" / "2025年12月01日" - must be tested before MM-DD.
    const fullDateMatch = trimmed.match(/(\d{4})[-年](\d{1,2})[-月](\d{1,2})/);
    if (fullDateMatch) {
        const [year, month, day] = fullDateMatch.slice(1).map((digits) => toInt(digits));
        if (!isValidDay(year, month, day))
            return null;
        const clock = readClock();
        // The year is explicit, so it is kept even when the date is in the future.
        return clock
            ? withTime(new Date(year, month - 1, day, clock.hour, clock.minute, 0, 0))
            : dateOnly(new Date(year, month - 1, day));
    }
    // "01-15" / "1月15日": assume this year, fall back to last year if that is in the future.
    const monthDayMatch = trimmed.match(/(\d{1,2})[-月](\d{1,2})日?/);
    if (monthDayMatch && !trimmed.includes('年')) {
        const month = toInt(monthDayMatch[1]);
        const day = toInt(monthDayMatch[2]);
        const clock = readClock();
        const buildForYear = (year) => isValidDay(year, month, day)
            ? new Date(year, month - 1, day, clock ? clock.hour : 0, clock ? clock.minute : 0, 0, 0)
            : null;
        let result = buildForYear(currentYear);
        if (!result || result.getTime() > currentDate.getTime()) {
            result = buildForYear(currentYear - 1);
        }
        if (!result)
            return null;
        return clock ? withTime(result) : dateOnly(result);
    }
    return null;
}
166
/**
 * Render the local calendar fields of a Date as "YYYY-MM-DD".
 * Month and day are zero-padded to two digits; the year is emitted as-is.
 */
function formatDate(date) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    return [
        date.getFullYear(),
        twoDigits(date.getMonth() + 1),
        twoDigits(date.getDate())
    ].join('-');
}
175
/**
 * Render the local hour and minute of a Date as zero-padded "HH:MM".
 */
function formatTime(date) {
    return [date.getHours(), date.getMinutes()]
        .map((value) => String(value).padStart(2, '0'))
        .join(':');
}
183
/**
 * Render a Date as "YYYY-MM-DD HH:MM" by joining formatDate and formatTime
 * with a single space.
 */
function formatDateTime(date) {
    const datePart = formatDate(date);
    const timePart = formatTime(date);
    return datePart + ' ' + timePart;
}
189
/**
 * Describe one instant both as a UTC ISO string and as wall-clock time in a
 * given timezone.
 *
 * @param {string} [timezone='Asia/Shanghai'] - IANA timezone for the local fields.
 * @param {Date} [now=new Date()] - Instant to describe; defaults to the current time.
 * @returns {{collectedAt: string, collectedAtLocal: string, collectedDate: string}}
 *   - `collectedAt`: `now.toISOString()` (UTC).
 *   - `collectedAtLocal`: "YYYY-MM-DD HH:MM:SS.mmm <offset>", where the offset is
 *     the formatter's short offset name (for example "GMT+8").
 *   - `collectedDate`: "YYYY-MM-DD" in the given timezone.
 */
function getCurrentTimestamp(timezone = 'Asia/Shanghai', now = new Date()) {
    // UTC ISO string
    const collectedAt = now.toISOString();
    // hourCycle 'h23' pins the hour to 00-23; `hour12: false` may render midnight
    // as "24" on some engines.
    const parts = new Intl.DateTimeFormat('en-US', {
        timeZone: timezone,
        year: 'numeric',
        month: '2-digit',
        day: '2-digit',
        hour: '2-digit',
        minute: '2-digit',
        second: '2-digit',
        fractionalSecondDigits: 3,
        hourCycle: 'h23',
        timeZoneName: 'shortOffset'
    }).formatToParts(now);
    const lookup = {};
    for (const { type, value } of parts) {
        if (type !== 'literal') {
            lookup[type] = value;
        }
    }
    // Pad defensively so the layout is fixed-width regardless of engine quirks.
    const field = (type, width = 2) => String(lookup[type] ?? '').padStart(width, '0');
    const collectedDate = `${field('year', 4)}-${field('month')}-${field('day')}`;
    const clock = `${field('hour')}:${field('minute')}:${field('second')}.${field('fractionalSecond', 3)}`;
    // Taken from the formatter for the requested zone rather than a fixed
    // "+08:00" fallback, which would be wrong for any other timezone.
    const tzOffset = lookup.timeZoneName;
    const collectedAtLocal = tzOffset
        ? `${collectedDate} ${clock} ${tzOffset}`
        : `${collectedDate} ${clock}`;
    return {
        collectedAt,
        collectedAtLocal,
        collectedDate
    };
}
230
/**
 * Extract the publication date of a Weibo post from its DOM element.
 *
 * First tries the text of the first element matching each known time selector;
 * if none of those parses, scans the post's whole text for a date-like
 * substring. Fallback patterns are tried in priority order, with full dates
 * before month-day so that "2025-12-01" is never read as "25-12".
 *
 * @param {{querySelector: Function, textContent?: string | null} | null | undefined} postElement
 *   Post root element (anything exposing `querySelector` and `textContent`).
 * @param {Date} [now=new Date()] - Reference instant forwarded to parsePlatformDate.
 * @returns {{date: string, time: string, fullText: string} | null}
 *   Result of parsePlatformDate, or `null` when no date could be found.
 */
function extractWeiboPostDate(postElement, now = new Date()) {
    if (!postElement)
        return null;
    // Weibo post time is usually in:
    // - <a class="head-info_time_..."> or similar
    // - Element with from, time info
    const timeSelectors = [
        'a[class*="time"]',
        'a[class*="date"]',
        'span[class*="time"]',
        '.from a',
        'a[href*="weibo.com"]'
    ];
    for (const selector of timeSelectors) {
        const text = postElement.querySelector(selector)?.textContent?.trim();
        if (!text)
            continue;
        const parsed = parsePlatformDate(text, { now });
        if (parsed)
            return parsed;
    }
    // Fallback: search all text content for date patterns
    const allText = postElement.textContent || '';
    const optionalClock = '(?:\\s*\\d{1,2}:\\d{2})?';
    const datePatterns = [
        /刚刚/,
        /\d+\s*(?:秒|分钟|小时)前/,
        /今天\s*\d{1,2}:\d{2}/,
        /昨天\s*\d{1,2}:\d{2}/,
        /前天\s*\d{1,2}:\d{2}/,
        /\d+\s*天前/,
        // Full date first; the trailing clock is kept so the time is not lost.
        new RegExp(`\\d{4}[-年]\\d{1,2}[-月]\\d{1,2}日?${optionalClock}`),
        // Month-day must not start or end in the middle of a longer digit run.
        new RegExp(`(?<!\\d)\\d{1,2}[-月]\\d{1,2}(?!\\d)日?${optionalClock}`)
    ];
    for (const pattern of datePatterns) {
        const match = allText.match(pattern);
        if (!match)
            continue;
        const parsed = parsePlatformDate(match[0], { now });
        if (parsed)
            return parsed;
    }
    return null;
}
275
+ //# sourceMappingURL=date-utils.js.map
@@ -0,0 +1,258 @@
1
+ "use strict";
2
+ /**
3
+ * Collection Data Manager
4
+ *
5
+ * Unified data management for all platforms and collection types.
6
+ * Handles:
7
+ * - Deduplication via Bloom Filter
8
+ * - Fresh/Incremental modes
9
+ * - File storage with human-readable paths
10
+ * - Stats and persistence
11
+ */
12
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ var desc = Object.getOwnPropertyDescriptor(m, k);
15
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
16
+ desc = { enumerable: true, get: function() { return m[k]; } };
17
+ }
18
+ Object.defineProperty(o, k2, desc);
19
+ }) : (function(o, m, k, k2) {
20
+ if (k2 === undefined) k2 = k;
21
+ o[k2] = m[k];
22
+ }));
23
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
24
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
25
+ }) : function(o, v) {
26
+ o["default"] = v;
27
+ });
28
+ var __importStar = (this && this.__importStar) || (function () {
29
+ var ownKeys = function(o) {
30
+ ownKeys = Object.getOwnPropertyNames || function (o) {
31
+ var ar = [];
32
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
33
+ return ar;
34
+ };
35
+ return ownKeys(o);
36
+ };
37
+ return function (mod) {
38
+ if (mod && mod.__esModule) return mod;
39
+ var result = {};
40
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
41
+ __setModuleDefault(result, mod);
42
+ return result;
43
+ };
44
+ })();
45
+ Object.defineProperty(exports, "__esModule", { value: true });
46
+ exports.parseCollectionId = exports.buildCollectionId = exports.CollectionDataManager = void 0;
47
+ const os = __importStar(require("os"));
48
+ const path = __importStar(require("path"));
49
+ const bloom_filter_1 = require("./bloom-filter");
50
+ const storage_1 = require("./storage");
51
+ const types_1 = require("./types");
52
+ Object.defineProperty(exports, "buildCollectionId", { enumerable: true, get: function () { return types_1.buildCollectionId; } });
53
+ Object.defineProperty(exports, "parseCollectionId", { enumerable: true, get: function () { return types_1.parseCollectionId; } });
54
/**
 * Coordinates storage, de-duplication and counters for a single collection.
 *
 * Post ids and "postId:commentId" keys share one Bloom filter. In incremental
 * mode the filter is restored from the persisted metadata and then topped up
 * with every stored post id, so posts written after the last `persist()` call
 * are still recognized as duplicates.
 */
class CollectionDataManager {
    /** Sizing used for every filter this manager creates or restores. */
    static BLOOM_EXPECTED_ITEMS = 500000;
    static BLOOM_FALSE_POSITIVE_RATE = 0.001; // 0.1% false positives
    platform;
    env;
    spec;
    mode;
    collectionId;
    storage;
    bloomFilter;
    meta = null;
    stats = {
        totalPosts: 0,
        totalComments: 0,
        newPosts: 0,
        newComments: 0,
        duplicatesSkipped: 0
    };
    /**
     * @param {object} options
     * @param {string} options.platform
     * @param {string} options.env
     * @param {object} options.spec - Collection spec; its `source` field selects which extra fields go into the metadata.
     * @param {string} [options.mode='incremental'] - 'fresh' clears existing data during `init()`.
     * @param {string} [options.baseDir] - Defaults to `~/.webauto/download`.
     */
    constructor(options) {
        this.platform = options.platform;
        this.env = options.env;
        this.spec = options.spec;
        this.mode = options.mode || 'incremental';
        // Build human-readable collection ID
        this.collectionId = (0, types_1.buildCollectionId)(options.spec);
        // Default base directory
        const baseDir = options.baseDir || path.join(os.homedir(), '.webauto', 'download');
        this.storage = new storage_1.CollectionStorage(baseDir, this.platform, this.env, this.collectionId);
        this.bloomFilter = new bloom_filter_1.BloomFilter(CollectionDataManager.BLOOM_EXPECTED_ITEMS, CollectionDataManager.BLOOM_FALSE_POSITIVE_RATE);
    }
    /**
     * Prepare the collection for use.
     * - fresh mode: clear stored data and write new metadata
     * - incremental mode with metadata: restore the Bloom filter, then add all stored post ids
     * - incremental mode without metadata: write new metadata
     * Finally seeds the running totals from the storage statistics.
     */
    async init() {
        await this.storage.init();
        const existingMeta = await this.storage.readMeta();
        if (this.mode === 'fresh') {
            // Clear all existing data
            await this.storage.clear();
            this.meta = this.createMeta();
        }
        else if (existingMeta) {
            this.meta = existingMeta;
            this.meta.updatedAt = new Date().toISOString();
            if (existingMeta.bloomFilter) {
                this.restoreBloomFilter(existingMeta.bloomFilter);
            }
            // Added AFTER the restore so they land in the filter that is actually
            // used; this also covers posts appended since the last persist().
            const posts = await this.storage.readPosts();
            for (const post of posts) {
                this.bloomFilter.add(post.id);
            }
        }
        else {
            // Incremental mode, but no existing data
            this.meta = this.createMeta();
        }
        await this.storage.writeMeta(this.meta);
        // Initialize stats from existing data
        const storageStats = await this.storage.getStats();
        this.stats.totalPosts = storageStats.postCount;
        this.stats.totalComments = storageStats.commentCount;
    }
    /**
     * Replace the current filter with one rebuilt from a persisted bitmap.
     *
     * The bitmap carries no sizing information, so it is read back with the
     * sizing of the filter created in the constructor (the same sizing used when
     * it was exported). A bitmap of any other length is treated as corrupted.
     *
     * @param {string} serialized - Base64 bitmap stored in `meta.bloomFilter`.
     * @returns {boolean} `true` when the filter was restored, `false` when the
     *   data was unusable and the current filter was kept.
     */
    restoreBloomFilter(serialized) {
        try {
            const { size, hashCount } = this.bloomFilter.toJSON();
            const byteLength = Buffer.from(serialized, 'base64').length;
            if (byteLength !== Math.ceil(size / 8)) {
                return false;
            }
            this.bloomFilter = bloom_filter_1.BloomFilter.fromJSON({
                bitmap: serialized,
                size,
                hashCount,
                count: 0
            });
            return true;
        }
        catch {
            // Best effort: a corrupted bitmap must not prevent the run; the caller
            // rebuilds post membership from storage afterwards.
            return false;
        }
    }
    /** Build a new metadata record for this collection with zeroed totals. */
    createMeta() {
        const timestamp = new Date().toISOString();
        const meta = {
            platform: this.platform,
            env: this.env,
            collectionId: this.collectionId,
            source: this.spec.source,
            createdAt: timestamp,
            updatedAt: timestamp,
            totalPosts: 0,
            totalComments: 0,
            mode: this.mode
        };
        if (this.spec.source === 'search') {
            meta.keyword = this.spec.keyword;
        }
        if (this.spec.source === 'user') {
            meta.userId = this.spec.userId;
            meta.userName = this.spec.userName;
        }
        return meta;
    }
    /**
     * Check if post ID already exists (via bloom filter).
     * May report a false positive; never a false negative.
     */
    hasPost(postId) {
        return this.bloomFilter.mightContain(postId);
    }
    /**
     * Add a post if not duplicate.
     * @returns {Promise<boolean>} true if added, false if treated as duplicate
     */
    async addPost(post) {
        if (this.bloomFilter.mightContain(post.id)) {
            this.stats.duplicatesSkipped++;
            return false;
        }
        this.bloomFilter.add(post.id);
        await this.storage.appendPost(post);
        this.stats.totalPosts++;
        this.stats.newPosts++;
        return true;
    }
    /**
     * Add a comment unless its "postId:commentId" key was already seen.
     * Skipped comments are not counted in `duplicatesSkipped`.
     * @returns {Promise<boolean>} true if added, false if treated as duplicate
     */
    async addComment(comment) {
        const commentKey = `${comment.postId}:${comment.id}`;
        if (this.bloomFilter.mightContain(commentKey)) {
            return false;
        }
        this.bloomFilter.add(commentKey);
        await this.storage.appendComment(comment);
        this.stats.totalComments++;
        this.stats.newComments++;
        return true;
    }
    /**
     * Persist metadata and bloom filter state. No-op before `init()` has set the metadata.
     */
    async persist() {
        if (!this.meta)
            return;
        this.meta.updatedAt = new Date().toISOString();
        this.meta.totalPosts = this.stats.totalPosts;
        this.meta.totalComments = this.stats.totalComments;
        this.meta.bloomFilter = this.bloomFilter.export();
        await this.storage.writeMeta(this.meta);
    }
    /** Get a copy of the current stats. */
    getStats() {
        return { ...this.stats };
    }
    /** Get collection metadata (`null` until `init()` has run). */
    getMeta() {
        return this.meta;
    }
    /** Get collection ID. */
    getCollectionId() {
        return this.collectionId;
    }
    /** Get storage paths for external use (e.g., logging). */
    getPaths() {
        return {
            collectionDir: this.storage.collectionDir,
            postsPath: this.storage.getPostsPath(),
            commentsPath: this.storage.getCommentsPath(),
            linksPath: this.storage.getLinksPath(),
            runLogPath: this.storage.getRunLogPath()
        };
    }
    /**
     * List all collections for this platform/env, with each id parsed back into a spec.
     */
    static async listCollections(platform, env, baseDir) {
        const dir = baseDir || path.join(os.homedir(), '.webauto', 'download');
        const ids = await storage_1.CollectionStorage.listCollections(dir, platform, env);
        return ids.map((id) => ({
            collectionId: id,
            spec: (0, types_1.parseCollectionId)(id)
        }));
    }
    /**
     * Merge multiple collections into one.
     * Useful for combining timeline data from multiple dates.
     * Posts are de-duplicated by id across the sources; posts already present in
     * the target are not consulted.
     */
    static async mergeCollections(sourceIds, targetId, platform, env, baseDir) {
        const dir = baseDir || path.join(os.homedir(), '.webauto', 'download');
        const targetStorage = new storage_1.CollectionStorage(dir, platform, env, targetId);
        await targetStorage.init();
        const seenPostIds = new Set();
        for (const sourceId of sourceIds) {
            const sourceStorage = new storage_1.CollectionStorage(dir, platform, env, sourceId);
            const posts = await sourceStorage.readPosts();
            for (const post of posts) {
                if (seenPostIds.has(post.id))
                    continue;
                seenPostIds.add(post.id);
                await targetStorage.appendPost(post);
            }
        }
    }
}
257
+ exports.CollectionDataManager = CollectionDataManager;
258
+ //# sourceMappingURL=index.js.map