@chainlesschain/personal-data-hub 0.3.1 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
  2. package/__tests__/adapters/email-adapter.test.js +1 -1
  3. package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
  4. package/__tests__/adapters/email-retry-progress.test.js +1 -1
  5. package/__tests__/adapters/email-templates.test.js +1 -1
  6. package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
  7. package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
  8. package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
  9. package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
  10. package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
  11. package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
  12. package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
  13. package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
  14. package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
  15. package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
  16. package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
  17. package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
  18. package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
  19. package/__tests__/adapters/system-data-android.test.js +32 -1
  20. package/__tests__/longtail-adapters.test.js +15 -2
  21. package/__tests__/shopping-adapters.test.js +96 -0
  22. package/__tests__/sign-providers.test.js +62 -0
  23. package/__tests__/travel-adapters.test.js +66 -0
  24. package/__tests__/whatsapp-adapter.test.js +5 -2
  25. package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
  26. package/lib/adapters/email-imap/email-adapter.js +224 -17
  27. package/lib/adapters/messaging-telegram/index.js +15 -12
  28. package/lib/adapters/messaging-whatsapp/index.js +15 -12
  29. package/lib/adapters/shopping-taobao/index.js +161 -21
  30. package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
  31. package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
  32. package/lib/adapters/social-bilibili-adb/collector.js +190 -0
  33. package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
  34. package/lib/adapters/social-bilibili-adb/index.js +51 -0
  35. package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
  36. package/lib/adapters/social-douyin/index.js +4 -0
  37. package/lib/adapters/social-douyin-adb/collector.js +165 -0
  38. package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
  39. package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
  40. package/lib/adapters/social-douyin-adb/index.js +57 -0
  41. package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
  42. package/lib/adapters/social-weibo-adb/api-client.js +281 -0
  43. package/lib/adapters/social-weibo-adb/collector.js +169 -0
  44. package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
  45. package/lib/adapters/social-weibo-adb/index.js +55 -0
  46. package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
  47. package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
  48. package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
  49. package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
  50. package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
  51. package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
  52. package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
  53. package/lib/adapters/system-data-android/adapter.js +77 -3
  54. package/lib/adapters/travel-amap/index.js +16 -10
  55. package/lib/adapters/travel-ctrip/index.js +25 -9
  56. package/lib/adapters/vscode/vscode-reader.js +7 -1
  57. package/lib/sign-providers/index.js +20 -0
  58. package/lib/sign-providers/interface.js +82 -0
  59. package/lib/sign-providers/null-sign-provider.js +30 -0
  60. package/package.json +6 -1
@@ -0,0 +1,174 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * Phase 2a (Douyin C 路径 — 2026-05-25): IM-db parse result → snapshot JSON.
5
+ *
6
+ * Takes the `{messages, contacts}` shape from im-db-parser.js and produces
7
+ * a snapshot matching the existing `social-douyin` adapter's
8
+ * SNAPSHOT_SCHEMA_VERSION=1 contract — so we reuse the adapter's snapshot
9
+ * mode (`_syncViaSnapshot`) instead of opening a second adapter.
10
+ *
11
+ * Mirrors social-bilibili-adb/snapshot-builder.js. Single-source-of-truth
12
+ * for the adapter; we feed it via different upstreams.
13
+ *
14
+ * Snapshot schema (matches social-douyin/index.js:SNAPSHOT_SCHEMA_VERSION):
15
+ *
16
+ * {
17
+ * "schemaVersion": 1,
18
+ * "snapshottedAt": <epoch-ms>,
19
+ * "account": {
20
+ * "secUid": null, // C 路径不调 X-Bogus profile, 不知 secUid
21
+ * "shortId": "<uid>", // set to input.uid (Douyin numeric uid)
22
+ * "displayName": ""
23
+ * },
24
+ * "events": [
25
+ * { "kind": "message", "id": "msg-<conv>-<time>", "capturedAt": <ms>,
26
+ * "senderUid": "...", "conversationId": "...",
27
+ * "text": "...", "readStatus": 0/1, "contentBlob": "..." },
28
+ * { "kind": "contact", "id": "contact-<uid>", "capturedAt": <ms>,
29
+ * "uid": "...", "shortId": "...", "name": "...",
30
+ * "avatarUrl": "...", "followStatus": 0/1/2 }
31
+ * ]
32
+ * }
33
+ *
34
+ * Note: Douyin IM doesn't have a "this is me" marker — the db includes
35
+ * messages where `senderUid === <db-filename-uid>` (sent by self) and
36
+ * `senderUid !== <db-filename-uid>` (received). Both go into the snapshot;
37
+ * the consumer (e.g. PDH search) can filter by senderUid if needed.
38
+ */
39
+
40
+ const fs = require("node:fs");
41
+ const path = require("node:path");
42
+ const os = require("node:os");
43
+ const crypto = require("node:crypto");
44
+
45
+ const SNAPSHOT_SCHEMA_VERSION = 1;
46
+
47
/**
 * Build an in-memory snapshot from parsed IM db rows. Pure function — no
 * disk IO.
 *
 * Entries of `messages` / `contacts` that are not objects are skipped.
 *
 * Message ids:
 *   - `msg-<conversationId>-<createdTimeMs>` when both fields are truthy,
 *   - `msg-<senderUid>-<createdTimeMs>` when only the sender is known,
 *   - `msg-<index>` otherwise (index = position in `input.messages`).
 * Contact ids are `contact-<uid>`, or `contact-<index>` when uid is missing.
 * Index-based ids depend on row order, so they are only unique within one
 * snapshot.
 *
 * @param {{
 *   uid: string,
 *   messages?: Array,
 *   contacts?: Array,
 *   snapshottedAt?: number,
 *   displayName?: string,
 * }} input
 * @returns {{schemaVersion: number, snapshottedAt: number, account: object, events: Array}}
 * @throws {TypeError} when `input` is not an object or `input.uid` is not a
 *   non-empty string.
 */
function buildSnapshot(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("buildSnapshot: input must be an object");
  }
  const uid = input.uid;
  if (typeof uid !== "string" || uid.length === 0) {
    throw new TypeError("buildSnapshot: input.uid must be a non-empty string");
  }
  // Fall back to "now" when the caller gives no usable timestamp.
  const snapshottedAt =
    Number.isFinite(input.snapshottedAt) && input.snapshottedAt > 0
      ? input.snapshottedAt
      : Date.now();
  const account = {
    // secUid is unknown via pure-db extraction, so it stays null and
    // consumers must not treat it as a canonical id.
    secUid: null,
    // The numeric uid is used as the shortId equivalent.
    shortId: uid,
    displayName:
      typeof input.displayName === "string" ? input.displayName : "",
  };
  const events = [];

  // messages
  const messages = Array.isArray(input.messages) ? input.messages : [];
  messages.forEach((m, idx) => {
    if (!m || typeof m !== "object") return;
    const capturedAt =
      typeof m.createdTimeMs === "number" && m.createdTimeMs > 0
        ? m.createdTimeMs
        : snapshottedAt;
    // The "msg-" prefix is added exactly once below, so the index fallback
    // contributes only the bare index (previously this yielded "msg-msg-N").
    let idPart;
    if (m.conversationId && m.createdTimeMs) {
      idPart = `${m.conversationId}-${m.createdTimeMs}`;
    } else if (m.senderUid && m.createdTimeMs) {
      idPart = `${m.senderUid}-${m.createdTimeMs}`;
    } else {
      idPart = String(idx);
    }
    events.push({
      kind: "message",
      id: `msg-${idPart}`,
      capturedAt,
      senderUid: m.senderUid || null,
      conversationId: m.conversationId || null,
      text: m.text || null,
      readStatus: typeof m.readStatus === "number" ? m.readStatus : null,
      contentBlob: m.contentBlob || null,
    });
  });

  // contacts
  const contacts = Array.isArray(input.contacts) ? input.contacts : [];
  contacts.forEach((c, idx) => {
    if (!c || typeof c !== "object") return;
    events.push({
      kind: "contact",
      id: c.uid ? `contact-${c.uid}` : `contact-${idx}`,
      // Contact rows carry no timestamp of their own; use the snapshot time.
      capturedAt: snapshottedAt,
      uid: c.uid || null,
      shortId: c.shortId || null,
      name: c.name || null,
      avatarUrl: c.avatarUrl || null,
      followStatus:
        typeof c.followStatus === "number" ? c.followStatus : null,
    });
  });

  return {
    schemaVersion: SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt,
    account,
    events,
  };
}
136
+
137
/**
 * Write a snapshot to disk as JSON and return the path that was written.
 *
 * The destination is `path.join(dir, fileName)`. By default that is
 * `<os.tmpdir()>/cc-douyin-snapshot-<uuid>.json`; the result is absolute
 * whenever `dir` is. Caller is responsible for cleanup.
 *
 * The snapshot carries private messages and contacts and the default
 * directory is the shared temp dir, so the file is created with mode 0o600
 * (owner read/write only). The mode applies only when the file is newly
 * created; an existing file keeps its current permissions.
 *
 * @param {object} snapshot - value to serialise with JSON.stringify.
 * @param {{dir?: string, fileName?: string}} [opts]
 * @returns {string} path of the written file.
 * @throws {Error} when `opts.fileName` contains a path separator; file-system
 *   errors from the write propagate unchanged.
 */
function writeSnapshotJson(snapshot, opts = {}) {
  const dir = opts.dir || os.tmpdir();
  const fileName =
    opts.fileName || `cc-douyin-snapshot-${crypto.randomUUID()}.json`;
  // Reject anything that could escape `dir` through a separator.
  if (fileName.includes("/") || fileName.includes("\\")) {
    throw new Error(
      "writeSnapshotJson: opts.fileName must be a basename, not a path",
    );
  }
  const full = path.join(dir, fileName);
  fs.writeFileSync(full, JSON.stringify(snapshot), {
    encoding: "utf-8",
    mode: 0o600,
  });
  return full;
}
155
+
156
/**
 * Remove a snapshot file, ignoring every failure. Safe to call from a
 * `finally` block: a falsy path is a no-op and unlink errors (missing file,
 * permissions, ...) are deliberately discarded.
 *
 * @param {string} [filePath] - file to delete.
 * @returns {void}
 */
function cleanupSnapshotJson(filePath) {
  if (filePath) {
    try {
      fs.unlinkSync(filePath);
    } catch (_ignored) {
      // best-effort: nothing useful to do if the delete fails
    }
  }
}
168
+
169
+ module.exports = {
170
+ buildSnapshot,
171
+ writeSnapshotJson,
172
+ cleanupSnapshotJson,
173
+ SNAPSHOT_SCHEMA_VERSION,
174
+ };
@@ -0,0 +1,281 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * Phase 3a (Weibo C 路径 — 2026-05-25): Node-side WeiboApiClient.
5
+ *
6
+ * Byte-parity port of
7
+ * `android-app/.../pdh/social/weibo/WeiboApiClient.kt` for the desktop
8
+ * PC + ADB path. Same m.weibo.cn endpoints, same headers, same JSON
9
+ * parse shape. Lockstep with the Kotlin version — if a real-device trap
10
+ * surfaces fix both sides.
11
+ *
12
+ * **Key differences from Bilibili Phase 1b**:
13
+ * 1. **No WBI signing** — m.weibo.cn mobile API requires cookie + UA +
14
+ * XHR header but no signature. Simpler client, no /nav handshake.
15
+ * 2. **UID via /api/config** — Weibo cookie has no DedeUserID equivalent;
16
+ * fetchUid() must do an HTTP roundtrip and persist the result.
17
+ * 3. **Time field is a Twitter-style date string** — "Sun Jan 12 13:45:00 +0800 2026"
18
+ * (not ISO 8601, and not unix seconds like Bilibili). Java's SimpleDateFormat
19
+ * parses it as "EEE MMM dd HH:mm:ss Z yyyy"; V8's lenient Date.parse accepts it too.
20
+ * 4. **Timeline endpoint via containerid** — user posts go through
21
+ * /api/container/getIndex?containerid=107603<uid>, not a dedicated
22
+ * /api/posts.
23
+ * 5. **Anti-bot signal**: missing `X-Requested-With: XMLHttpRequest` +
24
+ * `MWeibo-Pwa: 1` → 30x redirect to login HTML.
25
+ *
26
+ * 4 endpoints:
27
+ * - config /api/config (fetchUid + login state check)
28
+ * - posts /api/container/getIndex?type=uid&value=<uid>&containerid=107603<uid>
29
+ * - favourites /api/favorites?page=1
30
+ * - follows /api/friendships/friends?uid=<uid>&page=1
31
+ *
32
+ * Errors don't throw — endpoints that fail return [] and lastErrorCode +
33
+ * lastErrorMessage surface the cause for partial-result diagnostics.
34
+ */
35
+
36
+ const DEFAULT_BASE_URL = "https://m.weibo.cn/";
37
+
38
+ // Pinned Chrome 120 mobile UA — must look like a browser; the default
39
+ // `node-fetch/x.y.z` UA is answered with a -100 silent ban.
40
+ const BROWSER_UA =
41
+ "Mozilla/5.0 (Linux; Android 14; ChainlessChain) AppleWebKit/537.36 " +
42
+ "(KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36";
43
+
44
+ const BROWSER_HEADERS = Object.freeze({
45
+ "User-Agent": BROWSER_UA,
46
+ Referer: "https://m.weibo.cn/",
47
+ Accept: "application/json, text/plain, */*",
48
+ "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
49
+ // m.weibo.cn anti-bot: missing these → HTML redirect, not JSON
50
+ "X-Requested-With": "XMLHttpRequest",
51
+ "MWeibo-Pwa": "1",
52
+ });
53
+
54
/**
 * Parse a Weibo timestamp string into epoch milliseconds.
 *
 *   "Sun Jan 12 13:45:00 +0800 2026" → epoch ms  ("EEE MMM dd HH:mm:ss Z yyyy")
 *   "1716383021"                     → epoch ms  (× 1000 since it's < 1e12)
 *   "1716383021000"                  → epoch ms  (verbatim)
 *
 * The "EEE MMM dd HH:mm:ss Z yyyy" form is not ISO 8601, so how `Date.parse`
 * treats it is implementation-defined. It is therefore decoded explicitly
 * here; `Date.parse` is only the fallback for any other shape.
 *
 * @param {*} raw - timestamp as served by the API.
 * @returns {number} epoch milliseconds, or 0 when `raw` is not a non-empty
 *   string or cannot be parsed.
 */
function parseWeiboTime(raw) {
  if (typeof raw !== "string" || raw.length === 0) return 0;
  // Digits-only: unix seconds or unix milliseconds.
  if (/^\d+$/.test(raw)) {
    const n = parseInt(raw, 10);
    // Values below 1e12 are taken as seconds; 1e12 and above as milliseconds.
    return n >= 1e12 ? n : n * 1000;
  }
  const MONTHS = [
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec",
  ];
  const m =
    /^[A-Za-z]{3}\s+([A-Za-z]{3})\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})\s+([+-])(\d{2})(\d{2})\s+(\d{4})$/.exec(
      raw.trim(),
    );
  if (m) {
    const month = MONTHS.indexOf(m[1].toLowerCase());
    const day = Number(m[2]);
    const hour = Number(m[3]);
    const minute = Number(m[4]);
    const second = Number(m[5]);
    const year = Number(m[9]);
    // Only trust the explicit path for in-range fields; anything odd is left
    // to the Date.parse fallback below.
    const inRange =
      month >= 0 &&
      day >= 1 && day <= 31 &&
      hour <= 23 && minute <= 59 && second <= 59 &&
      year >= 100; // Date.UTC remaps years 0-99 to 1900-1999
    if (inRange) {
      const sign = m[6] === "-" ? -1 : 1;
      const offsetMinutes = sign * (Number(m[7]) * 60 + Number(m[8]));
      // The wall-clock fields are local to the given offset; subtract it to
      // get UTC.
      return (
        Date.UTC(year, month, day, hour, minute, second) -
        offsetMinutes * 60000
      );
    }
  }
  const t = Date.parse(raw);
  return Number.isFinite(t) ? t : 0;
}
73
+
74
/**
 * Reduce Weibo's HTML `text` field to plain text: remove tags, decode the
 * five entities the field uses (&nbsp; &amp; &lt; &gt; &quot;) and trim.
 *
 * Entities are decoded in a single pass so each one is decoded exactly once.
 * Sequential replaces that handle `&amp;` before `&lt;` turn an escaped
 * entity such as `&amp;lt;` into `<` instead of the literal text `&lt;`.
 *
 * @param {*} raw - HTML fragment.
 * @returns {string} plain text; "" when `raw` is not a non-empty string.
 */
function stripHtml(raw) {
  if (typeof raw !== "string" || raw.length === 0) return "";
  const ENTITIES = { nbsp: " ", amp: "&", lt: "<", gt: ">", quot: '"' };
  return raw
    .replace(/<[^>]+>/g, "")
    .replace(/&(nbsp|amp|lt|gt|quot);/g, (_match, name) => ENTITIES[name])
    .trim();
}
89
+
90
/**
 * Client for the m.weibo.cn JSON endpoints used by the collector.
 *
 * No fetch* method throws on request failure: they return null / [] and
 * record the cause in `lastErrorCode` / `lastErrorMessage`:
 *   - HTTP status  — non-2xx response
 *   - -2           — fetch / IO exception
 *   - -3           — body looked like JSON but failed to parse
 *   - -4           — body did not start with "{" (login / anti-bot HTML)
 *   - server `ok`  — JSON envelope whose numeric `ok` is not 1
 *
 * The error fields are shared instance state. Every request overwrites them
 * and a successful request resets them, so when several fetch* calls are in
 * flight at once only the outcome of the request that finishes last stays
 * visible.
 */
class WeiboApiClient {
  /**
   * @param {{baseUrl?: string, fetch?: Function}} [opts]
   *   `baseUrl` defaults to DEFAULT_BASE_URL and gets a trailing "/" when
   *   missing; `fetch` defaults to `globalThis.fetch`.
   * @throws {Error} when no fetch implementation is available.
   */
  constructor(opts = {}) {
    this.baseUrl = opts.baseUrl || DEFAULT_BASE_URL;
    if (!this.baseUrl.endsWith("/")) this.baseUrl += "/";
    this._fetch = opts.fetch || globalThis.fetch;
    if (typeof this._fetch !== "function") {
      throw new Error(
        "WeiboApiClient: fetch not available — pass opts.fetch or run on Node 18+",
      );
    }
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /**
   * GET `url` with the browser-like headers plus the given cookie and return
   * the parsed JSON envelope.
   *
   * @param {URL|string} url
   * @param {string} cookie - value for the Cookie header.
   * @returns {Promise<object|null>} parsed body, or null on any failure (the
   *   cause is stored via `_setLastError`).
   */
  async _doGetJson(url, cookie) {
    try {
      const resp = await this._fetch(url.toString(), {
        method: "GET",
        headers: { ...BROWSER_HEADERS, Cookie: cookie },
      });
      const body = await resp.text();
      if (!resp.ok) {
        this._setLastError(resp.status, `HTTP ${resp.status}`);
        return null;
      }
      if (!body.trimStart().startsWith("{")) {
        // Login redirect / anti-bot HTML — cookie expired or anti-spider hit
        this._setLastError(-4, "non-json (cookie expired?)");
        return null;
      }
      let obj;
      try {
        obj = JSON.parse(body);
      } catch (e) {
        this._setLastError(-3, "parse: " + (e?.message || String(e)));
        return null;
      }
      // A missing or non-numeric `ok` is treated as success.
      const ok = typeof obj.ok === "number" ? obj.ok : 1;
      if (ok !== 1) {
        // NOTE(review): an `ok: 0` envelope stores code 0, the same value
        // that means "no error"; only lastErrorMessage tells them apart.
        this._setLastError(ok, (obj.msg || "").toString());
        return null;
      }
      this._clearLastError();
      return obj;
    } catch (e) {
      this._setLastError(-2, "IO: " + (e?.message || String(e)));
      return null;
    }
  }

  /** Record the cause of the most recent failed request. */
  _setLastError(code, message) {
    this.lastErrorCode = code;
    this.lastErrorMessage = message;
  }

  /** Reset the error fields after a successful request. */
  _clearLastError() {
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /** Return `opts.limit` when it is a positive integer, else `fallback`. */
  _resolveLimit(opts, fallback) {
    return Number.isInteger(opts.limit) && opts.limit > 0
      ? opts.limit
      : fallback;
  }

  /** Return a status id as a string: `mid` first, then `id`; falsy if neither. */
  _midOf(status) {
    return (
      (status.mid && String(status.mid)) || (status.id && String(status.id))
    );
  }

  /**
   * Fetch /api/config to get the UID and validate login state.
   *
   * @param {string} cookie
   * @returns {Promise<number|null>} positive numeric UID, or null when the
   *   request failed, `data.login` is falsy, or `data.uid` is not a positive
   *   integer.
   */
  async fetchUid(cookie) {
    const url = new URL("api/config", this.baseUrl);
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return null;
    const data = obj.data || {};
    if (!data.login) return null;
    const uid = parseInt(data.uid, 10);
    return Number.isFinite(uid) && uid > 0 ? uid : null;
  }

  /**
   * Fetch the user's own posts from
   * /api/container/getIndex?type=uid&value=<uid>&containerid=107603<uid>.
   * Only cards with `card_type === 9` that carry an `mblog` with an id are
   * kept.
   *
   * @param {string} cookie
   * @param {number|string} uid
   * @param {{limit?: number}} [opts] - max posts to return (default 100).
   * @returns {Promise<Array<object>>} [] on request failure.
   */
  async fetchPosts(cookie, uid, opts = {}) {
    const limit = this._resolveLimit(opts, 100);
    const url = new URL("api/container/getIndex", this.baseUrl);
    url.searchParams.set("type", "uid");
    url.searchParams.set("value", String(uid));
    url.searchParams.set("containerid", `107603${uid}`);
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return [];
    const data = obj.data || {};
    const cards = Array.isArray(data.cards) ? data.cards : [];
    const count = (v) => (typeof v === "number" ? v : 0);
    const out = [];
    for (const card of cards) {
      if (out.length >= limit) break;
      if (!card || card.card_type !== 9) continue; // card_type=9 = mblog
      const blog = card.mblog;
      if (!blog) continue;
      const mid = this._midOf(blog);
      if (!mid) continue;
      out.push({
        mid,
        text: stripHtml(blog.text),
        createdAt: parseWeiboTime(blog.created_at),
        source: blog.source || null,
        repostsCount: count(blog.reposts_count),
        commentsCount: count(blog.comments_count),
        likesCount: count(blog.attitudes_count),
        picCount: count(blog.pic_num),
      });
    }
    return out;
  }

  /**
   * Fetch page 1 of /api/favorites.
   *
   * `limit` caps the number of returned items (as in fetchPosts); entries
   * skipped for lacking a status or id do not count against it.
   *
   * @param {string} cookie
   * @param {{limit?: number}} [opts] - max favourites to return (default 100).
   * @returns {Promise<Array<object>>} [] on request failure.
   */
  async fetchFavourites(cookie, opts = {}) {
    const limit = this._resolveLimit(opts, 100);
    const url = new URL("api/favorites", this.baseUrl);
    url.searchParams.set("page", "1");
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return [];
    const data = obj.data || {};
    const favs = Array.isArray(data.favorites) ? data.favorites : [];
    const out = [];
    for (const fav of favs) {
      if (out.length >= limit) break;
      if (!fav) continue;
      const status = fav.status;
      if (!status) continue;
      const mid = this._midOf(status);
      if (!mid) continue;
      const author = status.user || {};
      // Prefer the favourite time, then the post's creation time, then 0.
      const favAt =
        parseWeiboTime(fav.favorited_time) ||
        parseWeiboTime(status.created_at) ||
        0;
      out.push({
        mid,
        text: stripHtml(status.text),
        favAt,
        authorScreenName: author.screen_name || null,
      });
    }
    return out;
  }

  /**
   * Fetch page 1 of /api/friendships/friends?uid=<uid>.
   *
   * `limit` caps the number of returned items (as in fetchPosts); entries
   * skipped for lacking a numeric non-zero `id` do not count against it.
   *
   * @param {string} cookie
   * @param {number|string} uid
   * @param {{limit?: number}} [opts] - max follows to return (default 200).
   * @returns {Promise<Array<object>>} [] on request failure.
   */
  async fetchFollows(cookie, uid, opts = {}) {
    const limit = this._resolveLimit(opts, 200);
    const url = new URL("api/friendships/friends", this.baseUrl);
    url.searchParams.set("uid", String(uid));
    url.searchParams.set("page", "1");
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return [];
    const data = obj.data || {};
    const users = Array.isArray(data.users) ? data.users : [];
    const out = [];
    for (const u of users) {
      if (out.length >= limit) break;
      if (!u) continue;
      const followUid = typeof u.id === "number" ? u.id : 0;
      if (followUid === 0) continue;
      out.push({
        uid: followUid,
        screenName: u.screen_name || "(unnamed)",
        description: u.description || null,
        avatarUrl: u.profile_image_url || null,
        // m.weibo.cn /api/friendships/friends doesn't return follow_time —
        // 0 lets the snapshot builder fall back to snapshottedAt.
        followedAt: 0,
      });
    }
    return out;
  }
}
271
+
272
+ module.exports = {
273
+ WeiboApiClient,
274
+ // Exposed for tests
275
+ _internals: {
276
+ parseWeiboTime,
277
+ stripHtml,
278
+ BROWSER_UA,
279
+ BROWSER_HEADERS,
280
+ },
281
+ };
@@ -0,0 +1,169 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * Phase 3a (Weibo C 路径 — 2026-05-25): end-to-end orchestrator.
5
+ *
6
+ * bridge.invoke("weibo.cookies") ← Phase 3a cookies extension
7
+ * │
8
+ * ▼ {cookie, diagnostic}
9
+ * WeiboApiClient.fetchUid ← /api/config 拿 UID + 验登录
10
+ * │
11
+ * ▼ uid (numeric)
12
+ * fetchPosts + fetchFavourites + fetchFollows (partial-failure OK)
13
+ * │
14
+ * ▼ 3 arrays
15
+ * buildSnapshot + writeSnapshotJson ← schemaVersion=1
16
+ * │
17
+ * ▼
18
+ * registry.syncAdapter("social-weibo", { inputPath })
19
+ *
20
+ * Mirror of social-bilibili-adb/collector.js — same `{ok, report?, reason?,
21
+ * message?}` shape, same try/finally cleanup. **Key diff**: Weibo needs
22
+ * an extra fetchUid roundtrip after cookies extraction (cookie alone
23
+ * doesn't carry UID — Bilibili has DedeUserID inline).
24
+ */
25
+
26
+ const { WeiboApiClient } = require("./api-client");
27
+ const {
28
+ buildSnapshot,
29
+ writeSnapshotJson,
30
+ cleanupSnapshotJson,
31
+ } = require("./snapshot-builder");
32
+
33
/**
 * Pull cookies → fetchUid → posts / favourites / follows → write a snapshot.
 *
 * Flow:
 *   1. `bridge.invoke("weibo.cookies")` must yield `{cookie: string, diagnostic?}`.
 *   2. `client.fetchUid(cookie)` resolves the account UID.
 *   3. If no UID came back, a snapshot with no fetched data is still built
 *      (using the sentinel uid 1) and written, and the result is flagged
 *      `uidFetchFailed: true` with the client's lastErrorCode / lastErrorMessage.
 *   4. Otherwise the three endpoints are fetched in parallel, and the
 *      snapshot is built and written.
 *
 * @param {{invoke: Function}} bridge
 * @param {{
 *   now?: Function,
 *   apiClient?: object,
 *   limits?: {post?: number, favourite?: number, follow?: number},
 *   displayName?: string,
 *   stagingDir?: string,
 * }} [opts]
 * @returns {Promise<{
 *   snapshotPath: string,
 *   uid: number|null,
 *   eventCounts: {post: number, favourite: number, follow: number, total: number},
 *   lastErrorCode: number,
 *   lastErrorMessage: string|null,
 *   cookieDiagnostic: object|null,
 *   uidFetchFailed: boolean,
 * }>}
 * @throws {TypeError} when `bridge` has no `invoke` function.
 * @throws {Error} when the cookies payload has no string `cookie`.
 */
async function collect(bridge, opts = {}) {
  const bridgeUsable = Boolean(bridge) && typeof bridge.invoke === "function";
  if (!bridgeUsable) {
    throw new TypeError(
      "WeiboAdbCollector.collect: bridge must expose invoke(method, params)",
    );
  }
  const now = opts.now || Date.now;
  const client = opts.apiClient || new WeiboApiClient();
  const limits = opts.limits || {};
  // Non-integer limits are passed on as undefined so the client applies its
  // own default.
  const limitFor = (value) => (Number.isInteger(value) ? value : undefined);

  // Step 1 — cookies from the bridge extension.
  const cookieResult = await bridge.invoke("weibo.cookies");
  if (!cookieResult || typeof cookieResult.cookie !== "string") {
    throw new Error(
      "WeiboAdbCollector.collect: bridge.invoke('weibo.cookies') returned malformed payload — got cookie=" +
        typeof cookieResult?.cookie,
    );
  }
  const cookie = cookieResult.cookie;
  const cookieDiagnostic = cookieResult.diagnostic || null;

  // Step 2 — the UID is not carried by the cookie; ask the API for it.
  const uid = await client.fetchUid(cookie);

  if (uid) {
    // Step 3 — parallel fetch; the client reports a failed endpoint as [].
    const [posts, favourites, follows] = await Promise.all([
      client.fetchPosts(cookie, uid, { limit: limitFor(limits.post) }),
      client.fetchFavourites(cookie, { limit: limitFor(limits.favourite) }),
      client.fetchFollows(cookie, uid, { limit: limitFor(limits.follow) }),
    ]);

    // Step 4 — build and persist the snapshot.
    const fullSnapshot = buildSnapshot({
      uid,
      displayName: opts.displayName,
      posts,
      favourites,
      follows,
      snapshottedAt: now(),
    });
    const fullPath = writeSnapshotJson(fullSnapshot, { dir: opts.stagingDir });

    return {
      snapshotPath: fullPath,
      uid,
      eventCounts: {
        post: posts.length,
        favourite: favourites.length,
        follow: follows.length,
        total: fullSnapshot.events.length,
      },
      lastErrorCode: client.lastErrorCode,
      lastErrorMessage: client.lastErrorMessage,
      cookieDiagnostic,
      uidFetchFailed: false,
    };
  }

  // fetchUid gave nothing: cookie expired, anti-bot redirect, or IO error.
  // Still write a snapshot so the later registry sync has a file to read;
  // callers tell the cases apart via lastErrorCode / lastErrorMessage on the
  // returned object. uid 1 is a placeholder, not a real account id.
  const emptySnapshot = buildSnapshot({
    uid: 1,
    displayName: opts.displayName,
    snapshottedAt: now(),
  });
  const emptyPath = writeSnapshotJson(emptySnapshot, { dir: opts.stagingDir });
  return {
    snapshotPath: emptyPath,
    uid: null,
    eventCounts: { post: 0, favourite: 0, follow: 0, total: 0 },
    lastErrorCode: client.lastErrorCode,
    lastErrorMessage: client.lastErrorMessage,
    cookieDiagnostic,
    uidFetchFailed: true,
  };
}
128
+
129
/**
 * Convenience wrapper: run `collect`, hand the snapshot file to
 * `registry.syncAdapter("social-weibo", { inputPath })`, then delete the
 * snapshot file whether or not the sync succeeded.
 *
 * A sync error propagates to the caller after the cleanup attempt. An error
 * thrown by the cleanup itself is not propagated; it is reported as
 * `weibo.cleanupFailed: true`.
 *
 * @param {{invoke: Function}} bridge - forwarded to `collect`.
 * @param {{syncAdapter: Function}} registry
 * @param {object} [opts] - forwarded to `collect`.
 * @returns {Promise<object>} the sync report's own properties plus a `weibo`
 *   object holding the collection metadata.
 * @throws {TypeError} when `registry` has no `syncAdapter` function.
 */
async function collectAndSync(bridge, registry, opts = {}) {
  if (typeof registry?.syncAdapter !== "function") {
    throw new TypeError(
      "WeiboAdbCollector.collectAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const collected = await collect(bridge, opts);
  const stagedPath = collected.snapshotPath;

  // Returns true when removing the staged snapshot threw.
  const removeStagedFile = () => {
    try {
      cleanupSnapshotJson(stagedPath);
      return false;
    } catch (_e) {
      return true;
    }
  };

  let report = null;
  let cleanupFailed = false;
  try {
    report = await registry.syncAdapter("social-weibo", {
      inputPath: stagedPath,
    });
  } finally {
    cleanupFailed = removeStagedFile();
  }

  const weibo = {
    uid: collected.uid,
    eventCounts: collected.eventCounts,
    lastErrorCode: collected.lastErrorCode,
    lastErrorMessage: collected.lastErrorMessage,
    cookieDiagnostic: collected.cookieDiagnostic,
    uidFetchFailed: collected.uidFetchFailed,
    cleanupFailed,
  };
  return { ...report, weibo };
}
165
+
166
+ module.exports = {
167
+ collect,
168
+ collectAndSync,
169
+ };