@chainlesschain/personal-data-hub 0.3.1 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
- package/__tests__/adapters/email-adapter.test.js +1 -1
- package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
- package/__tests__/adapters/email-retry-progress.test.js +1 -1
- package/__tests__/adapters/email-templates.test.js +1 -1
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
- package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
- package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
- package/__tests__/adapters/system-data-android.test.js +32 -1
- package/__tests__/longtail-adapters.test.js +15 -2
- package/__tests__/shopping-adapters.test.js +96 -0
- package/__tests__/sign-providers.test.js +62 -0
- package/__tests__/travel-adapters.test.js +66 -0
- package/__tests__/whatsapp-adapter.test.js +5 -2
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
- package/lib/adapters/email-imap/email-adapter.js +224 -17
- package/lib/adapters/messaging-telegram/index.js +15 -12
- package/lib/adapters/messaging-whatsapp/index.js +15 -12
- package/lib/adapters/shopping-taobao/index.js +161 -21
- package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
- package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
- package/lib/adapters/social-bilibili-adb/collector.js +190 -0
- package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
- package/lib/adapters/social-bilibili-adb/index.js +51 -0
- package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
- package/lib/adapters/social-douyin/index.js +4 -0
- package/lib/adapters/social-douyin-adb/collector.js +165 -0
- package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
- package/lib/adapters/social-douyin-adb/index.js +57 -0
- package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
- package/lib/adapters/social-weibo-adb/api-client.js +281 -0
- package/lib/adapters/social-weibo-adb/collector.js +169 -0
- package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
- package/lib/adapters/social-weibo-adb/index.js +55 -0
- package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
- package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
- package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
- package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
- package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
- package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
- package/lib/adapters/system-data-android/adapter.js +77 -3
- package/lib/adapters/travel-amap/index.js +16 -10
- package/lib/adapters/travel-ctrip/index.js +25 -9
- package/lib/adapters/vscode/vscode-reader.js +7 -1
- package/lib/sign-providers/index.js +20 -0
- package/lib/sign-providers/interface.js +82 -0
- package/lib/sign-providers/null-sign-provider.js +30 -0
- package/package.json +6 -1
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 2a (Douyin C 路径 — 2026-05-25): IM-db parse result → snapshot JSON.
|
|
5
|
+
*
|
|
6
|
+
* Takes the `{messages, contacts}` shape from im-db-parser.js and produces
|
|
7
|
+
* a snapshot matching the existing `social-douyin` adapter's
|
|
8
|
+
* SNAPSHOT_SCHEMA_VERSION=1 contract — so we reuse the adapter's snapshot
|
|
9
|
+
* mode (`_syncViaSnapshot`) instead of opening a second adapter.
|
|
10
|
+
*
|
|
11
|
+
* Mirrors social-bilibili-adb/snapshot-builder.js. Single-source-of-truth
|
|
12
|
+
* for the adapter; we feed it via different upstreams.
|
|
13
|
+
*
|
|
14
|
+
* Snapshot schema (matches social-douyin/index.js:SNAPSHOT_SCHEMA_VERSION):
|
|
15
|
+
*
|
|
16
|
+
* {
|
|
17
|
+
* "schemaVersion": 1,
|
|
18
|
+
* "snapshottedAt": <epoch-ms>,
|
|
19
|
+
* "account": {
|
|
20
|
+
* "secUid": null, // C 路径不调 X-Bogus profile, 不知 secUid
|
|
21
|
+
 *     "shortId": "<uid>",   // the Douyin uid passed to buildSnapshot is reused as shortId
|
|
22
|
+
* "displayName": ""
|
|
23
|
+
* },
|
|
24
|
+
* "events": [
|
|
25
|
+
* { "kind": "message", "id": "msg-<conv>-<time>", "capturedAt": <ms>,
|
|
26
|
+
* "senderUid": "...", "conversationId": "...",
|
|
27
|
+
* "text": "...", "readStatus": 0/1, "contentBlob": "..." },
|
|
28
|
+
* { "kind": "contact", "id": "contact-<uid>", "capturedAt": <ms>,
|
|
29
|
+
* "uid": "...", "shortId": "...", "name": "...",
|
|
30
|
+
* "avatarUrl": "...", "followStatus": 0/1/2 }
|
|
31
|
+
* ]
|
|
32
|
+
* }
|
|
33
|
+
*
|
|
34
|
+
* Note: Douyin IM doesn't have a "this is me" marker — the db includes
|
|
35
|
+
* messages where `senderUid === <db-filename-uid>` (sent by self) and
|
|
36
|
+
* `senderUid !== <db-filename-uid>` (received). Both go into the snapshot;
|
|
37
|
+
* the consumer (e.g. PDH search) can filter by senderUid if needed.
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
const fs = require("node:fs");
|
|
41
|
+
const path = require("node:path");
|
|
42
|
+
const os = require("node:os");
|
|
43
|
+
const crypto = require("node:crypto");
|
|
44
|
+
|
|
45
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;

/**
 * Build an in-memory snapshot from parsed IM db rows. Pure function — no
 * disk IO.
 *
 * Messages come first in `events`, then contacts, each in input order.
 * Entries that are not objects are skipped (their index is still consumed,
 * so fallback ids stay tied to the input position).
 *
 * @param {{
 *   uid: string,
 *   messages?: Array,
 *   contacts?: Array,
 *   snapshottedAt?: number,
 *   displayName?: string,
 * }} input
 * @returns {{schemaVersion: number, snapshottedAt: number, account: object, events: Array}}
 * @throws {TypeError} when `input` is not an object or `input.uid` is not a
 *   non-empty string
 */
function buildSnapshot(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("buildSnapshot: input must be an object");
  }
  const uid = input.uid;
  if (typeof uid !== "string" || uid.length === 0) {
    throw new TypeError("buildSnapshot: input.uid must be a non-empty string");
  }
  // Fall back to "now" when the caller gives no usable timestamp.
  const snapshottedAt =
    Number.isFinite(input.snapshottedAt) && input.snapshottedAt > 0
      ? input.snapshottedAt
      : Date.now();
  const account = {
    // secUid is unknown via pure-db extraction, so it stays null. shortId
    // carries the numeric Douyin uid the caller passed in.
    secUid: null,
    shortId: uid,
    displayName:
      typeof input.displayName === "string" ? input.displayName : "",
  };
  const events = [];

  // messages
  const messages = Array.isArray(input.messages) ? input.messages : [];
  messages.forEach((m, idx) => {
    if (!m || typeof m !== "object") return;
    // Number.isFinite (same rule as snapshottedAt above) rejects Infinity,
    // which JSON.stringify would otherwise serialise as null.
    const capturedAt =
      Number.isFinite(m.createdTimeMs) && m.createdTimeMs > 0
        ? m.createdTimeMs
        : snapshottedAt;
    // ID strategy: conversationId + createdTime, then senderUid + createdTime,
    // then the row index. The index fallback already starts with "msg-", so
    // those ids read "msg-msg-<idx>"; kept as-is so existing ids stay stable.
    const idPart =
      m.conversationId && m.createdTimeMs
        ? `${m.conversationId}-${m.createdTimeMs}`
        : m.senderUid && m.createdTimeMs
          ? `${m.senderUid}-${m.createdTimeMs}`
          : `msg-${idx}`;
    events.push({
      kind: "message",
      id: `msg-${idPart}`,
      capturedAt,
      senderUid: m.senderUid || null,
      conversationId: m.conversationId || null,
      text: m.text || null,
      readStatus: typeof m.readStatus === "number" ? m.readStatus : null,
      contentBlob: m.contentBlob || null,
    });
  });

  // contacts
  const contacts = Array.isArray(input.contacts) ? input.contacts : [];
  contacts.forEach((c, idx) => {
    if (!c || typeof c !== "object") return;
    events.push({
      kind: "contact",
      id: c.uid ? `contact-${c.uid}` : `contact-${idx}`,
      capturedAt: snapshottedAt, // contact rows carry no timestamp of their own
      uid: c.uid || null,
      shortId: c.shortId || null,
      name: c.name || null,
      avatarUrl: c.avatarUrl || null,
      followStatus:
        typeof c.followStatus === "number" ? c.followStatus : null,
    });
  });

  return {
    schemaVersion: SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt,
    account,
    events,
  };
}
|
|
136
|
+
|
|
137
|
+
/**
 * Write a snapshot to disk as JSON. Default destination is
 * `<os.tmpdir()>/cc-douyin-snapshot-<uuid>.json`. Caller is responsible
 * for cleanup.
 *
 * The file is created with mode 0o600 because snapshots hold private
 * messages and the default directory is the shared temp dir. The mode only
 * takes effect when the file is created; an existing file keeps its
 * current permissions.
 *
 * @param {object} snapshot - value to serialise with JSON.stringify
 * @param {{dir?: string, fileName?: string}} [opts]
 * @returns {string} `dir` joined with the file name (absolute when `dir` is)
 * @throws {Error} when `opts.fileName` contains a path separator
 */
function writeSnapshotJson(snapshot, opts = {}) {
  const dir = opts.dir || os.tmpdir();
  const fileName =
    opts.fileName || `cc-douyin-snapshot-${crypto.randomUUID()}.json`;
  // Reject anything that could escape `dir`.
  if (fileName.includes("/") || fileName.includes("\\")) {
    throw new Error(
      "writeSnapshotJson: opts.fileName must be a basename, not a path",
    );
  }
  const full = path.join(dir, fileName);
  fs.writeFileSync(full, JSON.stringify(snapshot), {
    encoding: "utf-8",
    mode: 0o600,
  });
  return full;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Remove a snapshot file on a best-effort basis. Intended for `finally`
 * blocks: a falsy path is a no-op and any unlink failure is swallowed, so
 * this function never throws.
 *
 * @param {string} [filePath] - path of the snapshot file to delete
 * @returns {void}
 */
function cleanupSnapshotJson(filePath) {
  if (filePath) {
    try {
      fs.unlinkSync(filePath);
    } catch (_ignored) {
      // deliberate: a missing or undeletable file is not an error here
    }
  }
}
|
|
168
|
+
|
|
169
|
+
module.exports = {
|
|
170
|
+
buildSnapshot,
|
|
171
|
+
writeSnapshotJson,
|
|
172
|
+
cleanupSnapshotJson,
|
|
173
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
174
|
+
};
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 3a (Weibo C 路径 — 2026-05-25): Node-side WeiboApiClient.
|
|
5
|
+
*
|
|
6
|
+
* Byte-parity port of
|
|
7
|
+
* `android-app/.../pdh/social/weibo/WeiboApiClient.kt` for the desktop
|
|
8
|
+
* PC + ADB path. Same m.weibo.cn endpoints, same headers, same JSON
|
|
9
|
+
* parse shape. Lockstep with the Kotlin version — if a real-device trap
|
|
10
|
+
* surfaces fix both sides.
|
|
11
|
+
*
|
|
12
|
+
* **Key differences from Bilibili Phase 1b**:
|
|
13
|
+
* 1. **No WBI signing** — m.weibo.cn mobile API requires cookie + UA +
|
|
14
|
+
* XHR header but no signature. Simpler client, no /nav handshake.
|
|
15
|
+
* 2. **UID via /api/config** — Weibo cookie has no DedeUserID equivalent;
|
|
16
|
+
* fetchUid() must do an HTTP roundtrip and persist the result.
|
|
17
|
+
 * 3. **Time field is a formatted date string, not ISO 8601** — "Sun Jan 12 13:45:00 +0800 2026"
|
|
18
|
+
* format (not unix seconds like Bilibili). Java's SimpleDateFormat
|
|
19
|
+
* parses it; Node's Date can too once we know the format.
|
|
20
|
+
* 4. **Timeline endpoint via containerid** — user posts go through
|
|
21
|
+
* /api/container/getIndex?containerid=107603<uid>, not a dedicated
|
|
22
|
+
* /api/posts.
|
|
23
|
+
* 5. **Anti-bot signal**: missing `X-Requested-With: XMLHttpRequest` +
|
|
24
|
+
* `MWeibo-Pwa: 1` → 30x redirect to login HTML.
|
|
25
|
+
*
|
|
26
|
+
* 4 endpoints:
|
|
27
|
+
* - config /api/config (fetchUid + login state check)
|
|
28
|
+
* - posts /api/container/getIndex?type=uid&value=<uid>&containerid=107603<uid>
|
|
29
|
+
* - favourites /api/favorites?page=1
|
|
30
|
+
* - follows /api/friendships/friends?uid=<uid>&page=1
|
|
31
|
+
*
|
|
32
|
+
* Errors don't throw — endpoints that fail return [] and lastErrorCode +
|
|
33
|
+
* lastErrorMessage surface the cause for partial-result diagnostics.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
// Root of the mobile web API; endpoint paths are resolved against it.
const DEFAULT_BASE_URL = "https://m.weibo.cn/";

// Chrome 120 mobile User-Agent, pinned. The request has to look like a
// browser: per the original note, a default `node-fetch/x.y.z` UA gets a
// -100 response (apparently a silent ban — TODO confirm).
const BROWSER_UA = [
  "Mozilla/5.0 (Linux; Android 14; ChainlessChain) AppleWebKit/537.36",
  "(KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
].join(" ");

// Headers sent with every request. Frozen so callers cannot mutate the
// shared object; a per-request copy is made via spread.
const BROWSER_HEADERS = Object.freeze({
  "User-Agent": BROWSER_UA,
  Referer: "https://m.weibo.cn/",
  Accept: "application/json, text/plain, */*",
  "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
  // m.weibo.cn anti-bot: without the next two the answer is an HTML
  // redirect instead of JSON
  "X-Requested-With": "XMLHttpRequest",
  "MWeibo-Pwa": "1",
});
|
|
53
|
+
|
|
54
|
+
// Month abbreviations as they appear in Weibo's `created_at` strings.
const WEIBO_MONTH_INDEX = Object.freeze({
  jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
  jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
});

// "EEE MMM dd HH:mm:ss Z yyyy", e.g. "Sun Jan 12 13:45:00 +0800 2026".
// The weekday is matched but not validated.
const WEIBO_TIME_RE =
  /^[A-Za-z]{3},?\s+([A-Za-z]{3})\s+(\d{1,2})\s+(\d{2}):(\d{2}):(\d{2})\s+([+-])(\d{2})(\d{2})\s+(\d{4})$/;

/**
 * Parse a Weibo timestamp into epoch milliseconds.
 *   "Sun Jan 12 13:45:00 +0800 2026"  → epoch ms
 *   "1716383021" / 1716383021         → epoch ms (× 1000, value is not > 1e12)
 *   "1716383021000" / 1716383021000   → epoch ms (verbatim)
 *
 * The formatted-date form is not ISO 8601, so it is decoded explicitly
 * rather than left to `Date.parse`, whose handling of non-ISO strings is
 * implementation-defined. `Date.parse` is still the last resort for any
 * other shape.
 *
 * @param {string|number} raw - timestamp as served by the API
 * @returns {number} epoch milliseconds, or 0 when the value is missing or
 *   unparseable
 */
function parseWeiboTime(raw) {
  // JSON numbers: same seconds-vs-milliseconds rule as digit strings.
  if (typeof raw === "number") {
    if (!Number.isFinite(raw) || raw <= 0) return 0;
    return raw > 1e12 ? raw : raw * 1000;
  }
  if (typeof raw !== "string") return 0;
  const text = raw.trim();
  if (text.length === 0) return 0;

  // Digits-only fallback — Weibo occasionally serves unix time verbatim.
  if (/^\d+$/.test(text)) {
    const n = Number.parseInt(text, 10);
    return n > 1e12 ? n : n * 1000;
  }

  const parts = WEIBO_TIME_RE.exec(text);
  if (parts) {
    const month = WEIBO_MONTH_INDEX[parts[1].toLowerCase()];
    if (month !== undefined) {
      const sign = parts[6] === "-" ? -1 : 1;
      const offsetMinutes = sign * (Number(parts[7]) * 60 + Number(parts[8]));
      const wallClockAsUtc = Date.UTC(
        Number(parts[9]),
        month,
        Number(parts[2]),
        Number(parts[3]),
        Number(parts[4]),
        Number(parts[5]),
      );
      // Subtract the zone offset to convert local wall time to UTC.
      const ms = wallClockAsUtc - offsetMinutes * 60000;
      if (Number.isFinite(ms)) return ms;
    }
  }

  const t = Date.parse(text);
  return Number.isFinite(t) ? t : 0;
}
|
|
73
|
+
|
|
74
|
+
// Named entities decoded by stripHtml, keyed by the name between "&" and ";".
const HTML_ENTITY_TEXT = Object.freeze({
  nbsp: " ",
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
});

/**
 * Strip HTML from Weibo's `text` field (contains <a>, <span>, etc.) and
 * decode the nbsp / amp / lt / gt / quot named entities.
 *
 * Entities are decoded in one pass, so text that was itself escaped (an
 * ampersand entity followed by "lt;") is decoded exactly once instead of
 * being turned into a bare "<". The entity pattern is written as a single
 * alternation so it does not contain literal entity sequences.
 *
 * @param {string} raw - HTML fragment from the API
 * @returns {string} trimmed plain text; "" for non-string or empty input
 */
function stripHtml(raw) {
  if (typeof raw !== "string" || raw.length === 0) return "";
  return raw
    .replace(/<[^>]+>/g, "")
    .replace(/&(nbsp|amp|lt|gt|quot);/g, (_match, name) => HTML_ENTITY_TEXT[name])
    .trim();
}
|
|
89
|
+
|
|
90
|
+
/**
 * Client for the m.weibo.cn JSON endpoints used by the collector.
 *
 * Endpoint methods never throw on HTTP, parse or API-level failures. They
 * return `null` (fetchUid) or `[]` (list endpoints) and record the cause in
 * `lastErrorCode` / `lastErrorMessage`:
 *   - HTTP status  non-2xx response
 *   - -2           fetch / IO failure
 *   - -3           body looked like JSON but failed to parse
 *   - -4           body is not JSON (login redirect / anti-bot HTML)
 *   - other        the API's own `ok` value when it is not 1
 *
 * The error fields are reset when a request STARTS, not when one succeeds,
 * so a failure recorded by one of several concurrent calls is not erased by
 * a sibling call that succeeds afterwards. For sequential use the observable
 * result is unchanged: 0/null after a success, the cause after a failure.
 */
class WeiboApiClient {
  /**
   * @param {{baseUrl?: string, fetch?: Function}} [opts] - `baseUrl`
   *   defaults to DEFAULT_BASE_URL and gets a trailing "/" if missing;
   *   `fetch` defaults to `globalThis.fetch`.
   * @throws {Error} when no fetch implementation is available
   */
  constructor(opts = {}) {
    this.baseUrl = opts.baseUrl || DEFAULT_BASE_URL;
    // `new URL(relative, base)` drops the last path segment of a base
    // without a trailing slash, so normalise it here.
    if (!this.baseUrl.endsWith("/")) this.baseUrl += "/";
    this._fetch = opts.fetch || globalThis.fetch;
    if (typeof this._fetch !== "function") {
      throw new Error(
        "WeiboApiClient: fetch not available — pass opts.fetch or run on Node 18+",
      );
    }
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /**
   * GET <url> with browser-like headers and return the parsed JSON object,
   * or null on any failure (cause recorded via _setLastError). Includes the
   * non-JSON-body check: Weibo answers with login HTML when the cookie has
   * expired.
   *
   * @param {URL|string} url
   * @param {string} cookie - value for the Cookie header
   * @returns {Promise<object|null>}
   */
  async _doGetJson(url, cookie) {
    // Runs synchronously at call time (before the first await), so parallel
    // callers all reset up front and only failures write afterwards.
    this._clearLastError();
    try {
      const resp = await this._fetch(url.toString(), {
        method: "GET",
        headers: { ...BROWSER_HEADERS, Cookie: cookie },
      });
      const body = await resp.text();
      if (!resp.ok) {
        this._setLastError(resp.status, `HTTP ${resp.status}`);
        return null;
      }
      const trimmed = body.trimStart();
      if (!trimmed.startsWith("{")) {
        // Login redirect / anti-bot HTML — cookie expired or anti-spider hit
        this._setLastError(-4, "non-json (cookie expired?)");
        return null;
      }
      let obj;
      try {
        obj = JSON.parse(body);
      } catch (e) {
        this._setLastError(-3, "parse: " + (e.message || String(e)));
        return null;
      }
      // A missing `ok` field is treated as success.
      const ok = typeof obj.ok === "number" ? obj.ok : 1;
      if (ok !== 1) {
        this._setLastError(ok, (obj.msg || "").toString());
        return null;
      }
      return obj;
    } catch (e) {
      this._setLastError(-2, "IO: " + (e.message || String(e)));
      return null;
    }
  }

  /** Record the cause of the most recent failure. */
  _setLastError(code, message) {
    this.lastErrorCode = code;
    this.lastErrorMessage = message;
  }

  /** Reset the error fields to their "no error" values. */
  _clearLastError() {
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /**
   * Fetch /api/config to get the UID and validate login state.
   *
   * @param {string} cookie
   * @returns {Promise<number|null>} numeric UID, or null when the request
   *   failed, `data.login` is falsy, or the uid is not a positive integer
   */
  async fetchUid(cookie) {
    const url = new URL("api/config", this.baseUrl);
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return null;
    const data = obj.data || {};
    if (!data.login) return null;
    const uidStr = data.uid;
    const uid = parseInt(uidStr, 10);
    return Number.isFinite(uid) && uid > 0 ? uid : null;
  }

  /**
   * Fetch the user's own posts (first page of the timeline).
   * containerid=107603<uid> is the "user's own mblog" container.
   *
   * @param {string} cookie
   * @param {number|string} uid
   * @param {{limit?: number}} [opts] - positive integer cap, default 100
   * @returns {Promise<Array<object>>} parsed posts; [] on failure
   */
  async fetchPosts(cookie, uid, opts = {}) {
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 100;
    const containerid = `107603${uid}`;
    const url = new URL("api/container/getIndex", this.baseUrl);
    url.searchParams.set("type", "uid");
    url.searchParams.set("value", String(uid));
    url.searchParams.set("containerid", containerid);
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return [];
    const data = obj.data || {};
    const cards = Array.isArray(data.cards) ? data.cards : [];
    const out = [];
    for (const card of cards) {
      // The limit counts emitted posts, not scanned cards.
      if (out.length >= limit) break;
      if (!card || card.card_type !== 9) continue; // card_type=9 = mblog
      const blog = card.mblog;
      if (!blog) continue;
      const mid = (blog.mid && String(blog.mid)) || (blog.id && String(blog.id));
      if (!mid) continue;
      out.push({
        mid,
        text: stripHtml(blog.text),
        createdAt: parseWeiboTime(blog.created_at),
        source: blog.source || null,
        repostsCount: typeof blog.reposts_count === "number" ? blog.reposts_count : 0,
        commentsCount:
          typeof blog.comments_count === "number" ? blog.comments_count : 0,
        likesCount:
          typeof blog.attitudes_count === "number" ? blog.attitudes_count : 0,
        picCount: typeof blog.pic_num === "number" ? blog.pic_num : 0,
      });
    }
    return out;
  }

  /**
   * Fetch the first page of the user's favourites.
   *
   * @param {string} cookie
   * @param {{limit?: number}} [opts] - positive integer cap on scanned
   *   entries, default 100
   * @returns {Promise<Array<object>>} parsed favourites; [] on failure
   */
  async fetchFavourites(cookie, opts = {}) {
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 100;
    const url = new URL("api/favorites", this.baseUrl);
    url.searchParams.set("page", "1");
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return [];
    const data = obj.data || {};
    const favs = Array.isArray(data.favorites) ? data.favorites : [];
    const out = [];
    for (let i = 0; i < Math.min(limit, favs.length); i++) {
      const fav = favs[i];
      if (!fav) continue;
      const status = fav.status;
      if (!status) continue;
      const mid = (status.mid && String(status.mid)) || (status.id && String(status.id));
      if (!mid) continue;
      const author = status.user || {};
      // Prefer the time the post was favourited, then its creation time.
      const favAt =
        parseWeiboTime(fav.favorited_time) ||
        parseWeiboTime(status.created_at) ||
        0;
      out.push({
        mid,
        text: stripHtml(status.text),
        favAt,
        authorScreenName: author.screen_name || null,
      });
    }
    return out;
  }

  /**
   * Fetch the first page of accounts the user follows.
   *
   * @param {string} cookie
   * @param {number|string} uid
   * @param {{limit?: number}} [opts] - positive integer cap on scanned
   *   entries, default 200
   * @returns {Promise<Array<object>>} parsed follows; [] on failure
   */
  async fetchFollows(cookie, uid, opts = {}) {
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 200;
    const url = new URL("api/friendships/friends", this.baseUrl);
    url.searchParams.set("uid", String(uid));
    url.searchParams.set("page", "1");
    const obj = await this._doGetJson(url, cookie);
    if (!obj) return [];
    const data = obj.data || {};
    const users = Array.isArray(data.users) ? data.users : [];
    const out = [];
    for (let i = 0; i < Math.min(limit, users.length); i++) {
      const u = users[i];
      if (!u) continue;
      // Entries without a numeric, non-zero id are dropped.
      const followUid = typeof u.id === "number" ? u.id : 0;
      if (followUid === 0) continue;
      out.push({
        uid: followUid,
        screenName: u.screen_name || "(unnamed)",
        description: u.description || null,
        avatarUrl: u.profile_image_url || null,
        // m.weibo.cn /api/friendships/friends doesn't return follow_time —
        // 0 lets the snapshot builder fall back to snapshottedAt.
        followedAt: 0,
      });
    }
    return out;
  }
}
|
|
271
|
+
|
|
272
|
+
module.exports = {
|
|
273
|
+
WeiboApiClient,
|
|
274
|
+
// Exposed for tests
|
|
275
|
+
_internals: {
|
|
276
|
+
parseWeiboTime,
|
|
277
|
+
stripHtml,
|
|
278
|
+
BROWSER_UA,
|
|
279
|
+
BROWSER_HEADERS,
|
|
280
|
+
},
|
|
281
|
+
};
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 3a (Weibo C 路径 — 2026-05-25): end-to-end orchestrator.
|
|
5
|
+
*
|
|
6
|
+
* bridge.invoke("weibo.cookies") ← Phase 3a cookies extension
|
|
7
|
+
* │
|
|
8
|
+
* ▼ {cookie, diagnostic}
|
|
9
|
+
* WeiboApiClient.fetchUid ← /api/config 拿 UID + 验登录
|
|
10
|
+
* │
|
|
11
|
+
* ▼ uid (numeric)
|
|
12
|
+
* fetchPosts + fetchFavourites + fetchFollows (partial-failure OK)
|
|
13
|
+
* │
|
|
14
|
+
* ▼ 3 arrays
|
|
15
|
+
* buildSnapshot + writeSnapshotJson ← schemaVersion=1
|
|
16
|
+
* │
|
|
17
|
+
* ▼
|
|
18
|
+
* registry.syncAdapter("social-weibo", { inputPath })
|
|
19
|
+
*
|
|
20
|
+
* Mirror of social-bilibili-adb/collector.js — same `{ok, report?, reason?,
|
|
21
|
+
* message?}` shape, same try/finally cleanup. **Key diff**: Weibo needs
|
|
22
|
+
* an extra fetchUid roundtrip after cookies extraction (cookie alone
|
|
23
|
+
* doesn't carry UID — Bilibili has DedeUserID inline).
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
const { WeiboApiClient } = require("./api-client");
|
|
27
|
+
const {
|
|
28
|
+
buildSnapshot,
|
|
29
|
+
writeSnapshotJson,
|
|
30
|
+
cleanupSnapshotJson,
|
|
31
|
+
} = require("./snapshot-builder");
|
|
32
|
+
|
|
33
|
+
/**
 * Run the collection pipeline: cookies → fetchUid → three endpoint fetches
 * → snapshot file written to the staging directory.
 *
 * When fetchUid yields no uid, an empty snapshot is still written (built
 * with the sentinel uid 1) and the result carries `uid: null`,
 * `uidFetchFailed: true` and the client's last error, so callers can tell
 * the cases apart without catching. Endpoint failures are tolerated the
 * same way: the client returns [] and its last error is reported.
 *
 * @param {{invoke: Function}} bridge - must answer invoke("weibo.cookies")
 *   with `{cookie: string, diagnostic?: any}`
 * @param {{
 *   now?: Function,
 *   apiClient?: object,
 *   limits?: {post?: number, favourite?: number, follow?: number},
 *   displayName?: string,
 *   stagingDir?: string,
 * }} [opts]
 * @returns {Promise<object>} `{snapshotPath, uid, eventCounts,
 *   lastErrorCode, lastErrorMessage, cookieDiagnostic, uidFetchFailed}`
 * @throws {TypeError} when `bridge` has no `invoke` function
 * @throws {Error} when the cookies payload has no string `cookie`
 */
async function collect(bridge, opts = {}) {
  if (typeof bridge?.invoke !== "function") {
    throw new TypeError(
      "WeiboAdbCollector.collect: bridge must expose invoke(method, params)",
    );
  }
  const clock = opts.now || Date.now;
  const api = opts.apiClient || new WeiboApiClient();
  const caps = opts.limits || {};
  // Only integer limits are forwarded; anything else lets the client default.
  const optionalLimit = (value) => (Number.isInteger(value) ? value : undefined);
  const stage = (snapshot) => writeSnapshotJson(snapshot, { dir: opts.stagingDir });

  // Step 1 — cookies from the device-side extension.
  const payload = await bridge.invoke("weibo.cookies");
  if (typeof payload?.cookie !== "string") {
    throw new Error(
      "WeiboAdbCollector.collect: bridge.invoke('weibo.cookies') returned malformed payload — got cookie=" +
        typeof payload?.cookie,
    );
  }
  const cookie = payload.cookie;
  const cookieDiagnostic = payload.diagnostic || null;

  // Step 2 — uid lookup; the Weibo cookie carries no uid of its own.
  const uid = await api.fetchUid(cookie);
  if (!uid) {
    // No uid: expired cookie, login/anti-bot HTML, or an IO error. Emit an
    // event-less snapshot so a later registry sync has a file to read;
    // lastErrorCode / lastErrorMessage in the result carry the cause.
    const emptySnapshot = buildSnapshot({
      uid: 1, // sentinel so buildSnapshot accepts the input; no events follow
      displayName: opts.displayName,
      snapshottedAt: clock(),
    });
    return {
      snapshotPath: stage(emptySnapshot),
      uid: null,
      eventCounts: { post: 0, favourite: 0, follow: 0, total: 0 },
      lastErrorCode: api.lastErrorCode,
      lastErrorMessage: api.lastErrorMessage,
      cookieDiagnostic,
      uidFetchFailed: true,
    };
  }

  // Step 3 — the three endpoints in parallel; a failing one yields [].
  const pending = [
    api.fetchPosts(cookie, uid, { limit: optionalLimit(caps.post) }),
    api.fetchFavourites(cookie, { limit: optionalLimit(caps.favourite) }),
    api.fetchFollows(cookie, uid, { limit: optionalLimit(caps.follow) }),
  ];
  const [posts, favourites, follows] = await Promise.all(pending);

  // Step 4 — assemble and persist.
  const snapshot = buildSnapshot({
    uid,
    displayName: opts.displayName,
    posts,
    favourites,
    follows,
    snapshottedAt: clock(),
  });
  const snapshotPath = stage(snapshot);
  const eventCounts = {
    post: posts.length,
    favourite: favourites.length,
    follow: follows.length,
    total: snapshot.events.length,
  };

  return {
    snapshotPath,
    uid,
    eventCounts,
    lastErrorCode: api.lastErrorCode,
    lastErrorMessage: api.lastErrorMessage,
    cookieDiagnostic,
    uidFetchFailed: false,
  };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Convenience wrapper: collect a snapshot, hand it to
 * registry.syncAdapter("social-weibo"), then delete the staged file.
 *
 * The staged file is removed whether or not the sync succeeds; a sync error
 * still propagates to the caller. Errors thrown by `collect` propagate
 * before any sync or cleanup happens.
 *
 * @param {{invoke: Function}} bridge - passed through to collect()
 * @param {{syncAdapter: Function}} registry
 * @param {object} [opts] - passed through to collect()
 * @returns {Promise<object>} the sync report's own properties plus a
 *   `weibo` object with the collection details and `cleanupFailed`
 * @throws {TypeError} when `registry` has no `syncAdapter` function
 */
async function collectAndSync(bridge, registry, opts = {}) {
  if (typeof registry?.syncAdapter !== "function") {
    throw new TypeError(
      "WeiboAdbCollector.collectAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const {
    snapshotPath,
    uid,
    eventCounts,
    lastErrorCode,
    lastErrorMessage,
    cookieDiagnostic,
    uidFetchFailed,
  } = await collect(bridge, opts);

  let cleanupFailed = false;
  let report = null;
  try {
    report = await registry.syncAdapter("social-weibo", { inputPath: snapshotPath });
  } finally {
    // Cleanup must never mask the outcome of the sync itself.
    try {
      cleanupSnapshotJson(snapshotPath);
    } catch (_cleanupError) {
      cleanupFailed = true;
    }
  }

  const weibo = {
    uid,
    eventCounts,
    lastErrorCode,
    lastErrorMessage,
    cookieDiagnostic,
    uidFetchFailed,
    cleanupFailed,
  };
  return { ...report, weibo };
}
|
|
165
|
+
|
|
166
|
+
module.exports = {
|
|
167
|
+
collect,
|
|
168
|
+
collectAndSync,
|
|
169
|
+
};
|