@chainlesschain/personal-data-hub 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapter-guide.test.js +47 -0
- package/__tests__/adapters/local-im-pc.test.js +7 -2
- package/__tests__/adapters/pc-local-discovery.test.js +141 -0
- package/__tests__/adapters/qq-pc-direct-read.test.js +43 -2
- package/__tests__/adapters/social-douyin-adb-db-extension.test.js +114 -0
- package/__tests__/adapters/social-weibo-adb-cookies-extension.test.js +167 -0
- package/__tests__/adapters/wechat-pc-direct-read.test.js +160 -2
- package/__tests__/adapters/wechat-pc-v4-sidecar.test.js +72 -0
- package/__tests__/registry-readiness.test.js +59 -0
- package/lib/adapter-guide.js +54 -19
- package/lib/adapter-readiness.js +23 -0
- package/lib/adapters/_local-im-pc-adapter.js +34 -5
- package/lib/adapters/_pc-local-discovery.js +362 -0
- package/lib/adapters/qq-pc/index.js +118 -8
- package/lib/adapters/qq-pc/qqnt-sidecar.js +109 -0
- package/lib/adapters/social-douyin-adb/db-extension.js +66 -4
- package/lib/adapters/social-weibo-adb/cookies-extension.js +33 -6
- package/lib/adapters/wechat-pc/index.js +182 -8
- package/lib/adapters/wechat-pc/v4-sidecar.js +112 -0
- package/lib/registry.js +78 -2
- package/package.json +1 -1
|
@@ -37,8 +37,29 @@ const crypto = require("node:crypto");
|
|
|
37
37
|
const DOUYIN_DB_REMOTE_DIR =
|
|
38
38
|
"/data/data/com.ss.android.ugc.aweme/databases";
|
|
39
39
|
|
|
40
|
+
// Legacy plaintext social-DM IM db (Brignoni 2018 TikTok era, `<19-digit-uid>_im.db`).
|
|
40
41
|
const IM_DB_PATTERN = /^(\d{19})_im\.db$/;
|
|
41
42
|
|
|
43
|
+
// Real-device verification 2026-06-08 (Xiaomi chopin / MIUI 13, Douyin
|
|
44
|
+
// v??-2026 logged in) found CURRENT Douyin no longer ships a plaintext
|
|
45
|
+
// social-DM IM db. Two new on-disk shapes coexist in databases/:
|
|
46
|
+
//
|
|
47
|
+
// encrypted_<uid>_im.db — the social DM store, now SQLCipher-ENCRYPTED
|
|
48
|
+
// (header is NOT `SQLite format 3`). Reading it
|
|
49
|
+
// needs the per-user key, which only the frida
|
|
50
|
+
// key-hook path (Phase 2b, libmsaoaidsec.so
|
|
51
|
+
// anti-debug bypass) can recover — the plaintext
|
|
52
|
+
// C-path here cannot.
|
|
53
|
+
// im_database_<uid> — a Room db, but it is the in-app 豆包/Doubao AI
|
|
54
|
+
// ASSISTANT chat (tables im_message / im_conversation
|
|
55
|
+
// / im_bot), NOT person-to-person social DMs.
|
|
56
|
+
//
|
|
57
|
+
// We classify all three so the handler can emit a precise, actionable error
|
|
58
|
+
// instead of a misleading DOUYIN_NO_IM_DB. See memory
|
|
59
|
+
// [[pdh_douyin_c_path_phase_2a]] / [[pdh_social_cookie_endpoint_drift_2026_05]].
|
|
60
|
+
const ENCRYPTED_IM_DB_PATTERN = /^encrypted_(\d+)_im\.db$/;
|
|
61
|
+
const DOUBAO_IM_DB_PATTERN = /^im_database_(\d{6,})$/;
|
|
62
|
+
|
|
42
63
|
/**
|
|
43
64
|
* List candidate IM db filenames + uid via `adb shell su -c "ls databases/"`.
|
|
44
65
|
*
|
|
@@ -64,15 +85,27 @@ async function listImDbs(adb, serial, opts) {
|
|
|
64
85
|
return { candidates: [], dirMissing: true };
|
|
65
86
|
}
|
|
66
87
|
const candidates = [];
|
|
88
|
+
const encryptedCandidates = [];
|
|
89
|
+
const doubaoCandidates = [];
|
|
67
90
|
for (const line of lines) {
|
|
68
91
|
const fileName = line.trim();
|
|
69
92
|
if (!fileName) continue;
|
|
70
93
|
const m = fileName.match(IM_DB_PATTERN);
|
|
71
94
|
if (m) {
|
|
72
95
|
candidates.push({ uid: m[1], fileName });
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
const enc = fileName.match(ENCRYPTED_IM_DB_PATTERN);
|
|
99
|
+
if (enc) {
|
|
100
|
+
encryptedCandidates.push({ uid: enc[1], fileName });
|
|
101
|
+
continue;
|
|
102
|
+
}
|
|
103
|
+
const doubao = fileName.match(DOUBAO_IM_DB_PATTERN);
|
|
104
|
+
if (doubao) {
|
|
105
|
+
doubaoCandidates.push({ uid: doubao[1], fileName });
|
|
73
106
|
}
|
|
74
107
|
}
|
|
75
|
-
return { candidates, dirMissing: false };
|
|
108
|
+
return { candidates, encryptedCandidates, doubaoCandidates, dirMissing: false };
|
|
76
109
|
}
|
|
77
110
|
|
|
78
111
|
/**
|
|
@@ -159,9 +192,10 @@ function createDouyinDbExtension(factoryOpts = {}) {
|
|
|
159
192
|
}
|
|
160
193
|
|
|
161
194
|
// Step 1: discover candidate IM dbs.
|
|
162
|
-
const { candidates, dirMissing } =
|
|
163
|
-
|
|
164
|
-
|
|
195
|
+
const { candidates, encryptedCandidates, doubaoCandidates, dirMissing } =
|
|
196
|
+
await listImDbs(ctx.adb, serial, {
|
|
197
|
+
timeoutMs,
|
|
198
|
+
});
|
|
165
199
|
if (dirMissing) {
|
|
166
200
|
throw new Error(
|
|
167
201
|
"DOUYIN_NOT_INSTALLED: " +
|
|
@@ -170,6 +204,32 @@ function createDouyinDbExtension(factoryOpts = {}) {
|
|
|
170
204
|
);
|
|
171
205
|
}
|
|
172
206
|
if (candidates.length === 0) {
|
|
207
|
+
// No legacy plaintext IM db. Distinguish the modern layouts so the UI
|
|
208
|
+
// can tell the user the truth instead of "no db found". Real-device
|
|
209
|
+
// verification 2026-06-08 — see ENCRYPTED_IM_DB_PATTERN comment above.
|
|
210
|
+
if (encryptedCandidates && encryptedCandidates.length > 0) {
|
|
211
|
+
throw new Error(
|
|
212
|
+
"DOUYIN_IM_DB_ENCRYPTED: this Douyin version stores its social DM db as " +
|
|
213
|
+
`\`encrypted_<uid>_im.db\` (SQLCipher) — found ${encryptedCandidates
|
|
214
|
+
.map((c) => c.fileName)
|
|
215
|
+
.join(", ")}. The plaintext C-path can't read it; the per-user ` +
|
|
216
|
+
"key must be recovered via the frida key-hook path (Phase 2b, " +
|
|
217
|
+
"libmsaoaidsec.so anti-debug bypass). Plaintext direct-read is no " +
|
|
218
|
+
"longer possible on current Douyin.",
|
|
219
|
+
);
|
|
220
|
+
}
|
|
221
|
+
if (doubaoCandidates && doubaoCandidates.length > 0) {
|
|
222
|
+
throw new Error(
|
|
223
|
+
"DOUYIN_ONLY_DOUBAO_AI_CHAT: the only readable `im_database_<uid>` db " +
|
|
224
|
+
`(${doubaoCandidates
|
|
225
|
+
.map((c) => c.fileName)
|
|
226
|
+
.join(", ")}) is the in-app 豆包/Doubao AI ASSISTANT chat ` +
|
|
227
|
+
"(tables im_message / im_conversation / im_bot), not person-to-person " +
|
|
228
|
+
"social DMs. Social DMs live in the SQLCipher `encrypted_<uid>_im.db` " +
|
|
229
|
+
"and need the frida key path. (Collecting Doubao AI chat would be a " +
|
|
230
|
+
"separate, net-new adapter.)",
|
|
231
|
+
);
|
|
232
|
+
}
|
|
173
233
|
throw new Error(
|
|
174
234
|
"DOUYIN_NO_IM_DB: no `<19-digit-uid>_im.db` found in databases/. Open the Douyin App + log in once + open any chat thread to materialize the IM database, then retry.",
|
|
175
235
|
);
|
|
@@ -273,6 +333,8 @@ module.exports = {
|
|
|
273
333
|
createDouyinDbExtension,
|
|
274
334
|
DOUYIN_DB_REMOTE_DIR,
|
|
275
335
|
IM_DB_PATTERN,
|
|
336
|
+
ENCRYPTED_IM_DB_PATTERN,
|
|
337
|
+
DOUBAO_IM_DB_PATTERN,
|
|
276
338
|
// Exposed for tests
|
|
277
339
|
_internals: {
|
|
278
340
|
listImDbs,
|
|
@@ -53,6 +53,22 @@ const {
|
|
|
53
53
|
const WEIBO_COOKIES_REMOTE_PATH =
|
|
54
54
|
"/data/data/com.sina.weibo/app_webview/Default/Cookies";
|
|
55
55
|
|
|
56
|
+
/**
|
|
57
|
+
* Glob the WebView profile dir at pull time. Real-device verification
|
|
58
|
+
* (2026-06-08, Xiaomi chopin / MIUI 13 / Weibo logged in) showed current
|
|
59
|
+
* Weibo stores cookies under a SUFFIXED profile dir
|
|
60
|
+
* `app_webview_com.sina.weibo/Default/Cookies`, NOT the standard
|
|
61
|
+
* `app_webview/Default/Cookies` — so the old hardcoded path made the
|
|
62
|
+
* collector throw WEIBO_NOT_INSTALLED even though Weibo was installed and
|
|
63
|
+
* logged in. Chromium names the WebView data dir after the WebView
|
|
64
|
+
* `dataDirectorySuffix` the host app sets; Weibo sets it to its own
|
|
65
|
+
* package name. We glob `app_webview*` and take the first match (Default
|
|
66
|
+
* profile) so both the legacy and suffixed layouts resolve. See memory
|
|
67
|
+
* [[pdh_social_cookie_endpoint_drift_2026_05]].
|
|
68
|
+
*/
|
|
69
|
+
const WEIBO_COOKIES_REMOTE_GLOB =
|
|
70
|
+
"/data/data/com.sina.weibo/app_webview*/Default/Cookies";
|
|
71
|
+
|
|
56
72
|
const WEIBO_COOKIE_HOST_DOMAIN = "m.weibo.cn";
|
|
57
73
|
|
|
58
74
|
/** Minimum required cookie name — without SUB, /api/config returns login=false. */
|
|
@@ -60,21 +76,31 @@ const WEIBO_REQUIRED_COOKIE = "SUB";
|
|
|
60
76
|
|
|
61
77
|
async function pullCookiesViaSu(adb, serial, opts) {
|
|
62
78
|
const adbOpts = { serial, timeoutMs: opts?.timeoutMs || 60_000 };
|
|
79
|
+
// Resolve the actual Cookies path — glob `app_webview*` so the suffixed
|
|
80
|
+
// profile dir (app_webview_com.sina.weibo, observed on real devices) is
|
|
81
|
+
// found as well as the legacy `app_webview`. `ls -d <glob>` prints every
|
|
82
|
+
// match; we take the first (Default profile). When nothing matches the
|
|
83
|
+
// shell prints the unexpanded glob, so we sentinel-guard NOT_FOUND.
|
|
63
84
|
const lsOut = await adb(
|
|
64
85
|
[
|
|
65
86
|
"shell",
|
|
66
87
|
"su",
|
|
67
88
|
"-c",
|
|
68
|
-
`ls ${
|
|
89
|
+
`ls -d ${WEIBO_COOKIES_REMOTE_GLOB} 2>/dev/null | head -n1 || echo NOT_FOUND`,
|
|
69
90
|
],
|
|
70
91
|
adbOpts,
|
|
71
92
|
);
|
|
72
93
|
const lsLine = lsOut.replace(/\r+$/gm, "").trim();
|
|
73
|
-
|
|
94
|
+
const remotePath =
|
|
95
|
+
lsLine && lsLine !== "NOT_FOUND" && !lsLine.includes("*") ? lsLine : null;
|
|
96
|
+
if (!remotePath) {
|
|
74
97
|
throw new Error(
|
|
75
|
-
"WEIBO_NOT_INSTALLED: " +
|
|
76
|
-
|
|
77
|
-
"
|
|
98
|
+
"WEIBO_NOT_INSTALLED: no Cookies DB under " +
|
|
99
|
+
WEIBO_COOKIES_REMOTE_GLOB +
|
|
100
|
+
" (globbed `app_webview*` to cover both the legacy and the suffixed " +
|
|
101
|
+
"`app_webview_com.sina.weibo` profile layouts). Install Weibo App + " +
|
|
102
|
+
"log in once on the phone, then retry. If Weibo is installed but no " +
|
|
103
|
+
"match exists, the WebView dataDirectorySuffix changed again — file a bug.",
|
|
78
104
|
);
|
|
79
105
|
}
|
|
80
106
|
// Probe root.
|
|
@@ -93,7 +119,7 @@ async function pullCookiesViaSu(adb, serial, opts) {
|
|
|
93
119
|
"shell",
|
|
94
120
|
"su",
|
|
95
121
|
"-c",
|
|
96
|
-
`base64 ${
|
|
122
|
+
`base64 ${remotePath} | tr -d '\\n\\r'`,
|
|
97
123
|
],
|
|
98
124
|
{ ...adbOpts, timeoutMs: opts?.timeoutMs || 60_000 },
|
|
99
125
|
);
|
|
@@ -241,6 +267,7 @@ function createWeiboCookiesExtension(factoryOpts = {}) {
|
|
|
241
267
|
module.exports = {
|
|
242
268
|
createWeiboCookiesExtension,
|
|
243
269
|
WEIBO_COOKIES_REMOTE_PATH,
|
|
270
|
+
WEIBO_COOKIES_REMOTE_GLOB,
|
|
244
271
|
WEIBO_COOKIE_HOST_DOMAIN,
|
|
245
272
|
WEIBO_REQUIRED_COOKIE,
|
|
246
273
|
assembleWeiboCookieHeader,
|
|
@@ -81,21 +81,52 @@ class WeChatPcAdapter {
|
|
|
81
81
|
fs,
|
|
82
82
|
// DI seam: tests inject a fake SQLite driver class via dbDriverFactory.
|
|
83
83
|
dbDriverFactory: opts.dbDriverFactory || null,
|
|
84
|
+
// DI seam: tests inject a fake WeChat 4.x collector; default lazy-loads
|
|
85
|
+
// the forensics-bridge sidecar invoker.
|
|
86
|
+
v4Collector: opts.v4Collector || null,
|
|
87
|
+
// DI seam for discovery (see _autoDiscover).
|
|
88
|
+
discoveryDeps: opts.discoveryDeps || undefined,
|
|
84
89
|
};
|
|
85
90
|
}
|
|
86
91
|
|
|
92
|
+
// Auto-discover PC WeChat's local DB on the host (3.x + 4.x layouts) so the
|
|
93
|
+
// UI never needs a manually typed path. Lazy-required + cached per instance.
|
|
94
|
+
_autoDiscover() {
|
|
95
|
+
if (this._discovered !== undefined) return this._discovered;
|
|
96
|
+
try {
|
|
97
|
+
// eslint-disable-next-line global-require
|
|
98
|
+
const { discover } = require("../_pc-local-discovery");
|
|
99
|
+
this._discovered = discover("wechat-pc", this._deps.discoveryDeps || {});
|
|
100
|
+
} catch (_e) {
|
|
101
|
+
this._discovered = null;
|
|
102
|
+
}
|
|
103
|
+
return this._discovered;
|
|
104
|
+
}
|
|
105
|
+
|
|
87
106
|
async authenticate(ctx = {}) {
|
|
88
107
|
// Cheap readiness probe — never opens / decrypts a DB.
|
|
89
108
|
if (ctx && ctx.readinessOnly) {
|
|
90
109
|
if (this._dbPath) return { ok: true, mode: "configured" };
|
|
110
|
+
const disc = this._autoDiscover();
|
|
111
|
+
if (disc && disc.installed) {
|
|
112
|
+
return {
|
|
113
|
+
ok: false,
|
|
114
|
+
reason: "DB_FOUND_NEEDS_KEY",
|
|
115
|
+
message: `已找到本机微信库(${disc.layout || ""} ${disc.accounts.length} 个账号,主库 ${disc.primaryDb})`,
|
|
116
|
+
discovered: disc,
|
|
117
|
+
};
|
|
118
|
+
}
|
|
91
119
|
return {
|
|
92
120
|
ok: false,
|
|
93
|
-
reason: "
|
|
94
|
-
message:
|
|
95
|
-
"wechat-pc: 需提供 PC 微信本地数据库路径(MSG*.db / MicroMsg.db),加密库需先解密或提供 key",
|
|
121
|
+
reason: "APP_NOT_INSTALLED",
|
|
122
|
+
message: (disc && disc.note) || "未检测到本机微信数据(可能未安装或未登录)",
|
|
96
123
|
};
|
|
97
124
|
}
|
|
98
|
-
const dbPath =
|
|
125
|
+
const dbPath =
|
|
126
|
+
(ctx && ctx.inputPath) ||
|
|
127
|
+
(ctx && ctx.dbPath) ||
|
|
128
|
+
this._dbPath ||
|
|
129
|
+
this._resolveDiscoveredDbPath();
|
|
99
130
|
if (dbPath) {
|
|
100
131
|
try {
|
|
101
132
|
this._deps.fs.accessSync(dbPath, this._deps.fs.constants.R_OK);
|
|
@@ -108,22 +139,54 @@ class WeChatPcAdapter {
|
|
|
108
139
|
}
|
|
109
140
|
return { ok: true, mode: "sqlite" };
|
|
110
141
|
}
|
|
142
|
+
const disc = this._autoDiscover();
|
|
143
|
+
if (disc && disc.installed) {
|
|
144
|
+
return {
|
|
145
|
+
ok: false,
|
|
146
|
+
reason: "DB_FOUND_NEEDS_KEY",
|
|
147
|
+
message: `已找到本机微信库(主库 ${disc.primaryDb}),需解密密钥`,
|
|
148
|
+
discovered: disc,
|
|
149
|
+
};
|
|
150
|
+
}
|
|
111
151
|
return {
|
|
112
152
|
ok: false,
|
|
113
|
-
reason: "
|
|
114
|
-
message: "wechat-pc.authenticate:
|
|
153
|
+
reason: "APP_NOT_INSTALLED",
|
|
154
|
+
message: "wechat-pc.authenticate: 未检测到本机微信库,也未提供 dbPath / inputPath",
|
|
115
155
|
};
|
|
116
156
|
}
|
|
117
157
|
|
|
158
|
+
// Resolve the auto-discovered primary message DB path (null if none).
|
|
159
|
+
_resolveDiscoveredDbPath() {
|
|
160
|
+
const disc = this._autoDiscover();
|
|
161
|
+
return disc && disc.installed && disc.primaryDb ? disc.primaryDb : null;
|
|
162
|
+
}
|
|
163
|
+
|
|
118
164
|
async healthCheck() {
|
|
119
165
|
return { ok: true, lastChecked: Date.now() };
|
|
120
166
|
}
|
|
121
167
|
|
|
122
168
|
async *sync(opts = {}) {
|
|
123
|
-
|
|
169
|
+
// WeChat 4.x path: encrypted SQLCipher-4 DBs whose key lives in Weixin.exe
|
|
170
|
+
// memory. Route through the Python sidecar (memory key + decrypt + parse)
|
|
171
|
+
// and yield the decrypted messages. Triggered when the user gives no
|
|
172
|
+
// explicit plaintext path AND discovery sees the 4.x layout, or opts.mode.
|
|
173
|
+
const disc = this._autoDiscover();
|
|
174
|
+
const noExplicitPath = !opts.dbPath && !opts.inputPath && !this._dbPath;
|
|
175
|
+
const useV4 =
|
|
176
|
+
opts.mode === "v4" ||
|
|
177
|
+
(noExplicitPath && disc && disc.installed && disc.layout === "4.x");
|
|
178
|
+
if (useV4) {
|
|
179
|
+
yield* this._syncV4(opts, disc);
|
|
180
|
+
return;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// One-click: when no explicit path is given, fall back to the
|
|
184
|
+
// auto-discovered primary message DB on this host (3.x plaintext/keyed).
|
|
185
|
+
const dbPath =
|
|
186
|
+
opts.dbPath || opts.inputPath || this._dbPath || this._resolveDiscoveredDbPath();
|
|
124
187
|
if (!dbPath) {
|
|
125
188
|
throw new Error(
|
|
126
|
-
"wechat-pc.sync:
|
|
189
|
+
"wechat-pc.sync: 未找到本机微信库且未提供 opts.dbPath / opts.inputPath",
|
|
127
190
|
);
|
|
128
191
|
}
|
|
129
192
|
if (!this._deps.fs.existsSync(dbPath)) return;
|
|
@@ -186,6 +249,114 @@ class WeChatPcAdapter {
|
|
|
186
249
|
}
|
|
187
250
|
}
|
|
188
251
|
|
|
252
|
+
// WeChat 4.x: invoke the sidecar collector, then re-shape each decrypted
|
|
253
|
+
// message into the SAME payload the 3.x normalizeMessage() understands, so
|
|
254
|
+
// both layouts share one normalization path.
|
|
255
|
+
async *_syncV4(opts = {}, disc) {
|
|
256
|
+
let collect = this._deps.v4Collector;
|
|
257
|
+
if (!collect) {
|
|
258
|
+
// eslint-disable-next-line global-require
|
|
259
|
+
collect = require("./v4-sidecar").collectWeChatV4;
|
|
260
|
+
}
|
|
261
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : undefined;
|
|
262
|
+
const result = await collect({
|
|
263
|
+
limit,
|
|
264
|
+
key: opts.key || this._key || undefined,
|
|
265
|
+
pythonExe: opts.pythonExe,
|
|
266
|
+
bridgeDir: opts.bridgeDir,
|
|
267
|
+
timeoutMs: opts.timeoutMs,
|
|
268
|
+
onProgress:
|
|
269
|
+
typeof opts.onProgress === "function"
|
|
270
|
+
? (m) => {
|
|
271
|
+
try { opts.onProgress({ phase: "wechat-v4", adapter: NAME, ...m }); } catch (_e) { /* best-effort */ }
|
|
272
|
+
}
|
|
273
|
+
: undefined,
|
|
274
|
+
_supervisorFactory: opts._supervisorFactory,
|
|
275
|
+
});
|
|
276
|
+
if (typeof opts.onProgress === "function") {
|
|
277
|
+
try {
|
|
278
|
+
opts.onProgress({
|
|
279
|
+
phase: "wechat-v4-done",
|
|
280
|
+
adapter: NAME,
|
|
281
|
+
account: result && result.account,
|
|
282
|
+
messageCount: result && result.messageCount,
|
|
283
|
+
dbs: result && result.dbs,
|
|
284
|
+
});
|
|
285
|
+
} catch (_e) { /* best-effort */ }
|
|
286
|
+
}
|
|
287
|
+
const selfWxid =
|
|
288
|
+
(result && result.account) ||
|
|
289
|
+
(disc && disc.accounts && disc.accounts[0] && disc.accounts[0].id) ||
|
|
290
|
+
null;
|
|
291
|
+
const fallbackCapturedAt = Date.now();
|
|
292
|
+
const messages = (result && Array.isArray(result.messages)) ? result.messages : [];
|
|
293
|
+
let emitted = 0;
|
|
294
|
+
// The sidecar already applied `limit` across all sources (chat/biz/sns/
|
|
295
|
+
// favorite). Yield everything it returned — do NOT re-cap here, or the
|
|
296
|
+
// trailing 朋友圈/收藏 entries and the contacts block would be skipped.
|
|
297
|
+
for (const m of messages) {
|
|
298
|
+
if (!m || typeof m !== "object") continue;
|
|
299
|
+
const conv = typeof m.conversation === "string" ? m.conversation : null;
|
|
300
|
+
const isGroup = !!conv && conv.endsWith("@chatroom");
|
|
301
|
+
const createdTimeMs =
|
|
302
|
+
typeof m.createTime === "number" && m.createTime > 0 ? m.createTime * 1000 : null;
|
|
303
|
+
// Map → 3.x payload shape consumed by normalizeMessage().
|
|
304
|
+
const payload = {
|
|
305
|
+
kind: KIND_MESSAGE,
|
|
306
|
+
msgSvrId: m.originalId || null,
|
|
307
|
+
talker: conv,
|
|
308
|
+
isSend: selfWxid && m.sender && m.sender === selfWxid ? 1 : 0,
|
|
309
|
+
type: typeof m.type === "number" ? m.type : null,
|
|
310
|
+
createdTimeMs,
|
|
311
|
+
text: typeof m.text === "string" ? m.text : "",
|
|
312
|
+
senderWxid: isGroup ? (m.sender || null) : null,
|
|
313
|
+
isGroup,
|
|
314
|
+
contentBlob: typeof m.text === "string" ? m.text : null,
|
|
315
|
+
// provenance: chat | biz(公众号) | sns(朋友圈) | favorite(收藏)
|
|
316
|
+
wechatSource: typeof m.source === "string" ? m.source : "chat",
|
|
317
|
+
appType: typeof m.appType === "number" ? m.appType : null, // appmsg subtype (type 49)
|
|
318
|
+
appUrl: typeof m.appUrl === "string" ? m.appUrl : null, // link/article url
|
|
319
|
+
};
|
|
320
|
+
const idPart =
|
|
321
|
+
m.originalId ||
|
|
322
|
+
(conv && createdTimeMs ? `${conv}-${createdTimeMs}` : `v4-${emitted}`);
|
|
323
|
+
yield {
|
|
324
|
+
adapter: NAME,
|
|
325
|
+
kind: KIND_MESSAGE,
|
|
326
|
+
originalId: m.originalId || stableOriginalId(KIND_MESSAGE, idPart),
|
|
327
|
+
capturedAt: createdTimeMs || fallbackCapturedAt,
|
|
328
|
+
payload,
|
|
329
|
+
};
|
|
330
|
+
emitted += 1;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// Contacts (from contact.db) → Person entities. Not bound by the message
|
|
334
|
+
// `limit` (that caps messages, not the address book). Opt out via
|
|
335
|
+
// opts.include.contact === false.
|
|
336
|
+
const include = opts.include || {};
|
|
337
|
+
if (include[KIND_CONTACT] !== false) {
|
|
338
|
+
const contacts = (result && Array.isArray(result.contacts)) ? result.contacts : [];
|
|
339
|
+
for (const c of contacts) {
|
|
340
|
+
if (!c || typeof c !== "object" || !c.wxid) continue;
|
|
341
|
+
if (typeof c.wxid === "string" && c.wxid.endsWith("@chatroom")) continue;
|
|
342
|
+
yield {
|
|
343
|
+
adapter: NAME,
|
|
344
|
+
kind: KIND_CONTACT,
|
|
345
|
+
originalId: stableOriginalId(KIND_CONTACT, c.wxid),
|
|
346
|
+
capturedAt: fallbackCapturedAt,
|
|
347
|
+
payload: {
|
|
348
|
+
kind: KIND_CONTACT,
|
|
349
|
+
wxid: c.wxid,
|
|
350
|
+
alias: c.alias || null,
|
|
351
|
+
nickname: c.nickname || null,
|
|
352
|
+
remark: c.remark || null,
|
|
353
|
+
type: typeof c.type === "number" ? c.type : null,
|
|
354
|
+
},
|
|
355
|
+
};
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
189
360
|
normalize(raw) {
|
|
190
361
|
if (!raw || !raw.payload) {
|
|
191
362
|
throw new Error("WeChatPcAdapter.normalize: payload missing");
|
|
@@ -282,6 +453,9 @@ function normalizeMessage(p, raw, ingestedAt) {
|
|
|
282
453
|
isSend,
|
|
283
454
|
isGroup,
|
|
284
455
|
wechatType: typeof p.type === "number" ? p.type : null,
|
|
456
|
+
wechatSource: typeof p.wechatSource === "string" ? p.wechatSource : "chat",
|
|
457
|
+
...(p.appType != null ? { wechatAppType: p.appType } : {}),
|
|
458
|
+
...(p.appUrl ? { url: p.appUrl } : {}),
|
|
285
459
|
senderWxid: p.senderWxid || null,
|
|
286
460
|
contentBlob: typeof p.contentBlob === "string" ? p.contentBlob : null,
|
|
287
461
|
...(topics.length ? { topicId: topics[0].id } : {}),
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* WeChat 4.x collection bridge — invokes the forensics-bridge Python sidecar's
|
|
5
|
+
* `wechat_v4.collect` method (memory key extraction + SQLCipher-4 decryption +
|
|
6
|
+
* Msg_<md5> parsing) and returns the decrypted messages to the node adapter.
|
|
7
|
+
*
|
|
8
|
+
* Why a sidecar: WeChat 4.0 DBs are SQLCipher-4 encrypted with a key cached in
|
|
9
|
+
* Weixin.exe process memory. Recovering it needs ReadProcessMemory (Windows)
|
|
10
|
+
* and AES/PBKDF2 — done in Python (`cryptography`), which also sidesteps the
|
|
11
|
+
* host-node bs3mc ABI problem (the node side never opens the encrypted DB).
|
|
12
|
+
*
|
|
13
|
+
* Resolution (all overridable for tests / packaging):
|
|
14
|
+
* - python exe: opts.pythonExe → env CC_PDH_PYTHON → "python" / "python3"
|
|
15
|
+
* - bridge dir: opts.bridgeDir → env CC_PDH_BRIDGE_DIR → sibling package
|
|
16
|
+
*
|
|
17
|
+
* Returns the sidecar result `{ account, messageCount, dbs, messages }`; the wechat-pc adapter additionally reads an optional `contacts` array from it.
|
|
18
|
+
* Throws a typed Error (code on .code) the adapter maps to a sync failure.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
const path = require("node:path");
|
|
22
|
+
const { existsSync } = require("node:fs");
|
|
23
|
+
|
|
24
|
+
/**
 * Pick the directory the forensics-bridge sidecar is launched from.
 *
 * Precedence: the explicit argument, then env CC_PDH_BRIDGE_DIR, then the
 * sibling `personal-data-hub-bridge` package four levels above this file.
 *
 * @param {string} [explicit] caller-supplied bridge directory
 * @returns {string} the chosen directory (its existence is not checked here)
 */
function resolveBridgeDir(explicit) {
  const fromEnv = process.env.CC_PDH_BRIDGE_DIR;
  return (
    explicit ||
    fromEnv ||
    // lib/adapters/wechat-pc → up to packages/, then sibling bridge package.
    path.resolve(__dirname, "../../../../personal-data-hub-bridge")
  );
}
|
|
30
|
+
|
|
31
|
+
/**
 * Ordered, de-duplicated list of python executables to try.
 *
 * Order: the explicit argument, env CC_PDH_PYTHON, then the platform's usual
 * name followed by the other one (Windows commonly ships `python`, *nix
 * `python3`). Empty/unset entries are left out.
 *
 * @param {string} [explicit] caller-supplied interpreter
 * @returns {string[]} candidates, first occurrence wins on duplicates
 */
function pythonCandidates(explicit) {
  const onWindows = process.platform === "win32";
  const ordered = [
    explicit,
    process.env.CC_PDH_PYTHON,
    onWindows ? "python" : "python3",
    onWindows ? "python3" : "python",
  ].filter(Boolean);
  return Array.from(new Set(ordered));
}
|
|
40
|
+
|
|
41
|
+
/**
 * Run the forensics-bridge sidecar's `wechat_v4.collect` method and resolve
 * with whatever it returned.
 *
 * Each python candidate is tried in turn: a supervisor is created, started,
 * invoked once and always stopped again. An error that names a WeChat-side
 * condition (key / app / db / permission) is rethrown immediately; any other
 * failure moves on to the next candidate.
 *
 * @param {object} [opts]
 * @param {number} [opts.limit] max messages (forwarded only when a positive integer)
 * @param {string} [opts.key] pre-extracted 64-hex key (skips memory scan)
 * @param {string} [opts.pythonExe]
 * @param {string} [opts.bridgeDir]
 * @param {number} [opts.timeoutMs] collect timeout (default 120s)
 * @param {number} [opts.readyTimeoutMs] sidecar start/handshake timeout (default 15s)
 * @param {(msg:object)=>void} [opts.onProgress]
 * @param {object} [opts._supervisorFactory] test seam → returns a SidecarSupervisor-like
 * @returns {Promise<{account:string,messageCount:number,dbs:object[],messages:object[]}>}
 * @throws {Error} code "BRIDGE_NOT_FOUND" when the bridge directory is missing
 *   (not checked when a supervisor factory is injected); code
 *   "SIDECAR_UNAVAILABLE" (with the last failure on `.cause`) when no
 *   candidate could run the sidecar; or the sidecar's own error for
 *   WeChat-side failures.
 */
async function collectWeChatV4(opts = {}) {
  const bridgeDir = resolveBridgeDir(opts.bridgeDir);
  const timeoutMs = opts.timeoutMs || 120_000;
  const makeSupervisor =
    opts._supervisorFactory ||
    ((command, cwd) => {
      // Lazy require: the supervisor is only needed on the real (non-test) path.
      // eslint-disable-next-line global-require
      const { SidecarSupervisor } = require("../../sidecar");
      return new SidecarSupervisor({
        command,
        cwd,
        defaultTimeoutMs: timeoutMs,
        healthCheckIntervalMs: 0,
      });
    });

  if (!opts._supervisorFactory && !existsSync(bridgeDir)) {
    const e = new Error(
      `wechat-pc v4: forensics-bridge not found at ${bridgeDir} (set CC_PDH_BRIDGE_DIR)`,
    );
    e.code = "BRIDGE_NOT_FOUND";
    throw e;
  }

  const params = {};
  if (Number.isInteger(opts.limit) && opts.limit > 0) params.limit = opts.limit;
  if (opts.key) params.key = opts.key;

  // Real WeChat-side failures (key/app/db) must surface immediately — the
  // sidecar ran fine, the data just isn't there. `APP_NOT` also covers
  // APP_NOT_RUNNING.
  const dataErrorPattern = /KEY_NOT_FOUND|KEY_VERIFY|APP_NOT|DB_NOT|EXTRACT_PERMISSION/i;

  // Computed once so the loop and the final error message agree.
  const candidates = pythonCandidates(opts.pythonExe);
  let lastErr = null;
  for (const py of candidates) {
    const sup = makeSupervisor([py, "-m", "forensics_bridge.ipc_server"], bridgeDir);
    try {
      await sup.start({ readyTimeoutMs: opts.readyTimeoutMs || 15_000 });
      return await sup.invoke("wechat_v4.collect", params, {
        timeoutMs,
        onProgress: opts.onProgress,
      });
    } catch (err) {
      lastErr = err;
      // Errors are typed via `.code`, so match the marker there as well as in
      // the message; a coded error with a free-form message must not be
      // mistaken for "python is broken".
      const errText = `${(err && err.code) || ""} ${(err && err.message) || ""}`;
      if (dataErrorPattern.test(errText)) throw err;
      // Everything else (python missing, wrong python without `cryptography`,
      // import errors, spawn death, handshake timeout) → try the next candidate.
    } finally {
      // Always release the sidecar, on success and on failure alike.
      try {
        await sup.stop();
      } catch (_e) {
        /* best-effort */
      }
    }
  }

  const e = new Error(
    `wechat-pc v4: could not run forensics-bridge sidecar (tried ${candidates.join(", ")}). ` +
      `Install Python 3.11+ with the 'cryptography' package, or set CC_PDH_PYTHON. Last error: ${lastErr && lastErr.message}`,
  );
  e.code = "SIDECAR_UNAVAILABLE";
  // Keep the underlying failure reachable for callers and logs.
  e.cause = lastErr;
  throw e;
}
|
|
111
|
+
|
|
112
|
+
module.exports = { collectWeChatV4, _internals: { resolveBridgeDir, pythonCandidates } };
|
package/lib/registry.js
CHANGED
|
@@ -65,6 +65,26 @@ class AdapterRegistry {
|
|
|
65
65
|
// depend on it).
|
|
66
66
|
this.entityResolver = opts.entityResolver || null;
|
|
67
67
|
|
|
68
|
+
// ADB one-click readiness (Phase: social platforms). When supplied by the
|
|
69
|
+
// wiring, readiness() treats the named adapters as "collectable via a
|
|
70
|
+
// rooted-phone USB one-click" — flipping their NO_INPUT / DB_NOT_PULLED
|
|
71
|
+
// status to "ready (device connected)" or "ADB_DEVICE_NEEDED" depending on
|
|
72
|
+
// whether a device is currently attached. Keeps the registry generic: the
|
|
73
|
+
// platform list + the actual `adb devices` probe come from the host wiring.
|
|
74
|
+
// opts.adbReadiness = {
|
|
75
|
+
// probe: async () => ({ deviceConnected: boolean, serial?: string }),
|
|
76
|
+
// oneClickNames: Set<string>, // adapter names with an *AdbSync path
|
|
77
|
+
// }
|
|
78
|
+
this._adbReadiness =
|
|
79
|
+
opts.adbReadiness && typeof opts.adbReadiness.probe === "function"
|
|
80
|
+
? {
|
|
81
|
+
probe: opts.adbReadiness.probe,
|
|
82
|
+
oneClickNames: opts.adbReadiness.oneClickNames instanceof Set
|
|
83
|
+
? opts.adbReadiness.oneClickNames
|
|
84
|
+
: new Set(opts.adbReadiness.oneClickNames || []),
|
|
85
|
+
}
|
|
86
|
+
: null;
|
|
87
|
+
|
|
68
88
|
this._adapters = new Map();
|
|
69
89
|
this._activeSync = null; // name of currently-running adapter, or null
|
|
70
90
|
}
|
|
@@ -158,9 +178,24 @@ class AdapterRegistry {
|
|
|
158
178
|
Number.isInteger(opts.timeoutMs) && opts.timeoutMs > 0
|
|
159
179
|
? opts.timeoutMs
|
|
160
180
|
: DEFAULT_READINESS_TIMEOUT_MS;
|
|
181
|
+
// Probe the host's ADB device state ONCE (best-effort) so all ADB
|
|
182
|
+
// one-click adapters share a single `adb devices` call this round.
|
|
183
|
+
let adbState = null;
|
|
184
|
+
if (this._adbReadiness) {
|
|
185
|
+
try {
|
|
186
|
+
adbState = await this._withTimeout(
|
|
187
|
+
Promise.resolve().then(() => this._adbReadiness.probe()),
|
|
188
|
+
timeoutMs,
|
|
189
|
+
"adb-probe"
|
|
190
|
+
);
|
|
191
|
+
} catch (_e) {
|
|
192
|
+
adbState = { deviceConnected: false };
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
161
196
|
const reports = [];
|
|
162
197
|
for (const adapter of this._adapters.values()) {
|
|
163
|
-
const report = await this._probeReadiness(adapter, timeoutMs);
|
|
198
|
+
const report = await this._probeReadiness(adapter, timeoutMs, adbState);
|
|
164
199
|
// Attach the step-by-step import guide (how to get this source's data
|
|
165
200
|
// into the vault) keyed off the resolved category. Single source of
|
|
166
201
|
// truth in adapter-guide.js — reused by every shell.
|
|
@@ -170,7 +205,7 @@ class AdapterRegistry {
|
|
|
170
205
|
return reports;
|
|
171
206
|
}
|
|
172
207
|
|
|
173
|
-
async _probeReadiness(adapter, timeoutMs) {
|
|
208
|
+
async _probeReadiness(adapter, timeoutMs, adbState) {
|
|
174
209
|
const dd = adapter.dataDisclosure || {};
|
|
175
210
|
const extractMode = adapter.extractMode || "web-api";
|
|
176
211
|
const base = {
|
|
@@ -240,6 +275,47 @@ class AdapterRegistry {
|
|
|
240
275
|
}
|
|
241
276
|
|
|
242
277
|
const reason = (auth && auth.reason) || "UNKNOWN";
|
|
278
|
+
|
|
279
|
+
// ADB one-click platforms (social): the adapter itself has no snapshot yet
|
|
280
|
+
// (NO_INPUT / INPUT_PATH_REQUIRED / DB_NOT_PULLED), but the platform CAN be
|
|
281
|
+
// collected in one click from a rooted phone over USB. Reflect the real
|
|
282
|
+
// device state instead of the misleading "采集需先在手机 App 内…".
|
|
283
|
+
if (
|
|
284
|
+
this._adbReadiness &&
|
|
285
|
+
this._adbReadiness.oneClickNames.has(adapter.name) &&
|
|
286
|
+
(reason === "NO_INPUT" || reason === "INPUT_PATH_REQUIRED" || reason === "DB_NOT_PULLED")
|
|
287
|
+
) {
|
|
288
|
+
if (adbState && adbState.deviceConnected) {
|
|
289
|
+
return {
|
|
290
|
+
...base,
|
|
291
|
+
ready: true,
|
|
292
|
+
status: "ready",
|
|
293
|
+
category: "device",
|
|
294
|
+
reason: null,
|
|
295
|
+
message: "已连接 root 手机,点「一键采集」即可拉取",
|
|
296
|
+
actionHint: null,
|
|
297
|
+
mode: "adb-oneclick",
|
|
298
|
+
lastSyncedAt,
|
|
299
|
+
lastStatus,
|
|
300
|
+
lastError,
|
|
301
|
+
};
|
|
302
|
+
}
|
|
303
|
+
const adbDesc = describeReadiness("ADB_DEVICE_NEEDED");
|
|
304
|
+
return {
|
|
305
|
+
...base,
|
|
306
|
+
ready: false,
|
|
307
|
+
status: adbDesc.status,
|
|
308
|
+
category: adbDesc.category,
|
|
309
|
+
reason: "ADB_DEVICE_NEEDED",
|
|
310
|
+
message: adbDesc.message,
|
|
311
|
+
actionHint: adbDesc.actionHint,
|
|
312
|
+
mode: null,
|
|
313
|
+
lastSyncedAt,
|
|
314
|
+
lastStatus,
|
|
315
|
+
lastError,
|
|
316
|
+
};
|
|
317
|
+
}
|
|
318
|
+
|
|
243
319
|
const desc = describeReadiness(reason);
|
|
244
320
|
const detail = auth && (auth.message || auth.error);
|
|
245
321
|
const message =
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.3",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|