@chainlesschain/personal-data-hub 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,8 +37,29 @@ const crypto = require("node:crypto");
37
37
  const DOUYIN_DB_REMOTE_DIR =
38
38
  "/data/data/com.ss.android.ugc.aweme/databases";
39
39
 
40
+ // Legacy plaintext social-DM IM db (Brignoni 2018 TikTok era, `<19-digit-uid>_im.db`).
40
41
  const IM_DB_PATTERN = /^(\d{19})_im\.db$/;
41
42
 
43
+ // Real-device verification 2026-06-08 (Xiaomi chopin / MIUI 13, Douyin
44
+ // v??-2026 logged in) found CURRENT Douyin no longer ships a plaintext
45
+ // social-DM IM db. Two new on-disk shapes coexist in databases/:
46
+ //
47
+ // encrypted_<uid>_im.db — the social DM store, now SQLCipher-ENCRYPTED
48
+ // (header is NOT `SQLite format 3`). Reading it
49
+ // needs the per-user key, which only the frida
50
+ // key-hook path (Phase 2b, libmsaoaidsec.so
51
+ // anti-debug bypass) can recover — the plaintext
52
+ // C-path here cannot.
53
+ // im_database_<uid> — a Room db, but it is the in-app 豆包/Doubao AI
54
+ // ASSISTANT chat (tables im_message / im_conversation
55
+ // / im_bot), NOT person-to-person social DMs.
56
+ //
57
+ // We classify all three so the handler can emit a precise, actionable error
58
+ // instead of a misleading DOUYIN_NO_IM_DB. See memory
59
+ // [[pdh_douyin_c_path_phase_2a]] / [[pdh_social_cookie_endpoint_drift_2026_05]].
60
+ const ENCRYPTED_IM_DB_PATTERN = /^encrypted_(\d+)_im\.db$/;
61
+ const DOUBAO_IM_DB_PATTERN = /^im_database_(\d{6,})$/;
62
+
42
63
  /**
43
64
  * List candidate IM db filenames + uid via `adb shell su -c "ls databases/"`.
44
65
  *
@@ -64,15 +85,27 @@ async function listImDbs(adb, serial, opts) {
64
85
  return { candidates: [], dirMissing: true };
65
86
  }
66
87
  const candidates = [];
88
+ const encryptedCandidates = [];
89
+ const doubaoCandidates = [];
67
90
  for (const line of lines) {
68
91
  const fileName = line.trim();
69
92
  if (!fileName) continue;
70
93
  const m = fileName.match(IM_DB_PATTERN);
71
94
  if (m) {
72
95
  candidates.push({ uid: m[1], fileName });
96
+ continue;
97
+ }
98
+ const enc = fileName.match(ENCRYPTED_IM_DB_PATTERN);
99
+ if (enc) {
100
+ encryptedCandidates.push({ uid: enc[1], fileName });
101
+ continue;
102
+ }
103
+ const doubao = fileName.match(DOUBAO_IM_DB_PATTERN);
104
+ if (doubao) {
105
+ doubaoCandidates.push({ uid: doubao[1], fileName });
73
106
  }
74
107
  }
75
- return { candidates, dirMissing: false };
108
+ return { candidates, encryptedCandidates, doubaoCandidates, dirMissing: false };
76
109
  }
77
110
 
78
111
  /**
@@ -159,9 +192,10 @@ function createDouyinDbExtension(factoryOpts = {}) {
159
192
  }
160
193
 
161
194
  // Step 1: discover candidate IM dbs.
162
- const { candidates, dirMissing } = await listImDbs(ctx.adb, serial, {
163
- timeoutMs,
164
- });
195
+ const { candidates, encryptedCandidates, doubaoCandidates, dirMissing } =
196
+ await listImDbs(ctx.adb, serial, {
197
+ timeoutMs,
198
+ });
165
199
  if (dirMissing) {
166
200
  throw new Error(
167
201
  "DOUYIN_NOT_INSTALLED: " +
@@ -170,6 +204,32 @@ function createDouyinDbExtension(factoryOpts = {}) {
170
204
  );
171
205
  }
172
206
  if (candidates.length === 0) {
207
+ // No legacy plaintext IM db. Distinguish the modern layouts so the UI
208
+ // can tell the user the truth instead of "no db found". Real-device
209
+ // verification 2026-06-08 — see ENCRYPTED_IM_DB_PATTERN comment above.
210
+ if (encryptedCandidates && encryptedCandidates.length > 0) {
211
+ throw new Error(
212
+ "DOUYIN_IM_DB_ENCRYPTED: this Douyin version stores its social DM db as " +
213
+ `\`encrypted_<uid>_im.db\` (SQLCipher) — found ${encryptedCandidates
214
+ .map((c) => c.fileName)
215
+ .join(", ")}. The plaintext C-path can't read it; the per-user ` +
216
+ "key must be recovered via the frida key-hook path (Phase 2b, " +
217
+ "libmsaoaidsec.so anti-debug bypass). Plaintext direct-read is no " +
218
+ "longer possible on current Douyin.",
219
+ );
220
+ }
221
+ if (doubaoCandidates && doubaoCandidates.length > 0) {
222
+ throw new Error(
223
+ "DOUYIN_ONLY_DOUBAO_AI_CHAT: the only readable `im_database_<uid>` db " +
224
+ `(${doubaoCandidates
225
+ .map((c) => c.fileName)
226
+ .join(", ")}) is the in-app 豆包/Doubao AI ASSISTANT chat ` +
227
+ "(tables im_message / im_conversation / im_bot), not person-to-person " +
228
+ "social DMs. Social DMs live in the SQLCipher `encrypted_<uid>_im.db` " +
229
+ "and need the frida key path. (Collecting Doubao AI chat would be a " +
230
+ "separate, net-new adapter.)",
231
+ );
232
+ }
173
233
  throw new Error(
174
234
  "DOUYIN_NO_IM_DB: no `<19-digit-uid>_im.db` found in databases/. Open the Douyin App + log in once + open any chat thread to materialize the IM database, then retry.",
175
235
  );
@@ -273,6 +333,8 @@ module.exports = {
273
333
  createDouyinDbExtension,
274
334
  DOUYIN_DB_REMOTE_DIR,
275
335
  IM_DB_PATTERN,
336
+ ENCRYPTED_IM_DB_PATTERN,
337
+ DOUBAO_IM_DB_PATTERN,
276
338
  // Exposed for tests
277
339
  _internals: {
278
340
  listImDbs,
@@ -53,6 +53,22 @@ const {
53
53
  const WEIBO_COOKIES_REMOTE_PATH =
54
54
  "/data/data/com.sina.weibo/app_webview/Default/Cookies";
55
55
 
56
+ /**
57
+ * Glob the WebView profile dir at pull time. Real-device verification
58
+ * (2026-06-08, Xiaomi chopin / MIUI 13 / Weibo logged in) showed current
59
+ * Weibo stores cookies under a SUFFIXED profile dir
60
+ * `app_webview_com.sina.weibo/Default/Cookies`, NOT the standard
61
+ * `app_webview/Default/Cookies` — so the old hardcoded path made the
62
+ * collector throw WEIBO_NOT_INSTALLED even though Weibo was installed and
63
+ * logged in. Chromium names the WebView data dir after the WebView
64
+ * `dataDirectorySuffix` the host app sets; Weibo sets it to its own
65
+ * package name. We glob `app_webview*` and take the first match (Default
66
+ * profile) so both the legacy and suffixed layouts resolve. See memory
67
+ * [[pdh_social_cookie_endpoint_drift_2026_05]].
68
+ */
69
+ const WEIBO_COOKIES_REMOTE_GLOB =
70
+ "/data/data/com.sina.weibo/app_webview*/Default/Cookies";
71
+
56
72
  const WEIBO_COOKIE_HOST_DOMAIN = "m.weibo.cn";
57
73
 
58
74
  /** Minimum required cookie name — without SUB, /api/config returns login=false. */
@@ -60,21 +76,31 @@ const WEIBO_REQUIRED_COOKIE = "SUB";
60
76
 
61
77
  async function pullCookiesViaSu(adb, serial, opts) {
62
78
  const adbOpts = { serial, timeoutMs: opts?.timeoutMs || 60_000 };
79
+ // Resolve the actual Cookies path — glob `app_webview*` so the suffixed
80
+ // profile dir (app_webview_com.sina.weibo, observed on real devices) is
81
+ // found as well as the legacy `app_webview`. `ls -d <glob>` prints every
82
+ // match; we take the first (Default profile). When nothing matches, `ls`
83
+ // writes nothing to stdout, so the check below rejects empty output as well as NOT_FOUND or a leftover `*`.
63
84
  const lsOut = await adb(
64
85
  [
65
86
  "shell",
66
87
  "su",
67
88
  "-c",
68
- `ls ${WEIBO_COOKIES_REMOTE_PATH} 2>/dev/null || echo NOT_FOUND`,
89
+ `ls -d ${WEIBO_COOKIES_REMOTE_GLOB} 2>/dev/null | head -n1 || echo NOT_FOUND`,
69
90
  ],
70
91
  adbOpts,
71
92
  );
72
93
  const lsLine = lsOut.replace(/\r+$/gm, "").trim();
73
- if (lsLine === "NOT_FOUND" || lsLine === "") {
94
+ const remotePath =
95
+ lsLine && lsLine !== "NOT_FOUND" && !lsLine.includes("*") ? lsLine : null;
96
+ if (!remotePath) {
74
97
  throw new Error(
75
- "WEIBO_NOT_INSTALLED: " +
76
- WEIBO_COOKIES_REMOTE_PATH +
77
- " not found. Install Weibo App + log in once on the phone, then retry. (Some Weibo App versions store cookies in a non-default WebView profile dir; if Weibo is installed but the path is missing, file a bug to track the actual path.)",
98
+ "WEIBO_NOT_INSTALLED: no Cookies DB under " +
99
+ WEIBO_COOKIES_REMOTE_GLOB +
100
+ " (globbed `app_webview*` to cover both the legacy and the suffixed " +
101
+ "`app_webview_com.sina.weibo` profile layouts). Install Weibo App + " +
102
+ "log in once on the phone, then retry. If Weibo is installed but no " +
103
+ "match exists, the WebView dataDirectorySuffix changed again — file a bug.",
78
104
  );
79
105
  }
80
106
  // Probe root.
@@ -93,7 +119,7 @@ async function pullCookiesViaSu(adb, serial, opts) {
93
119
  "shell",
94
120
  "su",
95
121
  "-c",
96
- `base64 ${WEIBO_COOKIES_REMOTE_PATH} | tr -d '\\n\\r'`,
122
+ `base64 ${remotePath} | tr -d '\\n\\r'`,
97
123
  ],
98
124
  { ...adbOpts, timeoutMs: opts?.timeoutMs || 60_000 },
99
125
  );
@@ -241,6 +267,7 @@ function createWeiboCookiesExtension(factoryOpts = {}) {
241
267
  module.exports = {
242
268
  createWeiboCookiesExtension,
243
269
  WEIBO_COOKIES_REMOTE_PATH,
270
+ WEIBO_COOKIES_REMOTE_GLOB,
244
271
  WEIBO_COOKIE_HOST_DOMAIN,
245
272
  WEIBO_REQUIRED_COOKIE,
246
273
  assembleWeiboCookieHeader,
@@ -81,21 +81,52 @@ class WeChatPcAdapter {
81
81
  fs,
82
82
  // DI seam: tests inject a fake SQLite driver class via dbDriverFactory.
83
83
  dbDriverFactory: opts.dbDriverFactory || null,
84
+ // DI seam: tests inject a fake WeChat 4.x collector; default lazy-loads
85
+ // the forensics-bridge sidecar invoker.
86
+ v4Collector: opts.v4Collector || null,
87
+ // DI seam for discovery (see _autoDiscover).
88
+ discoveryDeps: opts.discoveryDeps || undefined,
84
89
  };
85
90
  }
86
91
 
92
+ // Auto-discover PC WeChat's local DB on the host (3.x + 4.x layouts) so the
93
+ // UI never needs a manually typed path. Lazy-required + cached per instance.
94
+ _autoDiscover() {
95
+ if (this._discovered !== undefined) return this._discovered;
96
+ try {
97
+ // eslint-disable-next-line global-require
98
+ const { discover } = require("../_pc-local-discovery");
99
+ this._discovered = discover("wechat-pc", this._deps.discoveryDeps || {});
100
+ } catch (_e) {
101
+ this._discovered = null;
102
+ }
103
+ return this._discovered;
104
+ }
105
+
87
106
  async authenticate(ctx = {}) {
88
107
  // Cheap readiness probe — never opens / decrypts a DB.
89
108
  if (ctx && ctx.readinessOnly) {
90
109
  if (this._dbPath) return { ok: true, mode: "configured" };
110
+ const disc = this._autoDiscover();
111
+ if (disc && disc.installed) {
112
+ return {
113
+ ok: false,
114
+ reason: "DB_FOUND_NEEDS_KEY",
115
+ message: `已找到本机微信库(${disc.layout || ""} ${disc.accounts.length} 个账号,主库 ${disc.primaryDb})`,
116
+ discovered: disc,
117
+ };
118
+ }
91
119
  return {
92
120
  ok: false,
93
- reason: "DB_NOT_PULLED",
94
- message:
95
- "wechat-pc: 需提供 PC 微信本地数据库路径(MSG*.db / MicroMsg.db),加密库需先解密或提供 key",
121
+ reason: "APP_NOT_INSTALLED",
122
+ message: (disc && disc.note) || "未检测到本机微信数据(可能未安装或未登录)",
96
123
  };
97
124
  }
98
- const dbPath = (ctx && ctx.inputPath) || (ctx && ctx.dbPath) || this._dbPath;
125
+ const dbPath =
126
+ (ctx && ctx.inputPath) ||
127
+ (ctx && ctx.dbPath) ||
128
+ this._dbPath ||
129
+ this._resolveDiscoveredDbPath();
99
130
  if (dbPath) {
100
131
  try {
101
132
  this._deps.fs.accessSync(dbPath, this._deps.fs.constants.R_OK);
@@ -108,22 +139,54 @@ class WeChatPcAdapter {
108
139
  }
109
140
  return { ok: true, mode: "sqlite" };
110
141
  }
142
+ const disc = this._autoDiscover();
143
+ if (disc && disc.installed) {
144
+ return {
145
+ ok: false,
146
+ reason: "DB_FOUND_NEEDS_KEY",
147
+ message: `已找到本机微信库(主库 ${disc.primaryDb}),需解密密钥`,
148
+ discovered: disc,
149
+ };
150
+ }
111
151
  return {
112
152
  ok: false,
113
- reason: "DB_NOT_PULLED",
114
- message: "wechat-pc.authenticate: needs opts.dbPath / inputPath (MSG*.db or MicroMsg.db)",
153
+ reason: "APP_NOT_INSTALLED",
154
+ message: "wechat-pc.authenticate: 未检测到本机微信库,也未提供 dbPath / inputPath",
115
155
  };
116
156
  }
117
157
 
158
+ // Resolve the auto-discovered primary message DB path (null if none).
159
+ _resolveDiscoveredDbPath() {
160
+ const disc = this._autoDiscover();
161
+ return disc && disc.installed && disc.primaryDb ? disc.primaryDb : null;
162
+ }
163
+
118
164
  async healthCheck() {
119
165
  return { ok: true, lastChecked: Date.now() };
120
166
  }
121
167
 
122
168
  async *sync(opts = {}) {
123
- const dbPath = opts.dbPath || opts.inputPath || this._dbPath;
169
+ // WeChat 4.x path: encrypted SQLCipher-4 DBs whose key lives in Weixin.exe
170
+ // memory. Route through the Python sidecar (memory key + decrypt + parse)
171
+ // and yield the decrypted messages. Triggered when the user gives no
172
+ // explicit plaintext path AND discovery sees the 4.x layout, or opts.mode.
173
+ const disc = this._autoDiscover();
174
+ const noExplicitPath = !opts.dbPath && !opts.inputPath && !this._dbPath;
175
+ const useV4 =
176
+ opts.mode === "v4" ||
177
+ (noExplicitPath && disc && disc.installed && disc.layout === "4.x");
178
+ if (useV4) {
179
+ yield* this._syncV4(opts, disc);
180
+ return;
181
+ }
182
+
183
+ // One-click: when no explicit path is given, fall back to the
184
+ // auto-discovered primary message DB on this host (3.x plaintext/keyed).
185
+ const dbPath =
186
+ opts.dbPath || opts.inputPath || this._dbPath || this._resolveDiscoveredDbPath();
124
187
  if (!dbPath) {
125
188
  throw new Error(
126
- "wechat-pc.sync: needs opts.dbPath / opts.inputPath pointing to a PC WeChat DB (MSG*.db or MicroMsg.db)",
189
+ "wechat-pc.sync: 未找到本机微信库且未提供 opts.dbPath / opts.inputPath",
127
190
  );
128
191
  }
129
192
  if (!this._deps.fs.existsSync(dbPath)) return;
@@ -186,6 +249,114 @@ class WeChatPcAdapter {
186
249
  }
187
250
  }
188
251
 
252
+ // WeChat 4.x: invoke the sidecar collector, then re-shape each decrypted
253
+ // message into the SAME payload the 3.x normalizeMessage() understands, so
254
+ // both layouts share one normalization path.
255
+ async *_syncV4(opts = {}, disc) {
256
+ let collect = this._deps.v4Collector;
257
+ if (!collect) {
258
+ // eslint-disable-next-line global-require
259
+ collect = require("./v4-sidecar").collectWeChatV4;
260
+ }
261
+ const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : undefined;
262
+ const result = await collect({
263
+ limit,
264
+ key: opts.key || this._key || undefined,
265
+ pythonExe: opts.pythonExe,
266
+ bridgeDir: opts.bridgeDir,
267
+ timeoutMs: opts.timeoutMs,
268
+ onProgress:
269
+ typeof opts.onProgress === "function"
270
+ ? (m) => {
271
+ try { opts.onProgress({ phase: "wechat-v4", adapter: NAME, ...m }); } catch (_e) { /* best-effort */ }
272
+ }
273
+ : undefined,
274
+ _supervisorFactory: opts._supervisorFactory,
275
+ });
276
+ if (typeof opts.onProgress === "function") {
277
+ try {
278
+ opts.onProgress({
279
+ phase: "wechat-v4-done",
280
+ adapter: NAME,
281
+ account: result && result.account,
282
+ messageCount: result && result.messageCount,
283
+ dbs: result && result.dbs,
284
+ });
285
+ } catch (_e) { /* best-effort */ }
286
+ }
287
+ const selfWxid =
288
+ (result && result.account) ||
289
+ (disc && disc.accounts && disc.accounts[0] && disc.accounts[0].id) ||
290
+ null;
291
+ const fallbackCapturedAt = Date.now();
292
+ const messages = (result && Array.isArray(result.messages)) ? result.messages : [];
293
+ let emitted = 0;
294
+ // The sidecar already applied `limit` across all sources (chat/biz/sns/
295
+ // favorite). Yield everything it returned — do NOT re-cap here, or the
296
+ // trailing 朋友圈/收藏 entries and the contacts block would be skipped.
297
+ for (const m of messages) {
298
+ if (!m || typeof m !== "object") continue;
299
+ const conv = typeof m.conversation === "string" ? m.conversation : null;
300
+ const isGroup = !!conv && conv.endsWith("@chatroom");
301
+ const createdTimeMs =
302
+ typeof m.createTime === "number" && m.createTime > 0 ? m.createTime * 1000 : null;
303
+ // Map → 3.x payload shape consumed by normalizeMessage().
304
+ const payload = {
305
+ kind: KIND_MESSAGE,
306
+ msgSvrId: m.originalId || null,
307
+ talker: conv,
308
+ isSend: selfWxid && m.sender && m.sender === selfWxid ? 1 : 0,
309
+ type: typeof m.type === "number" ? m.type : null,
310
+ createdTimeMs,
311
+ text: typeof m.text === "string" ? m.text : "",
312
+ senderWxid: isGroup ? (m.sender || null) : null,
313
+ isGroup,
314
+ contentBlob: typeof m.text === "string" ? m.text : null,
315
+ // provenance: chat | biz(公众号) | sns(朋友圈) | favorite(收藏)
316
+ wechatSource: typeof m.source === "string" ? m.source : "chat",
317
+ appType: typeof m.appType === "number" ? m.appType : null, // appmsg subtype (type 49)
318
+ appUrl: typeof m.appUrl === "string" ? m.appUrl : null, // link/article url
319
+ };
320
+ const idPart =
321
+ m.originalId ||
322
+ (conv && createdTimeMs ? `${conv}-${createdTimeMs}` : `v4-${emitted}`);
323
+ yield {
324
+ adapter: NAME,
325
+ kind: KIND_MESSAGE,
326
+ originalId: m.originalId || stableOriginalId(KIND_MESSAGE, idPart),
327
+ capturedAt: createdTimeMs || fallbackCapturedAt,
328
+ payload,
329
+ };
330
+ emitted += 1;
331
+ }
332
+
333
+ // Contacts (from contact.db) → Person entities. Not bound by the message
334
+ // `limit` (that caps messages, not the address book). Opt out via
335
+ // opts.include.contact === false.
336
+ const include = opts.include || {};
337
+ if (include[KIND_CONTACT] !== false) {
338
+ const contacts = (result && Array.isArray(result.contacts)) ? result.contacts : [];
339
+ for (const c of contacts) {
340
+ if (!c || typeof c !== "object" || !c.wxid) continue;
341
+ if (typeof c.wxid === "string" && c.wxid.endsWith("@chatroom")) continue;
342
+ yield {
343
+ adapter: NAME,
344
+ kind: KIND_CONTACT,
345
+ originalId: stableOriginalId(KIND_CONTACT, c.wxid),
346
+ capturedAt: fallbackCapturedAt,
347
+ payload: {
348
+ kind: KIND_CONTACT,
349
+ wxid: c.wxid,
350
+ alias: c.alias || null,
351
+ nickname: c.nickname || null,
352
+ remark: c.remark || null,
353
+ type: typeof c.type === "number" ? c.type : null,
354
+ },
355
+ };
356
+ }
357
+ }
358
+ }
359
+
189
360
  normalize(raw) {
190
361
  if (!raw || !raw.payload) {
191
362
  throw new Error("WeChatPcAdapter.normalize: payload missing");
@@ -282,6 +453,9 @@ function normalizeMessage(p, raw, ingestedAt) {
282
453
  isSend,
283
454
  isGroup,
284
455
  wechatType: typeof p.type === "number" ? p.type : null,
456
+ wechatSource: typeof p.wechatSource === "string" ? p.wechatSource : "chat",
457
+ ...(p.appType != null ? { wechatAppType: p.appType } : {}),
458
+ ...(p.appUrl ? { url: p.appUrl } : {}),
285
459
  senderWxid: p.senderWxid || null,
286
460
  contentBlob: typeof p.contentBlob === "string" ? p.contentBlob : null,
287
461
  ...(topics.length ? { topicId: topics[0].id } : {}),
@@ -0,0 +1,112 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * WeChat 4.x collection bridge — invokes the forensics-bridge Python sidecar's
5
+ * `wechat_v4.collect` method (memory key extraction + SQLCipher-4 decryption +
6
+ * Msg_<md5> parsing) and returns the decrypted messages to the node adapter.
7
+ *
8
+ * Why a sidecar: WeChat 4.0 DBs are SQLCipher-4 encrypted with a key cached in
9
+ * Weixin.exe process memory. Recovering it needs ReadProcessMemory (Windows)
10
+ * and AES/PBKDF2 — done in Python (`cryptography`), which also sidesteps the
11
+ * host-node bs3mc ABI problem (the node side never opens the encrypted DB).
12
+ *
13
+ * Resolution (all overridable for tests / packaging):
14
+ * - python exe: opts.pythonExe → env CC_PDH_PYTHON → "python" / "python3"
15
+ * - bridge dir: opts.bridgeDir → env CC_PDH_BRIDGE_DIR → sibling package
16
+ *
17
+ * Returns the sidecar result `{ account, messageCount, dbs, messages }`.
18
+ * Throws a typed Error (code on .code) the adapter maps to a sync failure.
19
+ */
20
+
21
+ const path = require("node:path");
22
+ const { existsSync } = require("node:fs");
23
+
24
/**
 * Pick the forensics-bridge directory.
 *
 * Precedence: explicit argument, then the CC_PDH_BRIDGE_DIR environment
 * variable, then the sibling `personal-data-hub-bridge` directory four levels
 * above this module's directory.
 *
 * @param {string} [explicit] caller-supplied directory
 * @returns {string} directory to run the sidecar from
 */
function resolveBridgeDir(explicit) {
  const fromEnv = process.env.CC_PDH_BRIDGE_DIR;
  return (
    explicit ||
    fromEnv ||
    path.resolve(__dirname, "../../../../personal-data-hub-bridge")
  );
}
30
+
31
/**
 * Ordered, de-duplicated list of python executables to try.
 *
 * Order: explicit argument, the CC_PDH_PYTHON environment variable, then the
 * platform defaults (`python` before `python3` on win32, the reverse
 * elsewhere). Falsy explicit/env values are left out.
 *
 * @param {string} [explicit] caller-supplied interpreter
 * @returns {string[]} candidates in the order they should be attempted
 */
function pythonCandidates(explicit) {
  const platformDefaults =
    process.platform === "win32" ? ["python", "python3"] : ["python3", "python"];
  const ordered = [explicit, process.env.CC_PDH_PYTHON, ...platformDefaults].filter(Boolean);
  // A Set keeps first-seen order, so earlier (higher-priority) entries win.
  return Array.from(new Set(ordered));
}
40
+
41
// Failure signatures that mean "the sidecar ran, the WeChat data just isn't
// available" (missing/invalid key, app or DB absent, no permission). These
// must surface immediately instead of retrying with another interpreter.
// `APP_NOT` also covers APP_NOT_RUNNING.
const WECHAT_V4_DATA_ERROR_RE = /KEY_NOT_FOUND|KEY_VERIFY|APP_NOT|DB_NOT|EXTRACT_PERMISSION/i;

/**
 * Decide whether a sidecar failure is a WeChat-side data error.
 * Both the error's `code` (when present) and its `message` are inspected.
 *
 * @param {*} err value thrown by the supervisor
 * @returns {boolean} true when retrying with another python cannot help
 */
function isWeChatV4DataError(err) {
  const code = err && typeof err.code === "string" ? err.code : "";
  const message = (err && err.message) || "";
  return WECHAT_V4_DATA_ERROR_RE.test(code) || WECHAT_V4_DATA_ERROR_RE.test(message);
}

/**
 * Run the forensics-bridge sidecar's `wechat_v4.collect` method and return its
 * result. Each python candidate is tried in order until one both starts the
 * sidecar and completes the call; every supervisor is stopped (best-effort)
 * before moving on or returning.
 *
 * @param {object} [opts]
 * @param {number} [opts.limit] max messages (forwarded only when a positive integer)
 * @param {string} [opts.key] pre-extracted 64-hex key (skips memory scan)
 * @param {string} [opts.pythonExe] interpreter to try first
 * @param {string} [opts.bridgeDir] sidecar working directory override
 * @param {number} [opts.timeoutMs] collect timeout (default 120s)
 * @param {number} [opts.readyTimeoutMs] sidecar start/handshake timeout (default 15s)
 * @param {(msg:object)=>void} [opts.onProgress] forwarded to the sidecar invoke call
 * @param {Function} [opts._supervisorFactory] test seam: `(command, cwd)` →
 *   a SidecarSupervisor-like object with `start`, `invoke` and `stop`
 * @returns {Promise<{account:string,messageCount:number,dbs:object[],messages:object[]}>}
 *   the sidecar result, returned as-is. NOTE(review): the adapter also reads
 *   an optional `contacts` array from this result — confirm against the sidecar.
 * @throws {Error} `code === "BRIDGE_NOT_FOUND"` when the bridge directory does
 *   not exist (skipped when a supervisor factory is injected);
 *   `code === "SIDECAR_UNAVAILABLE"` when no candidate could run the sidecar
 *   (the last failure is attached as `cause`); WeChat-side data errors from
 *   the sidecar are rethrown unchanged.
 */
async function collectWeChatV4(opts = {}) {
  const bridgeDir = resolveBridgeDir(opts.bridgeDir);
  const invokeTimeoutMs = opts.timeoutMs || 120_000;
  const makeSupervisor =
    opts._supervisorFactory ||
    ((command, cwd) => {
      // eslint-disable-next-line global-require
      const { SidecarSupervisor } = require("../../sidecar");
      return new SidecarSupervisor({
        command,
        cwd,
        defaultTimeoutMs: invokeTimeoutMs,
        healthCheckIntervalMs: 0,
      });
    });

  // With an injected factory nothing is spawned from disk, so the directory
  // check only applies to the real supervisor.
  if (!opts._supervisorFactory && !existsSync(bridgeDir)) {
    const e = new Error(
      `wechat-pc v4: forensics-bridge not found at ${bridgeDir} (set CC_PDH_BRIDGE_DIR)`,
    );
    e.code = "BRIDGE_NOT_FOUND";
    throw e;
  }

  const params = {};
  if (Number.isInteger(opts.limit) && opts.limit > 0) params.limit = opts.limit;
  if (opts.key) params.key = opts.key;

  const candidates = pythonCandidates(opts.pythonExe);
  let lastErr = null;
  for (const py of candidates) {
    const sup = makeSupervisor([py, "-m", "forensics_bridge.ipc_server"], bridgeDir);
    try {
      await sup.start({ readyTimeoutMs: opts.readyTimeoutMs || 15_000 });
      return await sup.invoke("wechat_v4.collect", params, {
        timeoutMs: invokeTimeoutMs,
        onProgress: opts.onProgress,
      });
    } catch (err) {
      lastErr = err;
      // Real WeChat-side failures surface immediately. Everything else
      // (python missing, wrong python without `cryptography`, import errors,
      // spawn death, handshake timeout) falls through to the next candidate.
      if (isWeChatV4DataError(err)) throw err;
    } finally {
      // Single cleanup point for success, retry and rethrow alike.
      try {
        await sup.stop();
      } catch (_e) {
        /* best-effort */
      }
    }
  }

  const e = new Error(
    `wechat-pc v4: could not run forensics-bridge sidecar (tried ${candidates.join(", ")}). ` +
      `Install Python 3.11+ with the 'cryptography' package, or set CC_PDH_PYTHON. Last error: ${lastErr && lastErr.message}`,
    lastErr ? { cause: lastErr } : undefined,
  );
  e.code = "SIDECAR_UNAVAILABLE";
  throw e;
}
111
+
112
+ module.exports = { collectWeChatV4, _internals: { resolveBridgeDir, pythonCandidates } };
package/lib/registry.js CHANGED
@@ -65,6 +65,26 @@ class AdapterRegistry {
65
65
  // depend on it).
66
66
  this.entityResolver = opts.entityResolver || null;
67
67
 
68
+ // ADB one-click readiness (Phase: social platforms). When supplied by the
69
+ // wiring, readiness() treats the named adapters as "collectable via a
70
+ // rooted-phone USB one-click" — flipping their NO_INPUT / DB_NOT_PULLED
71
+ // status to "ready (device connected)" or "ADB_DEVICE_NEEDED" depending on
72
+ // whether a device is currently attached. Keeps the registry generic: the
73
+ // platform list + the actual `adb devices` probe come from the host wiring.
74
+ // opts.adbReadiness = {
75
+ // probe: async () => ({ deviceConnected: boolean, serial?: string }),
76
+ // oneClickNames: Set<string>, // adapter names with an *AdbSync path
77
+ // }
78
+ this._adbReadiness =
79
+ opts.adbReadiness && typeof opts.adbReadiness.probe === "function"
80
+ ? {
81
+ probe: opts.adbReadiness.probe,
82
+ oneClickNames: opts.adbReadiness.oneClickNames instanceof Set
83
+ ? opts.adbReadiness.oneClickNames
84
+ : new Set(opts.adbReadiness.oneClickNames || []),
85
+ }
86
+ : null;
87
+
68
88
  this._adapters = new Map();
69
89
  this._activeSync = null; // name of currently-running adapter, or null
70
90
  }
@@ -158,9 +178,24 @@ class AdapterRegistry {
158
178
  Number.isInteger(opts.timeoutMs) && opts.timeoutMs > 0
159
179
  ? opts.timeoutMs
160
180
  : DEFAULT_READINESS_TIMEOUT_MS;
181
+ // Probe the host's ADB device state ONCE (best-effort) so all ADB
182
+ // one-click adapters share a single `adb devices` call this round.
183
+ let adbState = null;
184
+ if (this._adbReadiness) {
185
+ try {
186
+ adbState = await this._withTimeout(
187
+ Promise.resolve().then(() => this._adbReadiness.probe()),
188
+ timeoutMs,
189
+ "adb-probe"
190
+ );
191
+ } catch (_e) {
192
+ adbState = { deviceConnected: false };
193
+ }
194
+ }
195
+
161
196
  const reports = [];
162
197
  for (const adapter of this._adapters.values()) {
163
- const report = await this._probeReadiness(adapter, timeoutMs);
198
+ const report = await this._probeReadiness(adapter, timeoutMs, adbState);
164
199
  // Attach the step-by-step import guide (how to get this source's data
165
200
  // into the vault) keyed off the resolved category. Single source of
166
201
  // truth in adapter-guide.js — reused by every shell.
@@ -170,7 +205,7 @@ class AdapterRegistry {
170
205
  return reports;
171
206
  }
172
207
 
173
- async _probeReadiness(adapter, timeoutMs) {
208
+ async _probeReadiness(adapter, timeoutMs, adbState) {
174
209
  const dd = adapter.dataDisclosure || {};
175
210
  const extractMode = adapter.extractMode || "web-api";
176
211
  const base = {
@@ -240,6 +275,47 @@ class AdapterRegistry {
240
275
  }
241
276
 
242
277
  const reason = (auth && auth.reason) || "UNKNOWN";
278
+
279
+ // ADB one-click platforms (social): the adapter itself has no snapshot yet
280
+ // (NO_INPUT / INPUT_PATH_REQUIRED / DB_NOT_PULLED), but the platform CAN be
281
+ // collected in one click from a rooted phone over USB. Reflect the real
282
+ // device state instead of the misleading "采集需先在手机 App 内…".
283
+ if (
284
+ this._adbReadiness &&
285
+ this._adbReadiness.oneClickNames.has(adapter.name) &&
286
+ (reason === "NO_INPUT" || reason === "INPUT_PATH_REQUIRED" || reason === "DB_NOT_PULLED")
287
+ ) {
288
+ if (adbState && adbState.deviceConnected) {
289
+ return {
290
+ ...base,
291
+ ready: true,
292
+ status: "ready",
293
+ category: "device",
294
+ reason: null,
295
+ message: "已连接 root 手机,点「一键采集」即可拉取",
296
+ actionHint: null,
297
+ mode: "adb-oneclick",
298
+ lastSyncedAt,
299
+ lastStatus,
300
+ lastError,
301
+ };
302
+ }
303
+ const adbDesc = describeReadiness("ADB_DEVICE_NEEDED");
304
+ return {
305
+ ...base,
306
+ ready: false,
307
+ status: adbDesc.status,
308
+ category: adbDesc.category,
309
+ reason: "ADB_DEVICE_NEEDED",
310
+ message: adbDesc.message,
311
+ actionHint: adbDesc.actionHint,
312
+ mode: null,
313
+ lastSyncedAt,
314
+ lastStatus,
315
+ lastError,
316
+ };
317
+ }
318
+
243
319
  const desc = describeReadiness(reason);
244
320
  const detail = auth && (auth.message || auth.error);
245
321
  const message =
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.4.1",
3
+ "version": "0.4.3",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",