@chainlesschain/personal-data-hub 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,6 +54,10 @@ const APPMSG_SUBTYPES = {
54
54
  33: "miniprogram",
55
55
  36: "miniprogram",
56
56
  51: "channel-video",
57
+ // sjqz docs reference these higher subtype codes on newer WeChat builds —
58
+ // accept both for forward compatibility (post-Phase 12.6 audit).
59
+ 2000: "transfer",
60
+ 2001: "redpacket",
57
61
  };
58
62
 
59
63
  /**
@@ -225,10 +229,15 @@ function parseAppMsg(body) {
225
229
  url: url || null,
226
230
  };
227
231
 
228
- // Redpacket-specific
229
- if (appType === 21) {
232
+ // Redpacket-specific (accept both 21 and 2001 — see APPMSG_SUBTYPES)
233
+ if (appType === 21 || appType === 2001) {
230
234
  structured.redPacketTitle = title;
231
235
  }
236
+ // Transfer-specific
237
+ if (appType === 2000) {
238
+ structured.transferAmount =
239
+ extractTag(body, "feedesc") || extractTag(body, "pay_memo");
240
+ }
232
241
  // File-specific
233
242
  if (appType === 6) {
234
243
  structured.fileName = title;
@@ -129,27 +129,88 @@ class WeChatDBReader {
129
129
  .map((r) => r.name);
130
130
  }
131
131
 
132
+ /**
133
+ * Discover actual column names via `PRAGMA table_info(<table>)` so
134
+ * uppercase/lowercase divergence across WeChat builds doesn't blow up
135
+ * the SELECT. Returns a Map<lowercased_name, actual_name>.
136
+ *
137
+ * Post-sjqz audit defence — sjqz schema docs show some column-case
138
+ * variation across versions; failing late at SELECT yields a confusing
139
+ * "no such column" error rather than a clean fallback path.
140
+ */
141
+ _columnMap(table) {
142
+ if (!this._db) return new Map();
143
+ try {
144
+ const rows = this._db.prepare(`PRAGMA table_info(${table})`).all();
145
+ return new Map(rows.map((r) => [String(r.name).toLowerCase(), r.name]));
146
+ } catch (_e) {
147
+ return new Map();
148
+ }
149
+ }
150
+
151
+ /**
152
+ * Resolve a list of desired column names against the actual table
153
+ * schema. Returns the actual column names as declared (not quoted); throws
154
+ * if any required column is missing (caller catches and surfaces a
155
+ * "schema-mismatch" error to the host).
156
+ */
157
+ _resolveColumns(table, desiredNames, { required = true } = {}) {
158
+ const map = this._columnMap(table);
159
+ const resolved = [];
160
+ const missing = [];
161
+ for (const name of desiredNames) {
162
+ const actual = map.get(name.toLowerCase());
163
+ if (actual) resolved.push(actual);
164
+ else if (required) missing.push(name);
165
+ }
166
+ if (missing.length > 0 && required) {
167
+ const err = new Error(
168
+ `WeChatDBReader: table '${table}' missing required columns: ${missing.join(", ")} ` +
169
+ `(available: ${Array.from(map.values()).join(", ")})`,
170
+ );
171
+ err.code = "WECHAT_SCHEMA_MISMATCH";
172
+ throw err;
173
+ }
174
+ return resolved;
175
+ }
176
+
132
177
  /**
133
178
  * Fetch up to `limit` messages since `sinceMsgSvrId` (per design doc
134
179
  * §6 OQ-6 watermark = per-talker last msgSvrId). For initial v0 we
135
180
  * accept a global watermark and let the adapter post-filter per
136
181
  * talker.
182
+ *
183
+ * Column names resolved via PRAGMA table_info to survive case-drift
184
+ * across WeChat versions (sjqz audit defence).
137
185
  */
138
186
  fetchMessages({ sinceMsgSvrId = 0, limit = 1000, talker = null } = {}) {
139
187
  if (!this._db) throw new Error("WeChatDBReader: call open() first");
140
- let sql = "SELECT msgId, msgSvrId, talker, content, type, createTime, isSend, status FROM message";
188
+ const cols = this._resolveColumns("message", [
189
+ "msgId",
190
+ "msgSvrId",
191
+ "talker",
192
+ "content",
193
+ "type",
194
+ "createTime",
195
+ "isSend",
196
+ "status",
197
+ ]);
198
+ let sql = `SELECT ${cols.join(", ")} FROM message`;
141
199
  const params = [];
142
200
  const where = [];
143
201
  if (sinceMsgSvrId) {
144
- where.push("msgSvrId > ?");
202
+ // Use the resolved column name in WHERE / ORDER BY to match case.
203
+ const msgSvrIdCol = cols[1];
204
+ where.push(`${msgSvrIdCol} > ?`);
145
205
  params.push(sinceMsgSvrId);
146
206
  }
147
207
  if (talker) {
148
- where.push("talker = ?");
208
+ const talkerCol = cols[2];
209
+ where.push(`${talkerCol} = ?`);
149
210
  params.push(talker);
150
211
  }
151
212
  if (where.length > 0) sql += " WHERE " + where.join(" AND ");
152
- sql += " ORDER BY msgSvrId ASC LIMIT ?";
213
+ sql += ` ORDER BY ${cols[1]} ASC LIMIT ?`;
153
214
  params.push(limit);
154
215
  return this._db.prepare(sql).all(...params);
155
216
  }
@@ -157,14 +218,31 @@ class WeChatDBReader {
157
218
  /**
158
219
  * Fetch contacts. WeChat rcontact has many columns; we pull the ones
159
220
  * relevant for normalization.
221
+ *
222
+ * sjqz parity (wechat.py:262-263): excludes `@stranger` (unconfirmed
223
+ * friend requests) and `fake_*` (WeChat internal placeholder accounts).
224
+ * Without this filter the vault gets polluted with junk Person entities
225
+ * that never represent real contacts.
226
+ *
227
+ * @param {object} [opts]
228
+ * @param {number} [opts.limit=5000]
229
+ * @param {boolean} [opts.includeJunk=false] true to skip the
230
+ * stranger/fake filter (debug / forensic use only)
160
231
  */
161
- fetchContacts({ limit = 5000 } = {}) {
232
+ fetchContacts({ limit = 5000, includeJunk = false } = {}) {
162
233
  if (!this._db) throw new Error("WeChatDBReader: call open() first");
163
- return this._db
164
- .prepare(
165
- "SELECT username, alias, nickname, conRemark, type FROM rcontact LIMIT ?",
166
- )
167
- .all(limit);
234
+ const cols = this._resolveColumns("rcontact", [
235
+ "username",
236
+ "alias",
237
+ "nickname",
238
+ "conRemark",
239
+ "type",
240
+ ]);
241
+ const usernameCol = cols[0];
242
+ const sql = includeJunk
243
+ ? `SELECT ${cols.join(", ")} FROM rcontact LIMIT ?`
244
+ : `SELECT ${cols.join(", ")} FROM rcontact WHERE ${usernameCol} NOT LIKE '%@stranger' AND ${usernameCol} NOT LIKE 'fake_%' LIMIT ?`;
245
+ return this._db.prepare(sql).all(limit);
168
246
  }
169
247
 
170
248
  /**
@@ -53,6 +53,13 @@ function runAgentUnderMock(mocks = {}) {
53
53
  Interceptor: mocks.Interceptor,
54
54
  send: mocks.send,
55
55
  setTimeout: mocks.setTimeout || setTimeout,
56
+ // Frida injects Memory at runtime; tests that exercise the ascii-hex
57
+ // key-read path inject a mock with readCString(ptr, maxLen). Tests
58
+ // that don't touch it get a no-op stub so the agent module loads
59
+ // cleanly even when the hook itself never calls readCString.
60
+ Memory: mocks.Memory || {
61
+ readCString: () => null,
62
+ },
56
63
  };
57
64
  const ctx = vm.createContext(sandbox);
58
65
  const src = loadAgentScript();
@@ -32,7 +32,10 @@
32
32
  "use strict";
33
33
 
34
34
  (function () {
35
- var TARGET_MODULE = "libwcdb.so";
35
+ // sjqz-verified module name is `libWCDB.so` (uppercase); some WeChat
36
+ // builds ship lowercase. Try both — first match wins, no extra cost
37
+ // because Process.findModuleByName is a cheap lookup.
38
+ var TARGET_MODULES = ["libWCDB.so", "libwcdb.so"];
36
39
  // Primary symbol per §18.3. Add fallbacks below — version drift will
37
40
  // shift the export name; host treats first hit as authoritative.
38
41
  var SYMBOLS = [
@@ -60,63 +63,182 @@
60
63
  // the host detaches quickly (anti-detection §18.6 #4).
61
64
  var fired = false;
62
65
 
66
+ // Sig-aware arg index map. The host treats the first 'key' event as
67
+ // authoritative, so picking the wrong index for v2 = host gets the
68
+ // database NAME pointer (e.g. "main") and DB opens fail silently.
69
+ // sqlite3_key(sqlite3 *db, const void *pKey, int nKey)
70
+ // args[0]=db, args[1]=key, args[2]=len
71
+ // sqlite3_key_v2(sqlite3 *db, const char *zDbName, const void *pKey, int nKey)
72
+ // args[0]=db, args[1]=name, args[2]=key, args[3]=len
73
+ // wcdb_setkey / WCDBKeyDerive: unknown sig — assume sqlite3_key shape
74
+ // Mangled C++: WCDB::Database::setCipherKey(*this, const std::string&)
75
+ // args[0]=this, args[1]=&string (length needs .size()) — not handled
76
+ // here; emit error so the host falls back to MD5 path.
77
+ function argIndicesFor(symbolName) {
78
+ if (symbolName === "sqlite3_key_v2") {
79
+ return { key: 2, len: 3, sig: "v2" };
80
+ }
81
+ if (symbolName.indexOf("_ZN4WCDB") === 0) {
82
+ return { key: -1, len: -1, sig: "mangled-cpp" };
83
+ }
84
+ return { key: 1, len: 2, sig: "v1" };
85
+ }
86
+
87
+ // sjqz extract_wechat_key.py uses Memory.readCString(args[1]) for the
88
+ // key — meaning some WeChat builds pass the key as a NUL-terminated
89
+ // 64-char ASCII hex string. Other builds (and the original SQLCipher
90
+ // contract) pass 32 raw bytes. We can disambiguate by `len`:
91
+ // - len === 32 → raw 32-byte key → readByteArray + bytesToHex
92
+ // - len === 64 → ASCII hex string → readCString
93
+ // - any other length in 1..256 → emit both readings (hex + alt), format "ambiguous"
63
94
  function makeHook(symbolName) {
95
+ var idx = argIndicesFor(symbolName);
64
96
  return {
65
97
  onEnter: function (args) {
66
98
  if (fired) return;
99
+ if (idx.key < 0) {
100
+ send({
101
+ kind: "error",
102
+ message:
103
+ "unsupported symbol signature: " +
104
+ symbolName +
105
+ " — host should fall back to MD5(IMEI+UIN) key path",
106
+ });
107
+ return;
108
+ }
67
109
  try {
68
- // sqlite3_key signature: int sqlite3_key(sqlite3 *db, const void *pKey, int nKey)
69
- // args[1] = key bytes, args[2] = key length
70
- var len = args[2].toInt32();
110
+ var len = args[idx.len].toInt32();
71
111
  if (len <= 0 || len > 256) {
72
- send({ kind: "error", message: "implausible key length " + len + " at " + symbolName });
112
+ send({
113
+ kind: "error",
114
+ message:
115
+ "implausible key length " + len + " at " + symbolName,
116
+ });
117
+ return;
118
+ }
119
+ var hex;
120
+ var format;
121
+ if (len === 64) {
122
+ // ASCII hex string (sjqz-verified path on WeChat 7.x/8.0 libWCDB)
123
+ var s = Memory.readCString(args[idx.key], len);
124
+ if (!s || s.length === 0) {
125
+ send({
126
+ kind: "error",
127
+ message: "readCString returned empty at " + symbolName,
128
+ });
129
+ return;
130
+ }
131
+ hex = s.toLowerCase();
132
+ format = "ascii-hex";
133
+ } else if (len === 32) {
134
+ // Raw 32-byte key — convert to 64-char hex
135
+ var buf = args[idx.key].readByteArray(len);
136
+ hex = bytesToHex(buf);
137
+ format = "raw-bytes";
138
+ } else {
139
+ // Ambiguous length — could be either. Emit both interpretations
140
+ // and let the host try each against the DB until one succeeds.
141
+ var bufAmb = args[idx.key].readByteArray(len);
142
+ var hexFromBytes = bytesToHex(bufAmb);
143
+ var hexFromString = null;
144
+ try {
145
+ var sAmb = Memory.readCString(args[idx.key], len);
146
+ if (sAmb) hexFromString = sAmb.toLowerCase();
147
+ } catch (_e) {
148
+ // readCString may fault on non-NUL-terminated bytes; ignore.
149
+ }
150
+ fired = true;
151
+ send({
152
+ kind: "key",
153
+ hex: hexFromBytes,
154
+ alt: hexFromString,
155
+ source: symbolName,
156
+ sig: idx.sig,
157
+ format: "ambiguous",
158
+ length: len,
159
+ });
73
160
  return;
74
161
  }
75
- var buf = args[1].readByteArray(len);
76
- var hex = bytesToHex(buf);
77
162
  if (!hex) {
78
- send({ kind: "error", message: "empty key buffer at " + symbolName });
163
+ send({
164
+ kind: "error",
165
+ message: "empty key buffer at " + symbolName,
166
+ });
79
167
  return;
80
168
  }
81
169
  fired = true;
82
- send({ kind: "key", hex: hex, source: symbolName });
170
+ send({
171
+ kind: "key",
172
+ hex: hex,
173
+ source: symbolName,
174
+ sig: idx.sig,
175
+ format: format,
176
+ length: len,
177
+ });
83
178
  } catch (e) {
84
- send({ kind: "error", message: "hook exception at " + symbolName + ": " + (e && e.message ? e.message : String(e)) });
179
+ send({
180
+ kind: "error",
181
+ message:
182
+ "hook exception at " +
183
+ symbolName +
184
+ ": " +
185
+ (e && e.message ? e.message : String(e)),
186
+ });
85
187
  }
86
188
  },
87
189
  };
88
190
  }
89
191
 
90
- function tryAttach() {
91
- var mod = Process.findModuleByName(TARGET_MODULE);
192
+ function tryAttachOnModule(moduleName) {
193
+ var mod = Process.findModuleByName(moduleName);
92
194
  if (!mod) return false;
93
195
  var attached = 0;
94
196
  for (var i = 0; i < SYMBOLS.length; i++) {
95
- var addr = Module.findExportByName(TARGET_MODULE, SYMBOLS[i]);
197
+ var addr = Module.findExportByName(moduleName, SYMBOLS[i]);
96
198
  if (!addr) continue;
97
199
  try {
98
200
  Interceptor.attach(addr, makeHook(SYMBOLS[i]));
99
- send({ kind: "hooked", symbol: SYMBOLS[i], module: TARGET_MODULE });
201
+ send({ kind: "hooked", symbol: SYMBOLS[i], module: moduleName });
100
202
  attached++;
101
203
  } catch (e) {
102
- send({ kind: "error", message: "Interceptor.attach failed for " + SYMBOLS[i] + ": " + (e && e.message ? e.message : String(e)) });
204
+ send({
205
+ kind: "error",
206
+ message:
207
+ "Interceptor.attach failed for " +
208
+ SYMBOLS[i] +
209
+ ": " +
210
+ (e && e.message ? e.message : String(e)),
211
+ });
103
212
  }
104
213
  }
105
214
  return attached > 0;
106
215
  }
107
216
 
217
+ function tryAttach() {
218
+ for (var i = 0; i < TARGET_MODULES.length; i++) {
219
+ if (tryAttachOnModule(TARGET_MODULES[i])) {
220
+ return true;
221
+ }
222
+ }
223
+ return false;
224
+ }
225
+
108
226
  // Module-load polling — §18.6 #1 "hook at module-load time before
109
- // anti-detection thread runs". WeChat lazy-loads libwcdb when the
227
+ // anti-detection thread runs". WeChat lazy-loads libWCDB when the
110
228
  // first DB opens, so we can't always find it at script start.
111
229
  if (!tryAttach()) {
112
- send({ kind: "module-waiting", module: TARGET_MODULE });
230
+ send({ kind: "module-waiting", module: TARGET_MODULES.join("|") });
113
231
  var attempts = 0;
114
232
  var poll = function () {
115
233
  attempts++;
116
234
  if (tryAttach()) return;
117
235
  if (attempts >= 60) {
118
236
  // 60 attempts × 500ms = 30s ceiling, matches host timeoutMs
119
- send({ kind: "error", message: TARGET_MODULE + " did not load within 30s" });
237
+ send({
238
+ kind: "error",
239
+ message:
240
+ TARGET_MODULES.join("|") + " did not load within 30s",
241
+ });
120
242
  return;
121
243
  }
122
244
  setTimeout(poll, 500);
@@ -191,6 +191,14 @@ class FridaKeyProvider extends KeyProvider {
191
191
  if (evt.kind === "key") {
192
192
  settled = true;
193
193
  telemetry.keySource = evt.source;
194
+ // Phase 12.6 (post-sjqz audit) — capture sig/format/length so a
195
+ // failed DB open can be diagnosed: ascii-hex vs raw-bytes
196
+ // determines whether sqlite3_key got the expected key bytes,
197
+ // and sig=v1/v2 confirms args index resolution.
198
+ telemetry.keyFormat = evt.format || null;
199
+ telemetry.keySig = evt.sig || null;
200
+ telemetry.keyLength = evt.length || null;
201
+ telemetry.keyAlt = evt.alt || null;
194
202
  telemetry.durationMs = Date.now() - telemetry.startedAt;
195
203
  cleanup().then(() => resolve(String(evt.hex || "").toLowerCase()));
196
204
  return;
@@ -203,11 +203,20 @@ function contactDisplayName(byUsername, wxid) {
203
203
  function guessContactSubtype(row) {
204
204
  // rcontact.type bits: official accounts / group / regular contact /
205
205
  // black list. Detailed mapping in WeChat reverse-eng community —
206
- // for v0.5 we keep it simple: anything that's not the user's self is
207
- // "contact". Phase 12.6 will refine with full bit mapping.
208
- if (typeof row.username === "string" && row.username.endsWith("@chatroom")) {
206
+ // for v0.5 we keep it simple: chatroom → unknown (not a Person),
207
+ // `gh_*` username → merchant (公众号 / Official Account — brand /
208
+ // business pushing content; closest enum match), rest → contact.
209
+ // Phase 12.6 will refine with full bit mapping + rcontact.type bits.
210
+ // (sjqz parity wechat.py:282 — get_friends() excludes gh_* from
211
+ // friends view but keeps them in contacts; we keep as Person with
212
+ // distinct subtype so Ask flow / EntityResolver can filter cleanly.)
213
+ if (typeof row.username !== "string") return "contact";
214
+ if (row.username.endsWith("@chatroom")) {
209
215
  return "unknown"; // chat group, not a Person
210
216
  }
217
+ if (row.username.startsWith("gh_")) {
218
+ return "merchant"; // 公众号 / Official Account
219
+ }
211
220
  return "contact";
212
221
  }
213
222
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",