@chainlesschain/personal-data-hub 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/apple-health.test.js +95 -0
- package/__tests__/adapters/email-templates.test.js +123 -0
- package/__tests__/adapters/family-23-collectors-scaffold.test.js +178 -0
- package/__tests__/adapters/game-genshin-scaffold.test.js +107 -0
- package/__tests__/adapters/git-activity.test.js +7 -1
- package/__tests__/adapters/local-im-pc.test.js +149 -0
- package/__tests__/adapters/netease-music.test.js +74 -0
- package/__tests__/adapters/qq-pc-direct-read.test.js +186 -0
- package/__tests__/adapters/system-data-adapter.test.js +4 -1
- package/__tests__/adapters/wechat-pc-direct-read.test.js +207 -0
- package/__tests__/adapters/weread.test.js +123 -0
- package/__tests__/analysis.test.js +120 -15
- package/__tests__/mobile-extractor-encrypted.test.js +460 -0
- package/__tests__/prompt-builder.test.js +47 -2
- package/__tests__/registry-readiness.test.js +233 -0
- package/__tests__/social-douyin-im-direct-read.test.js +311 -0
- package/__tests__/social-douyin-snapshot.test.js +5 -2
- package/__tests__/vault.test.js +99 -0
- package/lib/adapter-guide.js +520 -0
- package/lib/adapter-readiness.js +257 -0
- package/lib/adapters/_local-im-db-reader.js +218 -0
- package/lib/adapters/_local-im-pc-adapter.js +162 -0
- package/lib/adapters/apple-health/index.js +329 -0
- package/lib/adapters/dingtalk-pc/index.js +29 -0
- package/lib/adapters/edu-huawei-learning/api-client.js +47 -0
- package/lib/adapters/edu-huawei-learning/index.js +255 -0
- package/lib/adapters/edu-zuoyebang/api-client.js +48 -0
- package/lib/adapters/edu-zuoyebang/index.js +259 -0
- package/lib/adapters/email-imap/email-adapter.js +16 -0
- package/lib/adapters/email-imap/templates/bill.js +174 -18
- package/lib/adapters/feishu-pc/index.js +29 -0
- package/lib/adapters/finance-alipay/api-client.js +48 -0
- package/lib/adapters/finance-alipay/index.js +257 -0
- package/lib/adapters/game-genshin/api-client.js +59 -0
- package/lib/adapters/game-genshin/index.js +274 -0
- package/lib/adapters/game-honor-of-kings/api-client.js +54 -0
- package/lib/adapters/game-honor-of-kings/index.js +259 -0
- package/lib/adapters/netease-music/index.js +227 -0
- package/lib/adapters/qq-pc/index.js +200 -0
- package/lib/adapters/qq-pc/nt-db-reader.js +210 -0
- package/lib/adapters/social-douyin/index.js +194 -1
- package/lib/adapters/wechat/wechat-adapter.js +7 -1
- package/lib/adapters/wechat-pc/index.js +335 -0
- package/lib/adapters/wechat-pc/pc-db-reader.js +327 -0
- package/lib/adapters/weread/api-client.js +128 -0
- package/lib/adapters/weread/index.js +337 -0
- package/lib/analysis.js +65 -0
- package/lib/index.js +39 -0
- package/lib/mobile-extractor/bplist.js +233 -0
- package/lib/mobile-extractor/ios-backup-crypto.js +315 -0
- package/lib/mobile-extractor/ios.js +131 -16
- package/lib/prompt-builder.js +19 -1
- package/lib/registry.js +170 -0
- package/lib/vault.js +105 -0
- package/package.json +1 -1
- package/scripts/run-native-tests-sandbox.sh +2 -0
- package/vitest.config.js +79 -1
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* QQ NT **desktop (PC)** local-DB reader — 本地直读样板, ported from the
|
|
5
|
+
* Douyin / wechat-pc samples to QQ NT (新版 PC QQ, Electron 9.x).
|
|
6
|
+
*
|
|
7
|
+
* Source: `nt_msg.db` under the QQ data dir (e.g.
|
|
8
|
+
* %APPDATA%\Tencent\QQ\nt_qq_<hash>\nt_db\nt_msg.db).
|
|
9
|
+
* Message tables: `c2c_msg_table` (1-on-1) + `group_msg_table` (群).
|
|
10
|
+
*
|
|
11
|
+
* ⚠️ HONEST CAVEAT (v0.1): unlike Douyin's plaintext im.db, QQ NT is the
|
|
12
|
+
* hardest of the three:
|
|
13
|
+
* - SQLCipher-encrypted (decrypt to plaintext first — recommended).
|
|
14
|
+
* - Column names are NUMERIC + obfuscated ("40050", "40011", ...) and
|
|
15
|
+
* DRIFT across QQ versions.
|
|
16
|
+
* - Message bodies are protobuf BLOBs, not plain text — full text needs
|
|
17
|
+
* per-type protobuf decoding that must be tuned on a real DB.
|
|
18
|
+
*
|
|
19
|
+
* So this reader is deliberately DEFENSIVE + LOUD rather than clever:
|
|
20
|
+
* - It probes a table's columns and resolves time/type/sender/peer/content
|
|
21
|
+
* against candidate lists (readable names FIRST, then known numeric ids).
|
|
22
|
+
* - It extracts text ONLY when the resolved content column is a real
|
|
23
|
+
* string; otherwise text=null but the FULL raw row is preserved in
|
|
24
|
+
* `rawRow` so nothing is lost and a later protobuf decoder can fill it.
|
|
25
|
+
* - The diagnostic reports exactly which tables + columns were resolved,
|
|
26
|
+
* so the user/UI sees what worked instead of silently getting 0 rows.
|
|
27
|
+
*
|
|
28
|
+
* Test seam: inject `_databaseClass` to bypass the native dual-load.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
function loadDatabaseClass() {
|
|
32
|
+
for (const mod of ["better-sqlite3-multiple-ciphers", "better-sqlite3"]) {
|
|
33
|
+
let cls;
|
|
34
|
+
try {
|
|
35
|
+
// eslint-disable-next-line global-require
|
|
36
|
+
cls = require(mod);
|
|
37
|
+
} catch (_e) {
|
|
38
|
+
continue;
|
|
39
|
+
}
|
|
40
|
+
try {
|
|
41
|
+
const probe = new cls(":memory:");
|
|
42
|
+
probe.close();
|
|
43
|
+
return cls;
|
|
44
|
+
} catch (_e) {
|
|
45
|
+
// ABI mismatch — try next
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
throw new Error(
|
|
49
|
+
"qq-pc-nt-db-reader: neither better-sqlite3-multiple-ciphers nor better-sqlite3 loaded — both ABI-mismatched",
|
|
50
|
+
);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
 * Best-effort query: prepare `sql` on `db` and return all rows.
 *
 * @param {object} db - handle exposing `prepare(sql)` whose result has `all()`
 * @param {string} sql - statement text to run
 * @returns {Array|null} the rows, or `null` when preparing or running throws
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_e) {
    // Deliberately swallowed: callers treat null as "query not possible".
    rows = null;
  }
  return rows;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Return the first candidate name that `columns` contains.
 *
 * @param {{has: function(string): boolean}} columns - set of existing column names
 * @param {Iterable<string>} candidates - names to try, in priority order
 * @returns {string|null} the first match, or `null` when none is present
 */
function pickCol(columns, candidates) {
  let match = null;
  for (const name of candidates) {
    if (!columns.has(name)) continue;
    match = name;
    break;
  }
  return match;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Convert an epoch value of unknown unit into whole milliseconds.
 *
 * The unit is inferred from magnitude: above 1e15 is treated as
 * microseconds, above 1e12 as milliseconds, anything else as seconds.
 *
 * @param {*} v - candidate epoch value
 * @returns {number|null} floored milliseconds, or `null` when `v` is not a
 *   finite number greater than zero
 */
function normalizeEpochMs(v) {
  const usable = typeof v === "number" && Number.isFinite(v) && v > 0;
  if (!usable) return null;

  let ms;
  if (v > 1e15) {
    ms = v / 1000; // microseconds
  } else if (v > 1e12) {
    ms = v; // already milliseconds
  } else {
    ms = v * 1000; // seconds
  }
  return Math.floor(ms);
}
|
|
74
|
+
|
|
75
|
+
// Candidate column names per field — readable names first (decrypted /
|
|
76
|
+
// re-exported DBs sometimes have them), then the known QQ NT numeric ids.
|
|
77
|
+
// Tune the numeric lists on a real device; unknowns just fall through to
|
|
78
|
+
// rawRow (loud diagnostic), never a silent 0.
|
|
79
|
+
// Candidate column names per logical field, in lookup priority order.
// Object.freeze is shallow, and this table is exported, so each inner list is
// frozen as well: a consumer can no longer push/sort/splice a list and
// silently change column resolution for every later read.
const COL_CANDIDATES = Object.freeze({
  msgId: Object.freeze(["msgId", "msg_id", "40001", "40020"]),
  time: Object.freeze(["msgTime", "time", "timestamp", "40050"]),
  type: Object.freeze(["msgType", "type", "40011", "40012"]),
  sender: Object.freeze(["senderUin", "sender", "senderUid", "40033", "40030"]),
  peer: Object.freeze(["peerUin", "peer", "peerUid", "40021", "40027"]),
  content: Object.freeze(["content", "text", "msgContent", "40080", "40800"]),
});
|
|
87
|
+
|
|
88
|
+
/**
 * Open a QQ NT database read-only, as plaintext SQLite or as SQLCipher.
 *
 * Without a key the file is opened as-is. With a key, `PRAGMA key` and
 * `PRAGMA cipher_compatibility = 4` are issued first. In both modes the open
 * is verified by counting rows in `sqlite_master`; on failure the handle is
 * closed before the error is thrown.
 *
 * @param {string} dbPath - path handed to the driver constructor
 * @param {object} [opts]
 * @param {string} [opts.key] - non-empty string enables SQLCipher mode. Exactly
 *   64 hex characters are passed as a raw key (`"x'..'"`); anything else is
 *   passed as a quoted passphrase.
 * @param {Function} [opts._databaseClass] - driver constructor override (test
 *   seam); when absent `loadDatabaseClass()` picks one
 * @returns {{db: object, mode: "plaintext"|"sqlcipher"}}
 * @throws {Error} `code === "QQ_PC_NEEDS_KEY"` when the keyless verification
 *   fails, `code === "QQ_PC_DECRYPT_FAILED"` when the keyed open fails. The
 *   underlying error is kept on `cause`.
 */
function openNtDb(dbPath, opts = {}) {
  const Database = opts._databaseClass || loadDatabaseClass();
  const key = typeof opts.key === "string" && opts.key.length > 0 ? opts.key : null;
  const mode = key ? "sqlcipher" : "plaintext";

  const db = new Database(dbPath, { readonly: true });
  try {
    if (key) {
      // A passphrase is embedded in a single-quoted SQL string literal, so any
      // single quote inside it must be doubled; otherwise a key such as
      // `pa'ss` terminates the literal early and the PRAGMA is malformed.
      const keyExpr = /^[0-9a-fA-F]{64}$/.test(key)
        ? `"x'${key}'"`
        : `'${key.replace(/'/g, "''")}'`;
      db.pragma(`key = ${keyExpr}`);
      db.exec("PRAGMA cipher_compatibility = 4");
    }
    // Forces a real page read: fails here if the file is not readable SQLite
    // under the current (keyed or keyless) settings.
    db.prepare("SELECT count(*) AS n FROM sqlite_master").get();
    return { db, mode };
  } catch (err) {
    try { db.close(); } catch (_e) { /* ignore */ }
    const e = new Error(
      key
        ? `qq-pc-nt-db-reader: SQLCipher open failed (key wrong, or decrypt to plaintext first): ${err.message}`
        : `qq-pc-nt-db-reader: db is not plaintext SQLite (decrypt nt_msg.db first, or pass --key): ${err.message}`,
    );
    e.code = key ? "QQ_PC_DECRYPT_FAILED" : "QQ_PC_NEEDS_KEY";
    e.cause = err;
    throw e;
  }
}
|
|
124
|
+
|
|
125
|
+
/**
 * Read up to `limit` rows from one message table and map them to messages.
 *
 * The table's columns are probed with `PRAGMA table_info`; each logical
 * field is resolved against `COL_CANDIDATES` and the resolution is recorded
 * in `diag.resolvedColumns[tableName]`. Rows are selected with `SELECT *`
 * (newest first when a time column was resolved) and the whole row is kept
 * on `rawRow`.
 *
 * @param {object} db - open database handle
 * @param {string} tableName - table to read; interpolated into SQL as-is
 * @param {boolean} isGroup - copied onto every returned message
 * @param {number} limit - row cap; interpolated into SQL as-is
 * @param {{resolvedColumns: object}} diag - diagnostic object, mutated
 * @returns {Array<object>} mapped messages; `[]` when the table has no
 *   column info or the row query fails
 */
function readMsgTable(db, tableName, isGroup, limit, diag) {
  const tableInfo = trySelect(db, `PRAGMA table_info(${tableName})`);
  if (!Array.isArray(tableInfo) || tableInfo.length === 0) return [];

  const columnNames = new Set(tableInfo.map((r) => r.name));
  const resolved = {};
  for (const field of ["msgId", "time", "type", "sender", "peer", "content"]) {
    resolved[field] = pickCol(columnNames, COL_CANDIDATES[field]);
  }
  diag.resolvedColumns[tableName] = resolved;

  // SELECT * on purpose: unresolved columns and non-text bodies stay
  // available to the caller through rawRow.
  let sql = `SELECT * FROM ${tableName}`;
  if (resolved.time) sql += ` ORDER BY "${resolved.time}" DESC`;
  sql += ` LIMIT ${limit}`;
  const rows = trySelect(db, sql) || [];

  // String form of a resolved column's value, or null when the column is
  // unresolved or the value is null/undefined.
  const stringOrNull = (row, col) =>
    col && row[col] != null ? String(row[col]) : null;

  return rows.map((row, idx) => {
    const rawTime = resolved.time ? row[resolved.time] : null;
    const rawType = resolved.type ? row[resolved.type] : null;
    const contentVal = resolved.content ? row[resolved.content] : null;

    const message = {
      // Falls back to a positional id when no id column/value is usable.
      msgId: stringOrNull(row, resolved.msgId) || `${tableName}-${idx}`,
      isGroup,
      createdTimeMs: null,
      type: typeof rawType === "number" ? rawType : null,
      senderUin: stringOrNull(row, resolved.sender),
      peerUin: stringOrNull(row, resolved.peer),
      // Content counts as text only when it is an actual string.
      text: typeof contentVal === "string" ? contentVal : null,
      rawRow: row,
    };
    if (typeof rawTime === "number") {
      message.createdTimeMs = normalizeEpochMs(rawTime);
    }
    return message;
  });
}
|
|
166
|
+
|
|
167
|
+
/**
 * Read messages from a QQ NT `nt_msg.db`.
 *
 * Opens the database via `openNtDb`, reads `c2c_msg_table` and then
 * `group_msg_table` through `readMsgTable` (each capped at the same limit),
 * and always closes the handle afterwards.
 *
 * @param {string} dbPath - non-empty path to the database file
 * @param {object} [opts] - forwarded to `openNtDb`
 * @param {number} [opts.limitMessages] - positive integer per-table row cap;
 *   any other value falls back to 20000
 * @returns {{messages: Array<object>, diagnostic: object}} c2c messages
 *   followed by group messages, plus counts, table flags, resolved columns
 *   and the open mode
 * @throws {TypeError} when `dbPath` is not a non-empty string
 */
function readQqNt(dbPath, opts = {}) {
  const pathOk = typeof dbPath === "string" && dbPath.length > 0;
  if (!pathOk) {
    throw new TypeError("readQqNt: dbPath must be a non-empty string");
  }

  let limit = 20_000;
  if (Number.isInteger(opts.limitMessages) && opts.limitMessages > 0) {
    limit = opts.limitMessages;
  }

  const opened = openNtDb(dbPath, opts);
  const db = opened.db;
  const diagnostic = {
    messageCount: 0,
    hadC2cTable: false,
    hadGroupTable: false,
    textCount: 0,
    resolvedColumns: {},
    mode: opened.mode,
  };
  const messages = [];

  // Append a batch and count the entries that carry non-empty text.
  const collect = (batch) => {
    for (const m of batch) {
      messages.push(m);
      if (typeof m.text === "string" && m.text.length > 0) {
        diagnostic.textCount += 1;
      }
    }
  };

  try {
    // A table counts as present once readMsgTable recorded its columns.
    const c2c = readMsgTable(db, "c2c_msg_table", false, limit, diagnostic);
    if (diagnostic.resolvedColumns.c2c_msg_table) diagnostic.hadC2cTable = true;

    const group = readMsgTable(db, "group_msg_table", true, limit, diagnostic);
    if (diagnostic.resolvedColumns.group_msg_table) diagnostic.hadGroupTable = true;

    collect(c2c);
    collect(group);
    diagnostic.messageCount = messages.length;
  } finally {
    try {
      db.close();
    } catch (_e) {
      /* ignore */
    }
  }
  return { messages, diagnostic };
}
|
|
204
|
+
|
|
205
|
+
// Module surface: the two entry points and the column-candidate table.
module.exports = {
  readQqNt,
  openNtDb,
  COL_CANDIDATES,
  // Internal helpers grouped separately — presumably exposed for unit tests.
  _internals: { loadDatabaseClass, normalizeEpochMs, pickCol, readMsgTable },
};
|
|
@@ -185,7 +185,27 @@ class DouyinAdapter {
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
async *sync(opts = {}) {
|
|
188
|
+
// ── 本地直读样板 (local direct-read) ─────────────────────────────────
|
|
189
|
+
// The most reliable Douyin path: point at a locally-present
|
|
190
|
+
// `<uid>_im.db` (pulled from a rooted device or copied off the phone)
|
|
191
|
+
// and read 私信 + 联系人 straight out of the plaintext SQLite — no ADB
|
|
192
|
+
// orchestration, no snapshot-JSON round trip, no X-Bogus signing.
|
|
193
|
+
//
|
|
194
|
+
// Routing:
|
|
195
|
+
// 1. opts.imDbPath — explicit IM-db path → direct read
|
|
196
|
+
// 2. opts.inputPath that sniffs as a SQLite file → direct read
|
|
197
|
+
// (so `cc hub sync-adapter social-douyin --input <uid>_im.db`
|
|
198
|
+
// just works); a non-SQLite inputPath is a snapshot JSON.
|
|
199
|
+
// 3. opts.dbPath (legacy) — Phase 13.3 video-history tables.
|
|
200
|
+
if (typeof opts.imDbPath === "string" && opts.imDbPath.length > 0) {
|
|
201
|
+
yield* this._syncViaImDb({ ...opts, dbPath: opts.imDbPath });
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
188
204
|
if (typeof opts.inputPath === "string" && opts.inputPath.length > 0) {
|
|
205
|
+
if (this._looksLikeSqlite(opts.inputPath)) {
|
|
206
|
+
yield* this._syncViaImDb({ ...opts, dbPath: opts.inputPath });
|
|
207
|
+
return;
|
|
208
|
+
}
|
|
189
209
|
yield* this._syncViaSnapshot(opts);
|
|
190
210
|
return;
|
|
191
211
|
}
|
|
@@ -195,10 +215,107 @@ class DouyinAdapter {
|
|
|
195
215
|
return;
|
|
196
216
|
}
|
|
197
217
|
throw new Error(
|
|
198
|
-
"social-douyin.sync: needs opts.inputPath (
|
|
218
|
+
"social-douyin.sync: needs opts.imDbPath / opts.inputPath (<uid>_im.db or snapshot JSON) OR opts.dbPath (legacy video-history sqlite)",
|
|
199
219
|
);
|
|
200
220
|
}
|
|
201
221
|
|
|
222
|
+
/**
|
|
223
|
+
* Cheap SQLite-file sniff via the 16-byte magic header
|
|
224
|
+
* ("SQLite format 3\0"). Lets sync() auto-route a `--input <uid>_im.db`
|
|
225
|
+
* to direct IM read vs treating a `.json` snapshot as SQLite. Returns
|
|
226
|
+
* false on any read error (caller falls back to snapshot path).
|
|
227
|
+
*/
|
|
228
|
+
_looksLikeSqlite(filePath) {
|
|
229
|
+
try {
|
|
230
|
+
const fd = this._deps.fs.openSync(filePath, "r");
|
|
231
|
+
try {
|
|
232
|
+
const buf = Buffer.alloc(16);
|
|
233
|
+
this._deps.fs.readSync(fd, buf, 0, 16, 0);
|
|
234
|
+
return buf.toString("latin1").startsWith("SQLite format 3");
|
|
235
|
+
} finally {
|
|
236
|
+
this._deps.fs.closeSync(fd);
|
|
237
|
+
}
|
|
238
|
+
} catch (_e) {
|
|
239
|
+
return false;
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* 本地直读 <uid>_im.db — open the plaintext SQLite directly and yield
|
|
245
|
+
* message + contact raw events. Reuses social-douyin-adb/im-db-parser so
|
|
246
|
+
* the defensive column-picker / epoch-normalize / content-JSON logic stays
|
|
247
|
+
* byte-identical with the ADB-pull path (single source of truth). Emits
|
|
248
|
+
* the SAME composite originalIds as the snapshot path so re-syncing the
|
|
249
|
+
* same db (via either route) is idempotent.
|
|
250
|
+
*/
|
|
251
|
+
async *_syncViaImDb(opts) {
|
|
252
|
+
const dbPath = opts.dbPath;
|
|
253
|
+
if (!dbPath || !this._deps.fs.existsSync(dbPath)) return;
|
|
254
|
+
// eslint-disable-next-line global-require
|
|
255
|
+
const { parseImDb } = require("../social-douyin-adb/im-db-parser");
|
|
256
|
+
const parseOpts = {};
|
|
257
|
+
if (Number.isInteger(opts.limitMessages)) parseOpts.limitMessages = opts.limitMessages;
|
|
258
|
+
if (Number.isInteger(opts.limitContacts)) parseOpts.limitContacts = opts.limitContacts;
|
|
259
|
+
if (this._deps.dbDriverFactory) parseOpts._databaseClass = this._deps.dbDriverFactory();
|
|
260
|
+
|
|
261
|
+
const { messages, contacts, diagnostic } = parseImDb(dbPath, parseOpts);
|
|
262
|
+
if (typeof opts.onProgress === "function") {
|
|
263
|
+
try {
|
|
264
|
+
opts.onProgress({ phase: "im-db-parsed", adapter: NAME, ...diagnostic });
|
|
265
|
+
} catch (_e) { /* progress is best-effort */ }
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
const include = opts.include || {};
|
|
269
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
|
|
270
|
+
const fallbackCapturedAt = Date.now();
|
|
271
|
+
let emitted = 0;
|
|
272
|
+
|
|
273
|
+
if (include[KIND_MESSAGE] !== false) {
|
|
274
|
+
for (const m of messages) {
|
|
275
|
+
if (emitted >= limit) return;
|
|
276
|
+
if (!m || typeof m !== "object") continue;
|
|
277
|
+
// Mirror snapshot-builder's composite-id strategy verbatim so the
|
|
278
|
+
// direct-read and snapshot routes converge on one originalId.
|
|
279
|
+
const idPart =
|
|
280
|
+
m.conversationId && m.createdTimeMs
|
|
281
|
+
? `${m.conversationId}-${m.createdTimeMs}`
|
|
282
|
+
: m.senderUid && m.createdTimeMs
|
|
283
|
+
? `${m.senderUid}-${m.createdTimeMs}`
|
|
284
|
+
: `msg-${emitted}`;
|
|
285
|
+
const capturedAt =
|
|
286
|
+
typeof m.createdTimeMs === "number" && m.createdTimeMs > 0
|
|
287
|
+
? m.createdTimeMs
|
|
288
|
+
: fallbackCapturedAt;
|
|
289
|
+
yield {
|
|
290
|
+
adapter: NAME,
|
|
291
|
+
kind: KIND_MESSAGE,
|
|
292
|
+
originalId: stableOriginalId(KIND_MESSAGE, `msg-${idPart}`),
|
|
293
|
+
capturedAt,
|
|
294
|
+
payload: { kind: KIND_MESSAGE, ...m },
|
|
295
|
+
};
|
|
296
|
+
emitted += 1;
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
if (include[KIND_CONTACT] !== false) {
|
|
301
|
+
for (const c of contacts) {
|
|
302
|
+
if (emitted >= limit) return;
|
|
303
|
+
if (!c || typeof c !== "object") continue;
|
|
304
|
+
yield {
|
|
305
|
+
adapter: NAME,
|
|
306
|
+
kind: KIND_CONTACT,
|
|
307
|
+
originalId: stableOriginalId(
|
|
308
|
+
KIND_CONTACT,
|
|
309
|
+
c.uid ? `contact-${c.uid}` : `contact-${emitted}`,
|
|
310
|
+
),
|
|
311
|
+
capturedAt: fallbackCapturedAt,
|
|
312
|
+
payload: { kind: KIND_CONTACT, ...c },
|
|
313
|
+
};
|
|
314
|
+
emitted += 1;
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
202
319
|
async *_syncViaSnapshot(opts) {
|
|
203
320
|
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
204
321
|
const snapshot = JSON.parse(raw);
|
|
@@ -331,6 +448,12 @@ class DouyinAdapter {
|
|
|
331
448
|
if (kind === KIND_SEARCH) {
|
|
332
449
|
return normalizeSearch(p, raw, ingestedAt);
|
|
333
450
|
}
|
|
451
|
+
if (kind === KIND_MESSAGE) {
|
|
452
|
+
return normalizeMessage(p, raw, ingestedAt);
|
|
453
|
+
}
|
|
454
|
+
if (kind === KIND_CONTACT) {
|
|
455
|
+
return normalizeContact(p, raw, ingestedAt);
|
|
456
|
+
}
|
|
334
457
|
throw new Error(`DouyinAdapter.normalize: unknown kind ${kind}`);
|
|
335
458
|
}
|
|
336
459
|
}
|
|
@@ -502,6 +625,76 @@ function normalizeSearch(p, raw, ingestedAt) {
|
|
|
502
625
|
};
|
|
503
626
|
}
|
|
504
627
|
|
|
628
|
+
/**
 * Normalize one IM private-message payload into a single MESSAGE event.
 *
 * The actor is always "person-self" because the self uid is not reliably
 * known here; the real sender is preserved in `extra.senderUid`.
 *
 * @param {object} p - message payload (`text`, `createdTimeMs`, `senderUid`,
 *   `conversationId`, `readStatus`, `contentBlob` are read)
 * @param {object} raw - raw event; `capturedAt` is the time fallback
 * @param {*} ingestedAt - ingestion timestamp, last-resort time fallback
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 */
function normalizeMessage(p, raw, ingestedAt) {
  const occurredAt =
    parseTime(p.createdTimeMs) || raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);
  const text = typeof p.text === "string" ? p.text : "";

  // Title is the first 80 UTF-16 code units of the text. slice() can cut an
  // astral character (e.g. an emoji) in half; when the cut leaves a dangling
  // high surrogate at the end, drop it so the title stays well-formed.
  const TITLE_MAX = 80;
  let title = "(非文本消息)";
  if (text) {
    title = text.slice(0, TITLE_MAX);
    if (text.length > TITLE_MAX) {
      const last = title.charCodeAt(title.length - 1);
      if (last >= 0xd800 && last <= 0xdbff) title = title.slice(0, -1);
    }
  }

  return {
    events: [{
      id: newId(),
      type: ENTITY_TYPES.EVENT,
      subtype: EVENT_SUBTYPES.MESSAGE,
      occurredAt,
      actor: "person-self",
      content: {
        title,
        text,
      },
      ingestedAt,
      source,
      extra: {
        platform: "douyin",
        channel: "im",
        senderUid: p.senderUid || null,
        conversationId: p.conversationId || null,
        readStatus: typeof p.readStatus === "number" ? p.readStatus : null,
        // Keep the raw content blob for non-text message types so a richer
        // consumer can decode it later.
        contentBlob: typeof p.contentBlob === "string" ? p.contentBlob : null,
      },
    }],
    persons: [], places: [], items: [], topics: [],
  };
}
|
|
664
|
+
|
|
665
|
+
/**
 * Normalize one contact payload into a single contact Person.
 *
 * The payload carries no timestamp of its own here, so `raw.capturedAt`
 * (or `ingestedAt`) is used when building the source.
 *
 * @param {object} p - contact payload (`uid`, `shortId`, `name`, `avatarUrl`,
 *   `followStatus` are read)
 * @param {object} raw - raw event; `capturedAt` supplies the time
 * @param {*} ingestedAt - ingestion timestamp, used when `capturedAt` is falsy
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 */
function normalizeContact(p, raw, ingestedAt) {
  // Accept a non-empty string uid, or a numeric uid converted to string.
  let uid = null;
  if (typeof p.uid === "string" && p.uid) {
    uid = p.uid;
  } else if (typeof p.uid === "number") {
    uid = String(p.uid);
  }

  const occurredAt = raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  const identifiers = {};
  if (uid) identifiers["douyin-uid"] = [uid];
  if (p.shortId) identifiers["douyin-short-id"] = [String(p.shortId)];

  const person = {
    // Without a uid the person gets a freshly generated id suffix.
    id: `person-douyin-${uid ? uid : newId()}`,
    type: ENTITY_TYPES.PERSON,
    subtype: PERSON_SUBTYPES.CONTACT,
    names: [p.name || "(unnamed)"],
    ingestedAt,
    source,
    identifiers,
    extra: {
      platform: "douyin",
      avatarUrl: p.avatarUrl || null,
      // 0/1/2 = none / following / mutual (Douyin follow_status)
      followStatus: typeof p.followStatus === "number" ? p.followStatus : null,
    },
  };

  return {
    events: [],
    persons: [person],
    places: [], items: [], topics: [],
  };
}
|
|
697
|
+
|
|
505
698
|
module.exports = {
|
|
506
699
|
DouyinAdapter,
|
|
507
700
|
NAME,
|
|
@@ -72,7 +72,7 @@ class WechatAdapter {
|
|
|
72
72
|
};
|
|
73
73
|
}
|
|
74
74
|
|
|
75
|
-
async authenticate() {
|
|
75
|
+
async authenticate(ctx = {}) {
|
|
76
76
|
// No server auth; sanity check the on-disk state.
|
|
77
77
|
if (!this._dbPath || !fs.existsSync(this._dbPath)) {
|
|
78
78
|
return { ok: false, reason: "DB_NOT_PULLED", error: `DB path missing: ${this._dbPath}` };
|
|
@@ -80,6 +80,12 @@ class WechatAdapter {
|
|
|
80
80
|
if (!this._keyProvider || typeof this._keyProvider.getKey !== "function") {
|
|
81
81
|
return { ok: false, reason: "NO_KEY_PROVIDER", error: "keyProvider required" };
|
|
82
82
|
}
|
|
83
|
+
// Readiness probe — DB + key provider present means "configured". Do NOT
|
|
84
|
+
// invoke the (possibly frida-backed, expensive/side-effectful) key
|
|
85
|
+
// provider during a readiness check; the real sync exercises it.
|
|
86
|
+
if (ctx && ctx.readinessOnly) {
|
|
87
|
+
return { ok: true, mode: "configured" };
|
|
88
|
+
}
|
|
83
89
|
try {
|
|
84
90
|
const key = await this._keyProvider.getKey();
|
|
85
91
|
if (!key) return { ok: false, reason: "EMPTY_KEY", error: "keyProvider returned empty key" };
|