@chainlesschain/personal-data-hub 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/messaging-whatsapp.test.js +289 -0
- package/__tests__/adapters/qq-pc-direct-read.test.js +36 -0
- package/__tests__/adapters/shopping-base.test.js +179 -0
- package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +64 -0
- package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +11 -0
- package/__tests__/adapters/social-xiaohongshu-adb-api-client.test.js +431 -0
- package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
- package/__tests__/adapters/social-xiaohongshu-adb-snapshot-builder.test.js +200 -0
- package/__tests__/adapters/travel-12306.test.js +279 -0
- package/__tests__/adapters/travel-amap.test.js +219 -0
- package/__tests__/adapters/travel-baidu-map.test.js +305 -0
- package/__tests__/adapters/travel-base.test.js +205 -0
- package/__tests__/adapters/travel-ctrip.test.js +203 -0
- package/__tests__/adapters/travel-tencent-map.test.js +207 -0
- package/lib/adapter-guide.js +11 -9
- package/lib/adapters/qq-pc/index.js +72 -1
- package/lib/adapters/qq-pc/qqnt-sidecar.js +109 -0
- package/lib/adapters/social-kuaishou/index.js +7 -2
- package/lib/adapters/social-kuaishou-adb/api-client.js +38 -18
- package/lib/adapters/social-kuaishou-adb/cookies-extension.js +16 -15
- package/lib/adapters/social-toutiao/index.js +8 -4
- package/lib/adapters/travel-base/index.js +9 -2
- package/package.json +1 -1
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
const fs = require("node:fs");
|
|
5
|
+
const path = require("node:path");
|
|
6
|
+
const os = require("node:os");
|
|
7
|
+
const crypto = require("node:crypto");
|
|
8
|
+
|
|
9
|
+
const {
|
|
10
|
+
TencentMapAdapter,
|
|
11
|
+
NAME,
|
|
12
|
+
VERSION,
|
|
13
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
14
|
+
} = require("../../lib/adapters/travel-tencent-map");
|
|
15
|
+
|
|
16
|
+
/**
 * Write `content` to a uniquely named file inside the OS temp directory.
 *
 * The file name is `cc-tencentmap-test-<uuid>.<ext>`; callers are responsible
 * for deleting the file when they are done with it.
 *
 * @param {string} content text written to the file as UTF-8
 * @param {string} [ext="json"] file extension, without the leading dot
 * @returns {string} absolute path of the file that was written
 */
function writeTmp(content, ext = "json") {
  const uniqueName = `cc-tencentmap-test-${crypto.randomUUID()}.${ext}`;
  const target = path.join(os.tmpdir(), uniqueName);
  fs.writeFileSync(target, content, "utf-8");
  return target;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Drain an (async) iterable into an array, preserving yield order.
 *
 * @param {AsyncIterable<*>|Iterable<*>} gen source to exhaust
 * @returns {Promise<Array<*>>} every value produced by `gen`
 */
async function collect(gen) {
  const collected = [];
  for await (const item of gen) {
    collected.push(item);
  }
  return collected;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Build a stand-in for the adapter's SQLite driver factory.
 *
 * The returned factory yields a `FakeDb` class. Its instances answer
 * `prepare(sql)` with the rows of the first `tables` key that occurs as a
 * substring of `sql`, and record constructor / close activity on `log`.
 *
 * @param {Object<string, Array<object>>} tables rows keyed by the table-name needle
 * @param {object} [log={}] receives `opened` ({ dbPath, opts }) and `closed` (true)
 * @returns {() => Function} factory returning the fake driver class
 */
function makeFakeDriverFactory(tables, log = {}) {
  const driverFactory = () => {
    return class FakeDb {
      constructor(dbPath, opts) {
        log.opened = { dbPath, opts };
      }

      prepare(sql) {
        const hit = Object.entries(tables).find(([needle]) => sql.includes(needle));
        if (!hit) {
          // Mimic the driver failing when a queried table is absent.
          throw new Error(`no such table in: ${sql}`);
        }
        const [, rows] = hit;
        return { all: () => rows };
      }

      close() {
        log.closed = true;
      }
    };
  };
  return driverFactory;
}
|
|
48
|
+
|
|
49
|
+
// Snapshot fixture shared by the snapshot-mode sync tests and the normalize
// tests: one account plus two events (a favourite place and a bike route).
const SNAPSHOT = {
  // Must equal the adapter's schema version; a mismatching value is expected
  // to make sync() reject (see the "schemaVersion mismatch" test).
  schemaVersion: SNAPSHOT_SCHEMA_VERSION,
  snapshottedAt: 1716383021000,
  vendor: "tencent-map",
  account: { uid: "U1" },
  events: [
    {
      // Expected to surface as originalId "tencent-map:favourite:fav-1".
      kind: "favourite",
      id: "fav-1",
      capturedAt: 1716383021000,
      name: "公司",
      lat: 31.2,
      lng: 121.44,
      category: "company",
    },
    {
      // Expected to surface as originalId "tencent-map:route:route-2" and to
      // normalize into a trip titled "bike: 公司 → 体育馆".
      kind: "route",
      id: "route-2",
      capturedAt: 1716383021000,
      from: { name: "公司" },
      to: { name: "体育馆" },
      mode: "bike",
    },
  ],
};
|
|
74
|
+
|
|
75
|
+
// Pins the adapter's public identity constants.
describe("constants", () => {
  it("exposes name/version/schema", () => {
    const expectations = [
      [NAME, "travel-tencent-map"],
      [VERSION, "0.2.0"],
      [SNAPSHOT_SCHEMA_VERSION, 1],
    ];
    for (const [actual, wanted] of expectations) {
      expect(actual).toBe(wanted);
    }
  });
});
|
|
82
|
+
|
|
83
|
+
// authenticate() outcomes: snapshot file accepted, sqlite path without a
// device id refused, and no input at all refused.
describe("authenticate", () => {
  it("mirrors baidu: snapshot / sqlite-needs-deviceId / NO_INPUT", async () => {
    const snapshotPath = writeTmp("{}");
    try {
      const adapter = new TencentMapAdapter();

      const fromSnapshot = await adapter.authenticate({ inputPath: snapshotPath });
      expect(fromSnapshot.mode).toBe("snapshot-file");

      const sqliteOnly = new TencentMapAdapter({ dbPath: "x.db" });
      const withoutDevice = await sqliteOnly.authenticate({});
      expect(withoutDevice.reason).toBe("NO_ACCOUNT_DEVICE_ID");

      const withoutInput = await adapter.authenticate({});
      expect(withoutInput.reason).toBe("NO_INPUT");
    } finally {
      // Always remove the temp snapshot, even when an expectation fails.
      fs.unlinkSync(snapshotPath);
    }
  });
});
|
|
102
|
+
|
|
103
|
+
// Snapshot-file sync: id prefixing, account pass-through, schema gate.
describe("sync — snapshot mode", () => {
  it("yields events with tencent-map: prefixed originalId", async () => {
    const snapshotPath = writeTmp(JSON.stringify(SNAPSHOT));
    try {
      const adapter = new TencentMapAdapter();
      const items = await collect(adapter.sync({ inputPath: snapshotPath }));

      const ids = items.map(({ originalId }) => originalId);
      expect(ids).toEqual([
        "tencent-map:favourite:fav-1",
        "tencent-map:route:route-2",
      ]);

      const [first] = items;
      expect(first.payload.account).toEqual({ uid: "U1" });
    } finally {
      fs.unlinkSync(snapshotPath);
    }
  });

  it("throws on schemaVersion mismatch", async () => {
    const badSnapshot = { schemaVersion: 2, events: [] };
    const snapshotPath = writeTmp(JSON.stringify(badSnapshot));
    try {
      const adapter = new TencentMapAdapter();
      const pending = collect(adapter.sync({ inputPath: snapshotPath }));
      await expect(pending).rejects.toThrow(/schemaVersion mismatch/);
    } finally {
      fs.unlinkSync(snapshotPath);
    }
  });
});
|
|
130
|
+
|
|
131
|
+
// SQLite sync driven through the fake driver: legacy tencent_* table
// fallback, record mapping, driver cleanup, and the deviceId requirement.
describe("sync — sqlite mode (fake driver)", () => {
  it("yields rows + falls back to tencent_* legacy tables (keyword alias)", async () => {
    const dbFile = writeTmp("fake", "db");
    const driverLog = {};
    const legacyTables = {
      tencent_route_history: [
        {
          _id: 5,
          mode: "walk",
          start_name: "家",
          end_name: "菜场",
          time: 1716383021,
        },
      ],
      tencent_search_history: [
        { _id: 6, keyword: "奶茶", city: "深圳", time: 1716383021000 },
      ],
    };
    try {
      const adapter = new TencentMapAdapter({
        dbPath: dbFile,
        account: { deviceId: "DEV1" },
        dbDriverFactory: makeFakeDriverFactory(legacyTables, driverLog),
      });

      const items = await collect(adapter.sync({}));
      expect(items).toHaveLength(2);

      const [routeItem, searchItem] = items;
      expect(routeItem.payload.record).toMatchObject({
        vendorId: "tencentmap",
        recordId: "route-5",
        vehicleType: "walk",
        carrier: "腾讯地图",
        // The route row's `time` is expected to come back multiplied by 1000.
        departureMs: 1716383021 * 1000,
      });
      // searchRowToRecord accepts the tencent `keyword` alias
      expect(searchItem.payload.record.to).toMatchObject({
        name: "奶茶",
        city: "深圳",
      });
      expect(driverLog.closed).toBe(true);
    } finally {
      fs.unlinkSync(dbFile);
    }
  });

  it("requires account.deviceId at sync time", async () => {
    const adapter = new TencentMapAdapter({ dbPath: "x.db" });
    const pending = collect(adapter.sync({}));
    await expect(pending).rejects.toThrow(/account\.deviceId required/);
  });
});
|
|
184
|
+
|
|
185
|
+
// normalize(): event titles built from place names, merchant person entry,
// and rejection of a missing payload.
describe("normalize", () => {
  it("snapshot route → bike trip titled by place names", async () => {
    const snapshotPath = writeTmp(JSON.stringify(SNAPSHOT));
    try {
      const adapter = new TencentMapAdapter();
      const [favItem, routeItem] = await collect(
        adapter.sync({ inputPath: snapshotPath }),
      );

      const favBatch = adapter.normalize(favItem);
      expect(favBatch.events[0].content.title).toBe("visit: → 公司");

      const routeBatch = adapter.normalize(routeItem);
      expect(routeBatch.events[0].content.title).toBe("bike: 公司 → 体育馆");

      const merchant = routeBatch.persons.find(
        ({ subtype }) => subtype === "merchant",
      );
      expect(merchant.names).toEqual(["腾讯地图"]);
    } finally {
      fs.unlinkSync(snapshotPath);
    }
  });

  it("throws on missing payload", () => {
    const normalizeNothing = () => new TencentMapAdapter().normalize(null);
    expect(normalizeNothing).toThrow(/payload missing/);
  });
});
|
package/lib/adapter-guide.js
CHANGED
|
@@ -346,22 +346,24 @@ const ADAPTER_OVERRIDES = Object.freeze({
|
|
|
346
346
|
|
|
347
347
|
"qq-pc": {
|
|
348
348
|
summary:
|
|
349
|
-
"采集电脑版 QQ(NT 新版)的聊天记录(来自本地 nt_msg.db
|
|
349
|
+
"采集电脑版 QQ(NT 新版)的聊天记录(来自本地 nt_msg.db)。中台已支持自动解密 + 解析:取一次密钥后,自动解密 SQLCipher 库、解析 c2c/群消息的 protobuf 正文为可读文本(含发送者昵称、群号)。",
|
|
350
350
|
methods: [
|
|
351
351
|
{
|
|
352
|
-
label: "
|
|
352
|
+
label: "方式一:取密钥后一键采集(推荐)",
|
|
353
353
|
recommended: true,
|
|
354
354
|
steps: [
|
|
355
|
-
"
|
|
356
|
-
"
|
|
357
|
-
"
|
|
358
|
-
"
|
|
355
|
+
"在电脑上打开并登录 QQ(NT 新版,数据在 文档\\Tencent Files\\<QQ号>\\nt_qq\\nt_db\\nt_msg.db)。",
|
|
356
|
+
"下载并运行 qq-win-db-key(github.com/QQBackup/qq-win-db-key 的 windows_ntqq_get_key.ps1)。它会全关 QQ → 以调试器启动 QQ → 你登录后自动抓出 16 位密钥(形如 5{sww#,6aq=)8=A@)。",
|
|
357
|
+
"回到中台执行 `cc hub sync-adapter qq-pc --passphrase \"<那串密钥>\"`(或点该行「一键采集」并粘贴密钥)。",
|
|
358
|
+
"中台自动解密 + 解析 c2c_msg_table / group_msg_table → 可读消息入库(私聊 + 群聊,含昵称/群号)。",
|
|
359
359
|
],
|
|
360
|
-
note: "QQ
|
|
360
|
+
note: "QQ 每次重启密钥会变,重采时重新跑 qq-win-db-key 取一次即可。纯个人使用、全程本地;首次会要求法律确认。依赖随中台分发的 Python(含 cryptography)。",
|
|
361
361
|
},
|
|
362
362
|
{
|
|
363
|
-
label: "
|
|
364
|
-
steps: [
|
|
363
|
+
label: "方式二:已解密为明文库则直接导入",
|
|
364
|
+
steps: [
|
|
365
|
+
"若已用工具把 nt_msg.db 解密为明文 SQLite,执行 `cc hub sync-adapter qq-pc --input <明文 nt_msg.db>`。",
|
|
366
|
+
],
|
|
365
367
|
},
|
|
366
368
|
],
|
|
367
369
|
},
|
|
@@ -37,6 +37,9 @@ class QQPcAdapter {
|
|
|
37
37
|
constructor(opts = {}) {
|
|
38
38
|
this._dbPath = opts.dbPath || null;
|
|
39
39
|
this._key = opts.key || null;
|
|
40
|
+
// QQ NT passphrase (16-char ASCII from qq-win-db-key). When present, sync
|
|
41
|
+
// routes through the Python sidecar (decrypt + protobuf parse).
|
|
42
|
+
this._passphrase = opts.passphrase || null;
|
|
40
43
|
|
|
41
44
|
this.name = NAME;
|
|
42
45
|
this.version = VERSION;
|
|
@@ -58,6 +61,10 @@ class QQPcAdapter {
|
|
|
58
61
|
this._deps = {
|
|
59
62
|
fs,
|
|
60
63
|
dbDriverFactory: opts.dbDriverFactory || null,
|
|
64
|
+
// DI seam: tests inject a fake QQ sidecar collector; default lazy-loads
|
|
65
|
+
// the forensics-bridge invoker.
|
|
66
|
+
qqCollector: opts.qqCollector || null,
|
|
67
|
+
discoveryDeps: opts.discoveryDeps || undefined,
|
|
61
68
|
};
|
|
62
69
|
}
|
|
63
70
|
|
|
@@ -134,10 +141,18 @@ class QQPcAdapter {
|
|
|
134
141
|
}
|
|
135
142
|
|
|
136
143
|
async *sync(opts = {}) {
|
|
144
|
+
// Sidecar path: with a QQ NT passphrase (from qq-win-db-key), decrypt +
|
|
145
|
+
// parse the encrypted nt_msg.db in Python and yield readable messages.
|
|
146
|
+
const passphrase = opts.passphrase || this._passphrase || null;
|
|
147
|
+
if (passphrase || opts.mode === "sidecar") {
|
|
148
|
+
yield* this._syncViaSidecar(opts, passphrase);
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
151
|
+
|
|
137
152
|
const dbPath =
|
|
138
153
|
opts.dbPath || opts.inputPath || this._dbPath || this._resolveDiscoveredDbPath();
|
|
139
154
|
if (!dbPath) {
|
|
140
|
-
throw new Error("qq-pc.sync: 未找到本机 QQ NT 库且未提供 opts.dbPath / opts.inputPath");
|
|
155
|
+
throw new Error("qq-pc.sync: 未找到本机 QQ NT 库且未提供 opts.dbPath / opts.inputPath(或提供 opts.passphrase 走 sidecar 解密)");
|
|
141
156
|
}
|
|
142
157
|
if (!this._deps.fs.existsSync(dbPath)) return;
|
|
143
158
|
|
|
@@ -178,6 +193,60 @@ class QQPcAdapter {
|
|
|
178
193
|
}
|
|
179
194
|
}
|
|
180
195
|
|
|
196
|
+
// Sidecar path: forensics-bridge qq_nt.collect decrypts nt_msg.db (with the
|
|
197
|
+
// qq-win-db-key passphrase) + parses c2c/group protobuf bodies → readable
|
|
198
|
+
// messages, which we map into the same payload normalizeMessage consumes.
|
|
199
|
+
async *_syncViaSidecar(opts = {}, passphrase) {
|
|
200
|
+
let collect = this._deps.qqCollector;
|
|
201
|
+
if (!collect) {
|
|
202
|
+
// eslint-disable-next-line global-require
|
|
203
|
+
collect = require("./qqnt-sidecar").collectQqNt;
|
|
204
|
+
}
|
|
205
|
+
const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : undefined;
|
|
206
|
+
const result = await collect({
|
|
207
|
+
passphrase,
|
|
208
|
+
key: opts.key || this._key || undefined,
|
|
209
|
+
dbPath: opts.dbPath || this._dbPath || this._resolveDiscoveredDbPath() || undefined,
|
|
210
|
+
limit,
|
|
211
|
+
pythonExe: opts.pythonExe,
|
|
212
|
+
bridgeDir: opts.bridgeDir,
|
|
213
|
+
timeoutMs: opts.timeoutMs,
|
|
214
|
+
onProgress:
|
|
215
|
+
typeof opts.onProgress === "function"
|
|
216
|
+
? (m) => { try { opts.onProgress({ phase: "qq-nt", adapter: NAME, ...m }); } catch (_e) { /* best-effort */ } }
|
|
217
|
+
: undefined,
|
|
218
|
+
_supervisorFactory: opts._supervisorFactory,
|
|
219
|
+
});
|
|
220
|
+
const messages = (result && Array.isArray(result.messages)) ? result.messages : [];
|
|
221
|
+
const fallbackCapturedAt = Date.now();
|
|
222
|
+
let emitted = 0;
|
|
223
|
+
for (const m of messages) {
|
|
224
|
+
if (!m || typeof m !== "object") continue;
|
|
225
|
+
const isGroup = m.kind === "group";
|
|
226
|
+
const createdTimeMs =
|
|
227
|
+
typeof m.createTime === "number" && m.createTime > 0 ? m.createTime * 1000 : null;
|
|
228
|
+
const payload = {
|
|
229
|
+
kind: KIND_MESSAGE,
|
|
230
|
+
text: typeof m.text === "string" ? m.text : "",
|
|
231
|
+
peerUin: m.peer != null ? String(m.peer) : null,
|
|
232
|
+
peerName: m.conversationName || null, // group name / c2c peer nickname (best-effort)
|
|
233
|
+
senderUin: m.senderUin != null ? String(m.senderUin) : null,
|
|
234
|
+
senderName: m.senderName || null,
|
|
235
|
+
isGroup,
|
|
236
|
+
type: typeof m.type === "number" ? m.type : null,
|
|
237
|
+
createdTimeMs,
|
|
238
|
+
};
|
|
239
|
+
yield {
|
|
240
|
+
adapter: NAME,
|
|
241
|
+
kind: KIND_MESSAGE,
|
|
242
|
+
originalId: m.originalId || stableOriginalId(`${m.peer}-${createdTimeMs}-${emitted}`),
|
|
243
|
+
capturedAt: createdTimeMs || fallbackCapturedAt,
|
|
244
|
+
payload,
|
|
245
|
+
};
|
|
246
|
+
emitted += 1;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
181
250
|
normalize(raw) {
|
|
182
251
|
if (!raw || !raw.payload) {
|
|
183
252
|
throw new Error("QQPcAdapter.normalize: payload missing");
|
|
@@ -215,7 +284,9 @@ class QQPcAdapter {
|
|
|
215
284
|
platform: "qq",
|
|
216
285
|
source: "pc-nt",
|
|
217
286
|
peerUin: p.peerUin || null,
|
|
287
|
+
...(p.peerName ? { peerName: p.peerName } : {}),
|
|
218
288
|
senderUin: p.senderUin || null,
|
|
289
|
+
...(p.senderName ? { senderName: p.senderName } : {}),
|
|
219
290
|
isGroup: !!p.isGroup,
|
|
220
291
|
qqMsgType: typeof p.type === "number" ? p.type : null,
|
|
221
292
|
// Full raw row preserved — protobuf bodies + unknown columns — so a
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* QQ NT collection bridge — invokes the forensics-bridge Python sidecar's
|
|
5
|
+
* `qq_nt.collect` method (skip 1024-byte preamble + SQLCipher-4 decrypt with
|
|
6
|
+
* the qq-win-db-key passphrase + parse c2c/group protobuf message bodies) and
|
|
7
|
+
* returns the decrypted, readable messages to the node adapter.
|
|
8
|
+
*
|
|
9
|
+
* The key is the QQ NT passphrase (a 16-char ASCII string like "5{sww#,6aq=)8=A@"
|
|
10
|
+
* extracted by qq-win-db-key). Pass it as opts.passphrase. Decryption + protobuf
|
|
11
|
+
* text extraction run in Python (cryptography), sidestepping the host-node
|
|
12
|
+
* bs3mc ABI problem (node never opens the encrypted DB).
|
|
13
|
+
*
|
|
14
|
+
* Resolution (overridable for tests / packaging):
|
|
15
|
+
* - python exe: opts.pythonExe → env CC_PDH_PYTHON → "python" / "python3"
|
|
16
|
+
* - bridge dir: opts.bridgeDir → env CC_PDH_BRIDGE_DIR → sibling package
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const path = require("node:path");
|
|
20
|
+
const { existsSync } = require("node:fs");
|
|
21
|
+
|
|
22
|
+
/**
 * Locate the forensics-bridge directory.
 *
 * Precedence: explicit argument, then the CC_PDH_BRIDGE_DIR environment
 * variable, then the `personal-data-hub-bridge` directory four levels above
 * this module's directory.
 *
 * @param {string} [explicit] caller-supplied bridge directory
 * @returns {string} the resolved bridge directory (existence is not checked)
 */
function resolveBridgeDir(explicit) {
  const override = explicit || process.env.CC_PDH_BRIDGE_DIR;
  if (override) {
    return override;
  }
  // lib/adapters/qq-pc → up to packages/, then sibling bridge package.
  return path.resolve(__dirname, "../../../../personal-data-hub-bridge");
}
|
|
28
|
+
|
|
29
|
+
/**
 * List the Python executables to try, in priority order and without
 * duplicates: the explicit value, the CC_PDH_PYTHON environment variable,
 * then the platform defaults ("python" first on win32, "python3" first
 * elsewhere).
 *
 * @param {string} [explicit] caller-supplied interpreter
 * @returns {string[]} de-duplicated candidate commands
 */
function pythonCandidates(explicit) {
  const platformOrder =
    process.platform === "win32" ? ["python", "python3"] : ["python3", "python"];
  const ordered = [explicit, process.env.CC_PDH_PYTHON, ...platformOrder].filter(Boolean);
  return Array.from(new Set(ordered));
}
|
|
37
|
+
|
|
38
|
+
/**
 * Invoke the forensics-bridge sidecar's `qq_nt.collect` method and return its
 * result. Each Python candidate is tried in turn with a fresh supervisor.
 * The supervisor is always stopped, whether the attempt succeeded or failed.
 *
 * Failures are split in two classes:
 *  - data errors (key / database problems reported by the sidecar) are
 *    rethrown immediately, unchanged;
 *  - anything else is treated as "this interpreter is unusable" and the next
 *    candidate is tried. When all candidates fail, an error with
 *    `code === "SIDECAR_UNAVAILABLE"` is thrown and the last underlying error
 *    is attached as `cause`.
 *
 * @param {object} [opts]
 * @param {string} [opts.passphrase] QQ NT key (ASCII passphrase from qq-win-db-key)
 * @param {string} [opts.key] alternatively a hex key (ignored when passphrase is set)
 * @param {string} [opts.dbPath] nt_msg.db path (sidecar auto-discovers if omitted)
 * @param {number} [opts.limit] forwarded only when a positive integer
 * @param {string} [opts.pythonExe] preferred Python executable
 * @param {string} [opts.bridgeDir] bridge directory override
 * @param {number} [opts.timeoutMs] invoke timeout (default 120000)
 * @param {number} [opts.readyTimeoutMs] sidecar start timeout (default 15000)
 * @param {(msg:object)=>void} [opts.onProgress]
 * @param {(command: string[], cwd: string) => object} [opts._supervisorFactory]
 *   test seam; must return an object with async start / invoke / stop.
 *   When supplied, the bridge-directory existence check is skipped.
 * @returns {Promise<{account:string,messageCount:number,c2c:number,group:number,messages:object[]}>}
 * @throws {Error} code "BRIDGE_NOT_FOUND" when the bridge directory is missing
 * @throws {Error} code "SIDECAR_UNAVAILABLE" when no Python candidate worked
 */
async function collectQqNt(opts = {}) {
  const bridgeDir = resolveBridgeDir(opts.bridgeDir);
  const timeoutMs = opts.timeoutMs || 120_000;
  const makeSupervisor =
    opts._supervisorFactory ||
    ((command, cwd) => {
      // eslint-disable-next-line global-require
      const { SidecarSupervisor } = require("../../sidecar");
      return new SidecarSupervisor({
        command,
        cwd,
        defaultTimeoutMs: timeoutMs,
        healthCheckIntervalMs: 0,
      });
    });

  if (!opts._supervisorFactory && !existsSync(bridgeDir)) {
    const e = new Error(
      `qq-pc: forensics-bridge not found at ${bridgeDir} (set CC_PDH_BRIDGE_DIR)`,
    );
    e.code = "BRIDGE_NOT_FOUND";
    throw e;
  }

  const params = {};
  if (Number.isInteger(opts.limit) && opts.limit > 0) params.limit = opts.limit;
  if (opts.passphrase) params.passphrase = opts.passphrase;
  else if (opts.key) params.key = opts.key;
  if (opts.dbPath) params.db_path = opts.dbPath;

  // Real QQ-side failures (key/db) surface immediately; sidecar-availability
  // problems (missing python / cryptography / spawn death) → try next python.
  const dataErrorPattern = /KEY_REQUIRED|KEY_VERIFY|APP_NOT|DB_TOO|BAD_LAYOUT/i;

  const candidates = pythonCandidates(opts.pythonExe);
  let lastErr = null;
  for (const py of candidates) {
    const sup = makeSupervisor([py, "-m", "forensics_bridge.ipc_server"], bridgeDir);
    try {
      await sup.start({ readyTimeoutMs: opts.readyTimeoutMs || 15_000 });
      return await sup.invoke("qq_nt.collect", params, {
        timeoutMs,
        onProgress: opts.onProgress,
      });
    } catch (err) {
      lastErr = err;
      // Classify on both the structured code and the message: an error whose
      // identifier lives only on `err.code` must not be retried on every
      // interpreter and then misreported as "sidecar unavailable".
      const signature = `${(err && err.code) || ""} ${(err && err.message) || ""}`;
      if (dataErrorPattern.test(signature)) throw err;
    } finally {
      // Runs on success, on data-error rethrow, and before the next attempt.
      try { await sup.stop(); } catch (_e) { /* best-effort */ }
    }
  }
  const e = new Error(
    `qq-pc: could not run forensics-bridge sidecar (tried ${candidates.join(", ")}). ` +
      `Install Python 3.11+ with 'cryptography', or set CC_PDH_PYTHON. Last error: ${lastErr && lastErr.message}`,
  );
  e.code = "SIDECAR_UNAVAILABLE";
  // Keep the underlying error object (stack, code) reachable for callers.
  e.cause = lastErr;
  throw e;
}
|
|
108
|
+
|
|
109
|
+
module.exports = { collectQqNt, _internals: { resolveBridgeDir, pythonCandidates } };
|
|
@@ -54,8 +54,13 @@ const KIND_PROFILE = "profile";
|
|
|
54
54
|
const KIND_WATCH = "watch";
|
|
55
55
|
const KIND_COLLECT = "collect";
|
|
56
56
|
const KIND_SEARCH = "search";
|
|
57
|
-
// v0.2.1 — KIND_PROFILE added (mirrors Douyin/Toutiao)
|
|
58
|
-
//
|
|
57
|
+
// v0.2.1 — KIND_PROFILE added (mirrors Douyin/Toutiao). The watch/collect/
|
|
58
|
+
// search producers LANDED since (verified 2026-06-11): Android
|
|
59
|
+
// KuaishouLocalCollector emits all 4 kinds via the NS_sig3 WebSignBridge
|
|
60
|
+
// path, KuaishouRootDbExtractor emits watch/collect/search, and the PC ADB
|
|
61
|
+
// KuaishouApiClient fetches them through its injected signProvider (signed
|
|
62
|
+
// GraphQL). This adapter normalizes whatever the snapshot carries.
|
|
63
|
+
// SNAPSHOT_SCHEMA_VERSION stays at 1 — additive.
|
|
59
64
|
const VALID_SNAPSHOT_KINDS = Object.freeze([
|
|
60
65
|
KIND_PROFILE,
|
|
61
66
|
KIND_WATCH,
|
|
@@ -119,23 +119,19 @@ class KuaishouApiClient {
|
|
|
119
119
|
);
|
|
120
120
|
return null;
|
|
121
121
|
}
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
decoded = cpMatch[1];
|
|
127
|
-
}
|
|
128
|
-
const trimmed = decoded.trimStart();
|
|
129
|
-
if (!trimmed.startsWith("{")) {
|
|
122
|
+
const jsonText = apiPhDecodeCandidates(cpMatch[1]).find((c) =>
|
|
123
|
+
c.trimStart().startsWith("{"),
|
|
124
|
+
);
|
|
125
|
+
if (!jsonText) {
|
|
130
126
|
this._setLastError(
|
|
131
127
|
-9,
|
|
132
|
-
"kuaishou.web.cp.api_ph 解码后非 JSON (
|
|
128
|
+
"kuaishou.web.cp.api_ph 解码后非 JSON (urlencoded + base64 fallback 均失败)",
|
|
133
129
|
);
|
|
134
130
|
return null;
|
|
135
131
|
}
|
|
136
132
|
let obj;
|
|
137
133
|
try {
|
|
138
|
-
obj = JSON.parse(
|
|
134
|
+
obj = JSON.parse(jsonText);
|
|
139
135
|
} catch (e) {
|
|
140
136
|
this._setLastError(-3, "parse: " + (e.message || String(e)));
|
|
141
137
|
return null;
|
|
@@ -359,20 +355,43 @@ function extractPhotoList(feeds, limit, build) {
|
|
|
359
355
|
return out;
|
|
360
356
|
}
|
|
361
357
|
|
|
362
|
-
|
|
358
|
+
/**
 * Produce the candidate text forms of a `kuaishou.web.cp.api_ph` cookie value.
 *
 * The first candidate is always the URI-decoded value (or the raw value when
 * URI decoding throws). A second candidate — the base64-decoded text — is
 * appended only when all of the following hold:
 *  - the trimmed first candidate does not already start with `{`;
 *  - it consists solely of base64 characters (standard or URL-safe alphabet,
 *    at most two trailing `=`);
 *  - the decoded text itself starts with `{`, so lenient Buffer decoding of
 *    arbitrary text cannot surface garbage.
 *
 * @param {string} cpRaw raw cookie value
 * @returns {string[]} one or two candidate strings, URI-decoded form first
 */
function apiPhDecodeCandidates(cpRaw) {
  let uriDecoded = cpRaw;
  try {
    uriDecoded = decodeURIComponent(cpRaw);
  } catch {
    // Malformed percent-escapes: keep the raw value as the first candidate.
  }

  const candidates = [uriDecoded];
  const compact = uriDecoded.trim();
  const base64Shape = /^[A-Za-z0-9+/\-_]+={0,2}$/;
  if (compact.startsWith("{") || !base64Shape.test(compact)) {
    return candidates;
  }

  // Map the URL-safe alphabet onto the standard one before decoding.
  const standardAlphabet = compact.replace(/-/g, "+").replace(/_/g, "/");
  const fromBase64 = Buffer.from(standardAlphabet, "base64").toString("utf-8");
  if (fromBase64.trimStart().startsWith("{")) {
    candidates.push(fromBase64);
  }
  return candidates;
}
|
|
384
|
+
|
|
385
|
+
/**
 * Pull a numeric user id out of an api_ph cookie value.
 *
 * Every decode candidate is scanned, in order, for `user_id`, then `uid`,
 * then `userId` (quotes around key and value optional). The first captured
 * digit run that is not exactly "0" wins.
 *
 * @param {string} cpRaw raw `kuaishou.web.cp.api_ph` cookie value
 * @returns {?string} the digits of the first usable id, or null when none is found
 */
function extractEmbeddedUid(cpRaw) {
  // No `g` flag, so exec() is stateless and the literals can be shared.
  const uidPatterns = [
    /"?user_id"?\s*:\s*"?(\d+)"?/,
    /"?uid"?\s*:\s*"?(\d+)"?/,
    /"?userId"?\s*:\s*"?(\d+)"?/,
  ];
  for (const candidate of apiPhDecodeCandidates(cpRaw)) {
    for (const pattern of uidPatterns) {
      const match = pattern.exec(candidate);
      const uid = match && match[1];
      if (uid && uid !== "0") {
        return uid;
      }
    }
  }
  return null;
}
|
|
@@ -393,5 +412,6 @@ module.exports = {
|
|
|
393
412
|
normalizeMs,
|
|
394
413
|
extractPhotoList,
|
|
395
414
|
extractEmbeddedUid,
|
|
415
|
+
apiPhDecodeCandidates,
|
|
396
416
|
},
|
|
397
417
|
};
|
|
@@ -34,6 +34,9 @@ const crypto = require("node:crypto");
|
|
|
34
34
|
const {
|
|
35
35
|
readChromiumCookies,
|
|
36
36
|
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
37
|
+
const {
|
|
38
|
+
_internals: { apiPhDecodeCandidates },
|
|
39
|
+
} = require("./api-client");
|
|
37
40
|
|
|
38
41
|
const KUAISHOU_COOKIES_REMOTE_PATH =
|
|
39
42
|
"/data/data/com.smile.gifmaker/app_webview/Default/Cookies";
|
|
@@ -137,22 +140,20 @@ function pickUidFromCookieMap(byName) {
|
|
|
137
140
|
}
|
|
138
141
|
const cpRaw = byName.get("kuaishou.web.cp.api_ph")?.value;
|
|
139
142
|
if (cpRaw) {
|
|
140
|
-
let decoded;
|
|
141
|
-
try {
|
|
142
|
-
decoded = decodeURIComponent(cpRaw);
|
|
143
|
-
} catch {
|
|
144
|
-
decoded = cpRaw;
|
|
145
|
-
}
|
|
146
143
|
// Try nested user_id / uid / userId regex (don't require strict JSON
|
|
147
|
-
// — api_ph format isn't documented and varies)
|
|
148
|
-
for
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
144
|
+
// — api_ph format isn't documented and varies). v0.3: candidates
|
|
145
|
+
// include the base64-decoded form for newer Kuaishou builds that
|
|
146
|
+
// write api_ph as base64(JSON).
|
|
147
|
+
for (const decoded of apiPhDecodeCandidates(cpRaw)) {
|
|
148
|
+
for (const pat of [
|
|
149
|
+
/"?user_id"?\s*:\s*"?(\d+)"?/,
|
|
150
|
+
/"?uid"?\s*:\s*"?(\d+)"?/,
|
|
151
|
+
/"?userId"?\s*:\s*"?(\d+)"?/,
|
|
152
|
+
]) {
|
|
153
|
+
const m = pat.exec(decoded);
|
|
154
|
+
if (m && m[1] && m[1] !== "0") {
|
|
155
|
+
return m[1];
|
|
156
|
+
}
|
|
156
157
|
}
|
|
157
158
|
}
|
|
158
159
|
}
|
|
@@ -57,10 +57,14 @@ const KIND_PROFILE = "profile";
|
|
|
57
57
|
const KIND_READ = "read";
|
|
58
58
|
const KIND_COLLECTION = "collection";
|
|
59
59
|
const KIND_SEARCH = "search";
|
|
60
|
-
// v0.2.1 — KIND_PROFILE added (mirrors Douyin)
|
|
61
|
-
//
|
|
62
|
-
//
|
|
63
|
-
//
|
|
60
|
+
// v0.2.1 — KIND_PROFILE added (mirrors Douyin). The read/collection/search
|
|
61
|
+
// producers LANDED since (verified 2026-06-11): Android ToutiaoLocalCollector
|
|
62
|
+
// emits all 4 kinds via the _signature WebSignBridge path, ToutiaoRootDbExtractor
|
|
63
|
+
// emits read/collection/search, and the PC ADB ToutiaoApiClient fetches
|
|
64
|
+
// feed/collection/search through its injected signProvider. This adapter
|
|
65
|
+
// normalizes whatever the snapshot carries. SNAPSHOT_SCHEMA_VERSION stays
|
|
66
|
+
// at 1: old (events-only) snapshots remain compatible; profile events are
|
|
67
|
+
// an additive extension.
|
|
64
68
|
const VALID_SNAPSHOT_KINDS = Object.freeze([
|
|
65
69
|
KIND_PROFILE,
|
|
66
70
|
KIND_READ,
|
|
@@ -150,8 +150,15 @@ function normalizeTravelRecord(rec, ctx = {}) {
|
|
|
150
150
|
|
|
151
151
|
function buildTitle(rec) {
|
|
152
152
|
const vt = rec.vehicleType || "trip";
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
// station > city > name — name matters for Amap route/search records,
|
|
154
|
+
// which carry ONLY p.name (no station/city); without it every Amap trip
|
|
155
|
+
// event was titled "car: ? → ?".
|
|
156
|
+
const from = rec.from
|
|
157
|
+
? (rec.from.station || rec.from.city || rec.from.name || "?")
|
|
158
|
+
: "";
|
|
159
|
+
const to = rec.to
|
|
160
|
+
? (rec.to.station || rec.to.city || rec.to.name || "?")
|
|
161
|
+
: "";
|
|
155
162
|
if (from && to) return `${vt}: ${from} → ${to}`;
|
|
156
163
|
if (to) return `${vt}: → ${to}`;
|
|
157
164
|
return `${vt}: ${rec.carrier || rec.recordId}`;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.4",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|