@chainlesschain/personal-data-hub 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
- package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
- package/__tests__/adapters/ai-chat-history.test.js +8 -7
- package/__tests__/adapters/ai-chat-vendors.test.js +149 -8
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
- package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
- package/__tests__/adapters/system-data-android.test.js +387 -0
- package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
- package/__tests__/adapters/wechat-env-probe.test.js +162 -0
- package/__tests__/adapters/wechat-frida-agent.test.js +322 -0
- package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
- package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
- package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
- package/__tests__/analysis-skills.test.js +147 -0
- package/__tests__/analysis.test.js +329 -1
- package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
- package/__tests__/e2e/full-user-journey.test.js +188 -0
- package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
- package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
- package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
- package/__tests__/integration/social-bilibili-pipeline.test.js +261 -0
- package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
- package/__tests__/registry.test.js +4 -2
- package/__tests__/social-adapters.test.js +63 -14
- package/__tests__/social-bilibili-snapshot.test.js +278 -0
- package/__tests__/wechat-adapter.test.js +118 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +55 -16
- package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
- package/lib/adapters/ai-chat-history/health-checker.js +210 -0
- package/lib/adapters/ai-chat-history/schema-map.js +42 -5
- package/lib/adapters/ai-chat-history/vendor-spec.js +1 -0
- package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
- package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +4 -0
- package/lib/adapters/social-bilibili/adapter.js +500 -0
- package/lib/adapters/social-bilibili/index.js +21 -169
- package/lib/adapters/social-kuaishou/index.js +237 -0
- package/lib/adapters/social-toutiao/index.js +236 -0
- package/lib/adapters/system-data-android/adapter.js +348 -0
- package/lib/adapters/system-data-android/index.js +76 -0
- package/lib/adapters/wechat/bootstrap.js +146 -0
- package/lib/adapters/wechat/content-parser.js +11 -2
- package/lib/adapters/wechat/db-reader.js +88 -10
- package/lib/adapters/wechat/env-probe.js +218 -0
- package/lib/adapters/wechat/frida-agent/loader.js +74 -0
- package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +248 -0
- package/lib/adapters/wechat/index.js +9 -0
- package/lib/adapters/wechat/key-providers/frida-key-provider.js +252 -0
- package/lib/adapters/wechat/key-providers/index.js +22 -0
- package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
- package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
- package/lib/adapters/wechat/normalize.js +12 -3
- package/lib/analysis-skills/spending.js +4 -1
- package/lib/analysis.js +191 -2
- package/lib/index.js +16 -0
- package/lib/prompt-builder.js +11 -1
- package/lib/query-parser.js +7 -1
- package/lib/vault.js +77 -0
- package/package.json +8 -1
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 12.6.3 — FridaKeyProvider (v1 hot path).
|
|
3
|
+
*
|
|
4
|
+
* Attaches frida to a live WeChat process (com.tencent.mm) on a rooted
|
|
5
|
+
* Android device, injects the wechat-key-hook agent (see
|
|
6
|
+
* frida-agent/wechat-key-hook.js), waits for the first sqlite3_key
|
|
7
|
+
* onEnter, captures the 32-byte hex key, then detaches.
|
|
8
|
+
*
|
|
9
|
+
* Why detach immediately:
|
|
10
|
+
* §18.6 anti-detection — minimize injection window so WeChat's
|
|
11
|
+
* ptrace-tracer / mem-scanner doesn't catch frida-gum sitting in
|
|
12
|
+
* the process. We hold the script alive only as long as it takes
|
|
13
|
+
* the user to touch a chat thread (typically 1-3s).
|
|
14
|
+
*
|
|
15
|
+
* Wire to KeyProvider:
|
|
16
|
+
* getKey() resolves with lowercase 64-char hex on success, or
|
|
17
|
+
* rejects with one of the typed error codes:
|
|
18
|
+
* - FRIDA_BINDING_MISSING : opts.frida not provided and require()
|
|
19
|
+
* of "frida" failed (binding not installed)
|
|
20
|
+
* - WECHAT_NOT_RUNNING : device.attach() threw on package name
|
|
21
|
+
* - FRIDA_ATTACH_FAILED : any other attach/createScript error
|
|
22
|
+
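* - HOOK_FAILED : reserved — agent 'error' events are only recorded in telemetry.errors; they surface through the WCDB_KEY_TIMEOUT message and never reject on their own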
|
|
23
|
+
* - WCDB_KEY_TIMEOUT : no key event within timeoutMs
|
|
24
|
+
*
|
|
25
|
+
* Test seam: opts.frida overrides the lazy require("frida"), so unit
|
|
26
|
+
* tests inject a mock device manager without touching the real binding.
|
|
27
|
+
*/
|
|
28
|
+
"use strict";
|
|
29
|
+
|
|
30
|
+
const { KeyProvider } = require("./key-provider-base");
|
|
31
|
+
const { loadAgentScript } = require("../frida-agent/loader");
|
|
32
|
+
|
|
33
|
+
class FridaKeyProvider extends KeyProvider {
  /**
   * @param {object} opts
   * @param {object}   [opts.frida]       injected nodejs binding (test seam);
   *                                      if absent, lazy require("frida")
   * @param {string}   [opts.deviceId]    Frida device id (USB device default
   *                                      if omitted; "local" for Wear/host)
   * @param {string}   [opts.packageName="com.tencent.mm"]
   * @param {number}   [opts.timeoutMs=30000]
   * @param {Function} [opts.agentLoader] test seam: returns agent script
   *                                      text; defaults to loadAgentScript
   * @param {Function} [opts.logger]      optional log({level, ...evt})
   */
  constructor(opts = {}) {
    super();
    if (!opts || typeof opts !== "object") {
      throw new Error("FridaKeyProvider: opts required");
    }
    this._fridaInjected = opts.frida || null;
    this._deviceId = opts.deviceId || null;
    this._packageName = opts.packageName || "com.tencent.mm";
    this._timeoutMs = Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0
      ? opts.timeoutMs
      : 30_000;
    this._agentLoader = typeof opts.agentLoader === "function"
      ? opts.agentLoader
      : loadAgentScript;
    this._logger = typeof opts.logger === "function" ? opts.logger : null;
    this._lastTelemetry = null;
  }

  /** Provider name used for telemetry / error attribution. */
  get name() {
    return "frida";
  }

  /**
   * Telemetry object of the most recent getKey() attempt that got as far as
   * acquiring a device (successful or not); null before that.
   */
  getLastTelemetry() {
    return this._lastTelemetry;
  }

  /** Forward an event to the optional logger; logger faults never propagate. */
  _log(evt) {
    if (this._logger) {
      try { this._logger(evt); } catch (_e) { /* swallow logger faults */ }
    }
  }

  /** Best-effort human-readable text for a caught value. */
  _errText(err) {
    return err && err.message ? err.message : String(err);
  }

  /**
   * Return the injected binding, or lazily require("frida").
   * @throws {Error} code FRIDA_BINDING_MISSING when the require fails
   */
  _loadFrida() {
    if (this._fridaInjected) return this._fridaInjected;
    try {
      // eslint-disable-next-line global-require
      return require("frida");
    } catch (err) {
      const e = new Error(
        "FridaKeyProvider: frida nodejs binding not installed. " +
        "Install with `npm install frida` on the host, or pass opts.frida. " +
        "Underlying error: " + this._errText(err)
      );
      e.code = "FRIDA_BINDING_MISSING";
      throw e;
    }
  }

  /** Resolve the configured device, or the USB device when no id was given. */
  async _getDevice(frida) {
    if (this._deviceId) {
      const dev = await frida.getDevice(this._deviceId);
      return dev;
    }
    // No id → first USB device
    if (typeof frida.getUsbDevice === "function") {
      return await frida.getUsbDevice();
    }
    return await frida.getDeviceManager().getUsbDevice();
  }

  /**
   * Attach, inject the agent, wait for the first 'key' message, detach.
   *
   * Rejects with a typed `code`: FRIDA_BINDING_MISSING, WECHAT_NOT_RUNNING,
   * FRIDA_ATTACH_FAILED or WCDB_KEY_TIMEOUT. Telemetry for the attempt is
   * published on every outcome after device acquisition starts, including
   * timeout and script.load failures.
   *
   * @returns {Promise<string>} 64-char lowercase hex SQLCipher key
   */
  async getKey(_callOpts) {
    const telemetry = {
      startedAt: Date.now(),
      packageName: this._packageName,
      deviceId: this._deviceId,
      hooked: [],
      errors: [],
      keySource: null,
      durationMs: null,
    };

    const frida = this._loadFrida();
    let device;
    let session;
    let script;

    try {
      device = await this._getDevice(frida);
    } catch (err) {
      const e = new Error(
        "FridaKeyProvider: failed to acquire Frida device" +
        (this._deviceId ? ` (${this._deviceId})` : "") +
        ": " + this._errText(err)
      );
      e.code = "FRIDA_ATTACH_FAILED";
      this._lastTelemetry = telemetry;
      throw e;
    }

    try {
      session = await device.attach(this._packageName);
    } catch (err) {
      const errMsg = this._errText(err);
      const e = new Error(
        `FridaKeyProvider: device.attach(${this._packageName}) failed: ${errMsg}`
      );
      // Distinguish "process not found" vs other attach errors
      e.code = /unable to find process|process not found/i.test(errMsg)
        ? "WECHAT_NOT_RUNNING"
        : "FRIDA_ATTACH_FAILED";
      this._lastTelemetry = telemetry;
      throw e;
    }

    try {
      const agentSrc = this._agentLoader();
      script = await session.createScript(agentSrc);
    } catch (err) {
      const e = new Error(
        "FridaKeyProvider: createScript failed: " + this._errText(err)
      );
      e.code = "FRIDA_ATTACH_FAILED";
      this._lastTelemetry = telemetry;
      // Clean up the session before throwing
      try { await session.detach(); } catch (_e) { /* best-effort detach */ }
      throw e;
    }

    try {
      return await this._waitForKey(script, session, telemetry);
    } finally {
      // Publish telemetry on BOTH outcomes: a timeout is exactly when the
      // hooked-symbol list and hook errors are needed for diagnosis.
      if (telemetry.durationMs === null) {
        telemetry.durationMs = Date.now() - telemetry.startedAt;
      }
      this._lastTelemetry = telemetry;
    }
  }

  /**
   * Load the agent and settle on the first 'key' message, a script.load
   * failure, or the timeout — always unloading + detaching before settling.
   * Agent 'error' events are recorded but do not reject by themselves.
   */
  _waitForKey(script, session, telemetry) {
    return new Promise((resolve, reject) => {
      let settled = false;
      let timer = null;

      const cleanup = async () => {
        if (timer) { clearTimeout(timer); timer = null; }
        try { await script.unload(); } catch (_e) { /* best-effort unload */ }
        try { await session.detach(); } catch (_e) { /* best-effort detach */ }
      };

      const onMessage = (message, _data) => {
        if (settled) return;
        if (!message || message.type !== "send" || !message.payload) return;
        const evt = message.payload;
        this._log({ level: "info", kind: "frida-message", evt });

        if (evt.kind === "hooked") {
          telemetry.hooked.push({ symbol: evt.symbol, module: evt.module });
          return;
        }
        if (evt.kind === "module-waiting") {
          return; // informational
        }
        if (evt.kind === "key") {
          settled = true;
          telemetry.keySource = evt.source;
          // Phase 12.6 (post-sjqz audit) — capture sig/format/length so a
          // failed DB open can be diagnosed: ascii-hex vs raw-bytes
          // determines whether sqlite3_key got the expected key bytes,
          // and sig=v1/v2 confirms args index resolution.
          telemetry.keyFormat = evt.format || null;
          telemetry.keySig = evt.sig || null;
          telemetry.keyLength = evt.length || null;
          telemetry.keyAlt = evt.alt || null;
          telemetry.durationMs = Date.now() - telemetry.startedAt;
          cleanup().then(() => resolve(String(evt.hex || "").toLowerCase()));
          return;
        }
        if (evt.kind === "error") {
          telemetry.errors.push(evt.message);
          // Don't reject on individual hook errors; we may still get a
          // key from a fallback symbol. Only reject on timeout.
        }
      };

      script.message.connect(onMessage);

      // Arm the timer BEFORE loading so that cleanup() can always clear it,
      // even if the agent reports the key while load() is still running.
      timer = setTimeout(() => {
        if (settled) return;
        settled = true;
        cleanup().then(() => {
          const last = telemetry.errors.length > 0
            ? ` (last hook error: ${telemetry.errors[telemetry.errors.length - 1]})`
            : "";
          const e = new Error(
            `FridaKeyProvider: no sqlite3_key call within ${this._timeoutMs}ms` +
            (telemetry.hooked.length === 0 ? " — libwcdb.so never loaded; " +
              "did the user touch a chat thread?" : "") + last
          );
          e.code = "WCDB_KEY_TIMEOUT";
          reject(e);
        });
      }, this._timeoutMs);

      // The async wrapper turns a synchronous throw from load() into a
      // rejection, so it goes through the same cleanup path.
      (async () => script.load())().catch((err) => {
        if (settled) return;
        settled = true;
        cleanup().then(() => {
          const e = new Error(
            "FridaKeyProvider: script.load failed: " + this._errText(err)
          );
          e.code = "FRIDA_ATTACH_FAILED";
          reject(e);
        });
      });
    });
  }
}
|
|
251
|
+
|
|
252
|
+
module.exports = { FridaKeyProvider };
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const { KeyProvider } = require("./key-provider-base");
|
|
4
|
+
const { MD5KeyProvider } = require("./md5-key-provider");
|
|
5
|
+
|
|
6
|
+
// FridaKeyProvider depends on the optional `frida` nodejs binding. Load
|
|
7
|
+
// lazily so users on devices without the binding can still use the v0.5
|
|
8
|
+
// MD5 path. Phase 12.6.3 ships the implementation.
|
|
9
|
+
// Resolve the optional provider once, at module load. Any failure while
// requiring it (file absent, load-time error) yields null instead of
// breaking the MD5 path; callers that need the real error should require
// "./frida-key-provider" directly.
const FridaKeyProvider = (() => {
  try {
    // eslint-disable-next-line global-require
    return require("./frida-key-provider").FridaKeyProvider;
  } catch (_e) {
    return null;
  }
})();

module.exports = { KeyProvider, MD5KeyProvider, FridaKeyProvider };
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 12.6 — KeyProvider interface contract.
|
|
3
|
+
*
|
|
4
|
+
* The wechat-adapter is key-source agnostic: it only knows about an
|
|
5
|
+
* object with `getKey()` returning a Promise<string> of lowercase hex: the
|
|
6
|
+
* 7-char MD5 prefix on the v0.5 path, or the full 64-hex key on the Frida hot path.
|
|
7
|
+
*
|
|
8
|
+
* Two implementations:
|
|
9
|
+
* - MD5KeyProvider (v0.5, frida-INDEPENDENT) — derives MD5(IMEI+UIN)[:7]
|
|
10
|
+
* from on-disk WeChat data dir. Works for WeChat < 8.0.x.
|
|
11
|
+
* - FridaKeyProvider (v1, frida-DEPENDENT) — attaches frida to live
|
|
12
|
+
* WeChat process and hooks sqlite3_key. Works for WeChat 8.0+.
|
|
13
|
+
*
|
|
14
|
+
* Both expose the same getKey() shape so wechat-adapter.js does not
|
|
15
|
+
* branch on version.
|
|
16
|
+
*/
|
|
17
|
+
"use strict";
|
|
18
|
+
|
|
19
|
+
class KeyProvider {
  /**
   * Produce the SQLCipher key as lowercase hex; the returned promise
   * rejects when the key cannot be obtained. The base implementation
   * always rejects — concrete providers must override it.
   *
   * Optional opts (per design §18.2):
   *   - wxid   : string  WeChat user identifier (some providers need this)
   *   - dbPath : string  path to the SQLCipher DB being opened
   *
   * @param {{wxid?: string, dbPath?: string}} [_opts]
   * @returns {Promise<string>}
   */
  // eslint-disable-next-line no-unused-vars
  async getKey(_opts) {
    const notImplemented = new Error(
      "KeyProvider.getKey: must be overridden by subclass"
    );
    throw notImplemented;
  }

  /**
   * Short identifier used for telemetry / error attribution; each
   * subclass supplies its own value.
   */
  get name() {
    const baseName = "key-provider-base";
    return baseName;
  }
}
|
|
43
|
+
|
|
44
|
+
module.exports = { KeyProvider };
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 12.6.1 — MD5KeyProvider (v0.5 legacy WeChat < 8.0 path).
|
|
3
|
+
*
|
|
4
|
+
* Wraps the existing key-extractor.js (MD5(IMEI+UIN)[:7] lowercase)
|
|
5
|
+
* behind the KeyProvider interface. Pure frida-independent: works from
|
|
6
|
+
* a pulled WeChat data directory (`adb pull /data/data/com.tencent.mm/`).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* const provider = new MD5KeyProvider({
|
|
10
|
+
* wechatDataPath: "/tmp/com.tencent.mm",
|
|
11
|
+
* // optional manual overrides for testing or when CompatibleInfo.cfg
|
|
12
|
+
* // parsing fails
|
|
13
|
+
* uin: "1234567890",
|
|
14
|
+
* imei: "1234567890abcdef",
|
|
15
|
+
* });
|
|
16
|
+
* const key = await provider.getKey();
|
|
17
|
+
*/
|
|
18
|
+
"use strict";
|
|
19
|
+
|
|
20
|
+
const { KeyProvider } = require("./key-provider-base");
|
|
21
|
+
const { extractWeChatKey } = require("../key-extractor");
|
|
22
|
+
|
|
23
|
+
class MD5KeyProvider extends KeyProvider {
  /**
   * @param {object} opts
   * @param {string} opts.wechatDataPath  directory mirroring the pulled
   *                                      /data/data/com.tencent.mm/ tree
   * @param {string} [opts.uin]           override (skip auth XML parse)
   * @param {string} [opts.imei]          override (skip CompatibleInfo)
   * @param {Function} [opts.extractor]   DI seam — defaults to
   *                                      extractWeChatKey; may return the
   *                                      result directly or a promise of it
   * @throws {Error} when opts or opts.wechatDataPath is missing
   */
  constructor(opts = {}) {
    super();
    if (!opts || typeof opts !== "object") {
      throw new Error("MD5KeyProvider: opts required");
    }
    if (!opts.wechatDataPath || typeof opts.wechatDataPath !== "string") {
      throw new Error("MD5KeyProvider: opts.wechatDataPath required");
    }
    this._wechatDataPath = opts.wechatDataPath;
    this._uinOverride = opts.uin || null;
    this._imeiOverride = opts.imei || null;
    this._extractor = typeof opts.extractor === "function"
      ? opts.extractor
      : extractWeChatKey;
    this._lastResult = null;
  }

  /** Provider name used for telemetry / error attribution. */
  get name() {
    return "md5";
  }

  /**
   * Run the extractor and return its `key`.
   *
   * @returns {Promise<string>} 7-char lowercase hex MD5 prefix
   * @throws {Error} when the extractor yields no key; the message carries
   *                 the extractor's warnings when it reported any
   */
  async getKey() {
    let result = this._extractor({
      wechatDataPath: this._wechatDataPath,
      uin: this._uinOverride,
      imei: this._imeiOverride,
    });
    // An injected extractor may be async. Only await real thenables so the
    // synchronous path still records _lastResult before the first tick.
    if (result && typeof result.then === "function") {
      result = await result;
    }
    this._lastResult = result;
    if (!result || !result.key) {
      const raw = result ? result.warnings : null;
      // Tolerate a single non-array warning instead of crashing on .join().
      const warnings = Array.isArray(raw) ? raw : (raw ? [String(raw)] : []);
      const reason = warnings.length > 0 ? warnings.join("; ") : "key extraction returned empty";
      throw new Error(`MD5KeyProvider.getKey: ${reason}`);
    }
    return result.key;
  }

  /**
   * Last extraction result for telemetry / debugging — exposes uin /
   * imei sources and warnings. Returns null until getKey() called.
   */
  getLastResult() {
    return this._lastResult;
  }
}
|
|
80
|
+
|
|
81
|
+
module.exports = { MD5KeyProvider };
|
|
@@ -203,11 +203,20 @@ function contactDisplayName(byUsername, wxid) {
|
|
|
203
203
|
/**
 * Classify an rcontact row by its username alone.
 *
 * rcontact.type bits (official accounts / group / regular contact / black
 * list) are not decoded yet — per the v0.5 scope the mapping is kept simple
 * and Phase 12.6 is expected to refine it with the full bit mapping.
 *
 *   - `*@chatroom`  → "unknown"  (chat group, not a Person)
 *   - `gh_*`        → "merchant" (公众号 / Official Account — a brand or
 *                     business pushing content; closest enum match)
 *   - anything else → "contact"
 *
 * sjqz parity (wechat.py:282): get_friends() excludes gh_* from the friends
 * view but keeps them in contacts; they stay a Person here with a distinct
 * subtype so the Ask flow / EntityResolver can filter cleanly.
 *
 * @param {{username?: *}|null|undefined} row  rcontact row; a missing row
 *        or non-string username falls back to "contact"
 * @returns {"contact"|"unknown"|"merchant"}
 */
function guessContactSubtype(row) {
  // A null/undefined row is treated like a row without a username instead
  // of throwing on the property access.
  const username = row == null ? undefined : row.username;
  if (typeof username !== "string") return "contact";
  if (username.endsWith("@chatroom")) {
    return "unknown"; // chat group, not a Person
  }
  if (username.startsWith("gh_")) {
    return "merchant"; // 公众号 / Official Account
  }
  return "contact";
}
|
|
213
222
|
|
|
@@ -69,7 +69,10 @@ class SpendingSkill extends AnalysisSkill {
|
|
|
69
69
|
|
|
70
70
|
_fetchPaymentEvents({ since, until }) {
|
|
71
71
|
const events = [];
|
|
72
|
-
|
|
72
|
+
// Phase 7 shopping adapters emit subtype="order" — must include so
|
|
73
|
+
// spending aggregates cover Taobao/JD/Meituan along with Alipay
|
|
74
|
+
// (payment/transfer) + Email (refund) etc.
|
|
75
|
+
const subtypes = ["payment", "transfer", "refund", "utility", "redenvelope", "investment", "income", "order"];
|
|
73
76
|
for (const subtype of subtypes) {
|
|
74
77
|
const q = { subtype, limit: 5000 };
|
|
75
78
|
if (since != null) q.since = since;
|
package/lib/analysis.js
CHANGED
|
@@ -136,14 +136,26 @@ class AnalysisEngine {
|
|
|
136
136
|
intent: parsed.intent,
|
|
137
137
|
timeWindow: parsed.timeWindow,
|
|
138
138
|
maxFacts: this.maxFacts,
|
|
139
|
+
vaultTotals: this._gatherVaultTotals(),
|
|
139
140
|
});
|
|
140
141
|
|
|
141
|
-
// Call LLM.
|
|
142
|
+
// Call LLM. **skipCache: true** is critical: PDH answers depend on
|
|
143
|
+
// current vault state (new contacts / events / items ingested between
|
|
144
|
+
// asks). The desktop LLMManager has a 7-day ResponseCache keyed on
|
|
145
|
+
// sha256(messages); if a stale entry from before the latest sync hits,
|
|
146
|
+
// the user sees yesterday's hallucinated count after fixing _gatherFacts
|
|
147
|
+
// and never finds out (real-device verify 2026-05-21 Xiaomi 24115RA8EC:
|
|
148
|
+
// "几个联系人" served from cache, returned the pre-Path-C-fix wrong
|
|
149
|
+
// answer of "32" even though vault now had real contact data). PDH's
|
|
150
|
+
// freshness-over-latency tradeoff makes the cache strictly counter-
|
|
151
|
+
// productive at this layer. The cache for OTHER LLM uses (chat /
|
|
152
|
+
// skill orchestration / autonomous-agent) is unaffected.
|
|
142
153
|
let llmResp;
|
|
143
154
|
try {
|
|
144
155
|
llmResp = await this.llm.chat(messages, {
|
|
145
156
|
temperature: 0.2,
|
|
146
157
|
purpose: "personal-data-hub.analysis.ask",
|
|
158
|
+
skipCache: true,
|
|
147
159
|
});
|
|
148
160
|
} catch (err) {
|
|
149
161
|
const e = toError(err, "llm.chat");
|
|
@@ -195,6 +207,109 @@ class AnalysisEngine {
|
|
|
195
207
|
};
|
|
196
208
|
}
|
|
197
209
|
|
|
210
|
+
/**
|
|
211
|
+
* Retrieve the prompt context for a question WITHOUT calling the LLM.
|
|
212
|
+
*
|
|
213
|
+
* Mirrors the front half of `ask()` (parseQuery → gatherFacts → ragRetriever
|
|
214
|
+
* → buildPrompt) and returns the assembled messages + facts. The caller is
|
|
215
|
+
* responsible for invoking its own LLM with the returned messages and then
|
|
216
|
+
* (optionally) running citation validation on the answer.
|
|
217
|
+
*
|
|
218
|
+
* Why: lets a mobile / browser front-end host the LLM call locally (e.g.
|
|
219
|
+
* Android-side Volcengine Doubao adapter via API key) while keeping the
|
|
220
|
+
* vault + retrieval on the desktop. The privacy gate does NOT apply here
|
|
221
|
+
* because no LLM is contacted — the caller's gate is the gate.
|
|
222
|
+
*
|
|
223
|
+
* @param {string} question
|
|
224
|
+
* @param {object} [options]
|
|
225
|
+
* @param {number} [options.now]
|
|
226
|
+
* @param {boolean} [options.skipAudit=false]
|
|
227
|
+
* @returns {Promise<RetrieveContextResult>}
|
|
228
|
+
*
|
|
229
|
+
* @typedef {object} RetrieveContextResult
|
|
230
|
+
* @property {string} question
|
|
231
|
+
* @property {object} parsed
|
|
232
|
+
* @property {Array<object>} facts
|
|
233
|
+
* @property {string[]} factIds
|
|
234
|
+
* @property {number} factCount
|
|
235
|
+
* @property {boolean} truncated
|
|
236
|
+
* @property {string[]} ragContextIds
|
|
237
|
+
* @property {Array<{role: string, content: string}>} messages prompt-builder output, LLM-ready
|
|
238
|
+
* @property {string} systemPrompt
|
|
239
|
+
* @property {number} retrievedAt Date.now() at start
|
|
240
|
+
* @property {number} durationMs
|
|
241
|
+
*/
|
|
242
|
+
async retrieveContext(question, options = {}) {
|
|
243
|
+
if (typeof question !== "string" || question.length === 0) {
|
|
244
|
+
throw new Error("AnalysisEngine.retrieveContext: question must be a non-empty string");
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
const startedAt = Date.now();
|
|
248
|
+
const parsed = parseQuery(question, { now: options.now });
|
|
249
|
+
const facts = this._gatherFacts(parsed);
|
|
250
|
+
|
|
251
|
+
const ragContextIds = [];
|
|
252
|
+
if (this.ragRetriever) {
|
|
253
|
+
try {
|
|
254
|
+
const docs = await this.ragRetriever(question, parsed);
|
|
255
|
+
if (Array.isArray(docs)) {
|
|
256
|
+
for (const doc of docs) {
|
|
257
|
+
if (!doc || !doc.id) continue;
|
|
258
|
+
const e = this.vault.getEvent(doc.id);
|
|
259
|
+
if (e && !facts.find((f) => f.id === e.id)) {
|
|
260
|
+
facts.push(e);
|
|
261
|
+
ragContextIds.push(doc.id);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
} catch (err) {
|
|
266
|
+
const e = toError(err, "ragRetriever");
|
|
267
|
+
try {
|
|
268
|
+
this.vault.audit("analysis.rag_failed", question, { error: e.message });
|
|
269
|
+
} catch (_e) { /* audit failures are non-fatal */ }
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
const { messages, factIds, factCount, truncated } = buildPrompt({
|
|
274
|
+
question,
|
|
275
|
+
facts,
|
|
276
|
+
systemPrompt: this.systemPrompt,
|
|
277
|
+
intent: parsed.intent,
|
|
278
|
+
timeWindow: parsed.timeWindow,
|
|
279
|
+
maxFacts: this.maxFacts,
|
|
280
|
+
vaultTotals: this._gatherVaultTotals(),
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
const durationMs = Date.now() - startedAt;
|
|
284
|
+
|
|
285
|
+
if (!options.skipAudit) {
|
|
286
|
+
try {
|
|
287
|
+
this.vault.audit("analysis.retrieve_context", question, {
|
|
288
|
+
factCount,
|
|
289
|
+
truncated,
|
|
290
|
+
ragContextIds: ragContextIds.length,
|
|
291
|
+
durationMs,
|
|
292
|
+
});
|
|
293
|
+
} catch (_e) { /* audit failures are non-fatal */ }
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
return {
|
|
297
|
+
question,
|
|
298
|
+
parsed,
|
|
299
|
+
facts,
|
|
300
|
+
// buildPrompt returns factIds as a Set; flatten to Array so the result
|
|
301
|
+
// round-trips through IPC / WS JSON serialization without becoming `{}`.
|
|
302
|
+
factIds: Array.from(factIds),
|
|
303
|
+
factCount,
|
|
304
|
+
truncated,
|
|
305
|
+
ragContextIds,
|
|
306
|
+
messages,
|
|
307
|
+
systemPrompt: this.systemPrompt,
|
|
308
|
+
retrievedAt: startedAt,
|
|
309
|
+
durationMs,
|
|
310
|
+
};
|
|
311
|
+
}
|
|
312
|
+
|
|
198
313
|
// ─── Internals ─────────────────────────────────────────────────────
|
|
199
314
|
|
|
200
315
|
_gatherFacts(parsed) {
|
|
@@ -215,7 +330,81 @@ class AnalysisEngine {
|
|
|
215
330
|
if (Number.isFinite(parsed.timeWindow.since)) q.since = parsed.timeWindow.since;
|
|
216
331
|
if (Number.isFinite(parsed.timeWindow.until)) q.until = parsed.timeWindow.until;
|
|
217
332
|
}
|
|
218
|
-
|
|
333
|
+
const events = this.vault.queryEvents(q);
|
|
334
|
+
|
|
335
|
+
// Path C follow-up — events alone miss whole categories of facts:
|
|
336
|
+
// - contacts (system-data-android) land in `persons`, not `events`
|
|
337
|
+
// - installed apps land in `items`, not `events`
|
|
338
|
+
// - places (visited locations) live in `places`
|
|
339
|
+
// Without these the LLM gets 0 facts for "我有几个联系人" style questions
|
|
340
|
+
// and hallucinates a count. We pull a bounded slice of each entity type
|
|
341
|
+
// and append; prompt-builder.summarizeFact already handles `person` /
|
|
342
|
+
// `place` / fallback `item` shapes, so this is additive with no schema
|
|
343
|
+
// change to the LLM-facing prompt.
|
|
344
|
+
//
|
|
345
|
+
// Sizing: keep events as the majority (existing behavior is unchanged for
|
|
346
|
+
// event-heavy queries like 消费 / 通话); split the remaining 1/2 budget
|
|
347
|
+
// between persons + items. Time window + adapter filters don't apply to
|
|
348
|
+
// these tables (persons aren't time-stamped events) — they're current-
|
|
349
|
+
// state snapshots that should always be visible. Adapter filter is also
|
|
350
|
+
// skipped because users asking "我有几个联系人" don't say "from
|
|
351
|
+
// system-data-android".
|
|
352
|
+
const remaining = Math.max(0, this.maxFacts - events.length);
|
|
353
|
+
const sideBudget = Math.floor(remaining / 2);
|
|
354
|
+
const personBudget = sideBudget > 0 ? sideBudget : 0;
|
|
355
|
+
const itemBudget = remaining - personBudget;
|
|
356
|
+
|
|
357
|
+
let persons = [];
|
|
358
|
+
if (personBudget > 0) {
|
|
359
|
+
try {
|
|
360
|
+
persons = this.vault.queryPersons({ limit: personBudget });
|
|
361
|
+
} catch (_e) {
|
|
362
|
+
// Older vaults / forks without queryPersons — fall back gracefully.
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
let items = [];
|
|
366
|
+
if (itemBudget > 0) {
|
|
367
|
+
try {
|
|
368
|
+
items = this.vault.queryItems({ limit: itemBudget });
|
|
369
|
+
} catch (_e) {
|
|
370
|
+
/* same fallback */
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
return [...events, ...persons, ...items];
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* Pull authoritative entity counts from the vault. These go into the
|
|
379
|
+
* prompt's TOTALS block so the LLM can answer "how many X" questions
|
|
380
|
+
* correctly even when the FACTS sample is truncated (maxFacts cap).
|
|
381
|
+
*
|
|
382
|
+
* 2026-05-21 bug: LLM said "32 contacts" when vault actually had ~500.
|
|
383
|
+
* Root cause was a mix of (a) FACTS not including persons (fixed in
|
|
384
|
+
* _gatherFacts), and (b) LLM still counting FACTS array length even after
|
|
385
|
+
* persons were included — capped at the 80-fact ceiling. TOTALS bypasses
|
|
386
|
+
* both: it gives the LLM the real number to quote directly.
|
|
387
|
+
*
|
|
388
|
+
* Wrapped in try because legacy vault forks / mock vaults in tests may
|
|
389
|
+
* not expose `stats()`; falling back to undefined makes prompt-builder
|
|
390
|
+
* skip the block entirely.
|
|
391
|
+
*/
|
|
392
|
+
_gatherVaultTotals() {
|
|
393
|
+
if (typeof this.vault.stats !== "function") return undefined;
|
|
394
|
+
try {
|
|
395
|
+
const s = this.vault.stats();
|
|
396
|
+
// Trim to the fields useful for question answering — schemaVersion /
|
|
397
|
+
// mergeGroups / audit log size are noise here.
|
|
398
|
+
return {
|
|
399
|
+
events: s.events,
|
|
400
|
+
persons: s.persons,
|
|
401
|
+
places: s.places,
|
|
402
|
+
items: s.items,
|
|
403
|
+
topics: s.topics,
|
|
404
|
+
};
|
|
405
|
+
} catch (_e) {
|
|
406
|
+
return undefined;
|
|
407
|
+
}
|
|
219
408
|
}
|
|
220
409
|
}
|
|
221
410
|
|