@chainlesschain/personal-data-hub 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-history.test.js +395 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/analysis-skills.test.js +409 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +221 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/index.js +28 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +216 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +115 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +266 -0
- package/package.json +29 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SidecarSupervisor — manages the forensics-bridge Python sidecar lifecycle.
|
|
3
|
+
*
|
|
4
|
+
* Protocol: docs/design/Personal_Data_Hub_Python_Sidecar.md §3 (JSON-lines).
|
|
5
|
+
* Counterpart: packages/personal-data-hub-bridge/forensics_bridge/ipc_server.py.
|
|
6
|
+
*
|
|
7
|
+
* Responsibilities:
|
|
8
|
+
* - Spawn / health-check / restart the sidecar subprocess
|
|
9
|
+
* - Frame stdin/stdout as JSON-lines (one envelope per line)
|
|
10
|
+
* - Correlate request id → pending promise; route progress/chunk callbacks
|
|
11
|
+
* - Forward stderr logs as events for hub audit logging
|
|
12
|
+
*
|
|
13
|
+
* Non-goals (this layer):
|
|
14
|
+
* - Persisting credentials (caller passes them per-invoke)
|
|
15
|
+
* - Audit logging (caller subscribes to "log" events)
|
|
16
|
+
* - Schema validation of params (Python side enforces)
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
"use strict";
|
|
20
|
+
|
|
21
|
+
const { spawn } = require("node:child_process");
|
|
22
|
+
const { EventEmitter } = require("node:events");
|
|
23
|
+
const crypto = require("node:crypto");
|
|
24
|
+
const readline = require("node:readline");
|
|
25
|
+
|
|
26
|
+
const DEFAULT_TIMEOUT_MS = 60_000;
|
|
27
|
+
const DEFAULT_HEALTHCHECK_INTERVAL_MS = 30_000;
|
|
28
|
+
const STOP_GRACE_MS = 5_000;
|
|
29
|
+
|
|
30
|
+
/**
 * Raised when a sidecar invocation receives no terminal envelope before its
 * deadline. Flagged `retryable` so callers may choose to re-issue the call.
 */
class SidecarTimeoutError extends Error {
  /**
   * @param {string} method - Name of the sidecar method that timed out.
   * @param {number} timeoutMs - Deadline that elapsed, in milliseconds.
   */
  constructor(method, timeoutMs) {
    const message = `sidecar method '${method}' timed out after ${timeoutMs}ms`;
    super(message);
    Object.assign(this, {
      name: "SidecarTimeoutError",
      code: "TIMEOUT",
      retryable: true,
    });
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Wraps an error payload reported by the sidecar for a specific request.
 * The message falls back to the error code when no `msg` is supplied.
 */
class SidecarMethodError extends Error {
  /**
   * @param {object} error
   * @param {string} error.code - Error code taken from the payload.
   * @param {string} [error.msg] - Human-readable message, if any.
   * @param {*} [error.retryable] - Coerced to a boolean.
   */
  constructor({ code, msg, retryable }) {
    const text = msg ? msg : code;
    super(text);
    Object.assign(this, {
      name: "SidecarMethodError",
      code,
      retryable: retryable ? true : false,
    });
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Raised when a method is invoked while no usable sidecar process exists.
 * Flagged `retryable` so callers may retry once the sidecar is started.
 */
class SidecarNotRunningError extends Error {
  /**
   * @param {string} method - Name of the method that could not be invoked.
   */
  constructor(method) {
    const message = `sidecar is not running; cannot invoke '${method}'`;
    super(message);
    Object.assign(this, {
      name: "SidecarNotRunningError",
      code: "SIDECAR_NOT_RUNNING",
      retryable: true,
    });
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Supervises one sidecar child process and speaks JSON-lines with it over
 * stdin/stdout.
 *
 * Events emitted:
 *   - "error"             spawn/process errors, malformed stdout lines, and
 *                         exceptions thrown by onProgress/onChunk callbacks
 *   - "exit"              `{ code, signal }` once the child has exited
 *   - "log"               one raw stderr line from the child
 *   - "orphan"            an envelope that matches no pending request
 *   - "healthCheckFailed" a periodic `sidecar.ping` was rejected
 *   - "stdinError"        a write-side failure on the child's stdin pipe
 */
class SidecarSupervisor extends EventEmitter {
  /**
   * @param {object} opts
   * @param {string|string[]} opts.command - Executable (string) or [exec, ...args] for spawn.
   * @param {string[]} [opts.args] - Extra args appended after `command`.
   * @param {string} [opts.cwd] - Working directory for the child.
   * @param {object} [opts.env] - Env vars merged over process.env.
   * @param {number} [opts.healthCheckIntervalMs] - 0 to disable.
   * @param {number} [opts.defaultTimeoutMs] - Per-invoke default.
   */
  constructor(opts) {
    super();
    if (!opts || !opts.command) {
      throw new Error("SidecarSupervisor requires opts.command");
    }
    const [first, ...rest] = Array.isArray(opts.command)
      ? opts.command
      : [opts.command];
    this._exec = first;
    this._args = [...rest, ...(opts.args || [])];
    this._cwd = opts.cwd;
    this._env = opts.env;
    this._healthCheckIntervalMs =
      opts.healthCheckIntervalMs ?? DEFAULT_HEALTHCHECK_INTERVAL_MS;
    this._defaultTimeoutMs = opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS;

    this._proc = null;
    this._stdoutReader = null;
    this._stderrReader = null;
    this._pending = new Map(); // id → { resolve, reject, timer, onProgress, onChunk, method }
    this._healthTimer = null;
    this._stopping = false;
  }

  /**
   * Spawn the sidecar and verify it responds to `sidecar.ping`.
   *
   * Calling start() while a child is already tracked (and has not been sent
   * a kill signal) is a no-op. If the readiness ping fails, the child is
   * stopped again before the error is rethrown, so a failed start() does
   * not leave a stray process behind.
   *
   * @param {object} [options]
   * @param {number} [options.readyTimeoutMs=5000] - Deadline for the readiness ping.
   * @returns {Promise<void>}
   */
  async start({ readyTimeoutMs = 5_000 } = {}) {
    if (this._proc && !this._proc.killed) {
      return; // already running
    }

    // The two PYTHON* entries come last so they win over caller-supplied env.
    const env = {
      ...process.env,
      ...(this._env || {}),
      PYTHONIOENCODING: "utf-8",
      PYTHONUNBUFFERED: "1",
    };

    const proc = spawn(this._exec, this._args, {
      cwd: this._cwd,
      env,
      stdio: ["pipe", "pipe", "pipe"],
      windowsHide: true,
    });
    this._proc = proc;

    this._stopping = false;
    this._wireStreams();

    proc.on("error", (err) => {
      // Clean up BEFORE emitting: emit("error") throws when nobody listens,
      // which would otherwise leave pending requests hanging.
      this._failAllPending(err);
      // A child that never spawned has no pid and may never emit "exit";
      // forget it so a later start() can try again.
      if (proc.pid === undefined && this._proc === proc) {
        this._teardown();
      }
      this.emit("error", err);
    });

    proc.on("exit", (code, signal) => {
      // Skip the cleanup only when a newer child has already replaced this one.
      const superseded = Boolean(this._proc) && this._proc !== proc;
      if (!superseded) {
        const reason = new Error(
          `sidecar exited (code=${code} signal=${signal})`,
        );
        this._failAllPending(reason);
        this._teardown();
      }
      // Emitted after teardown so listeners observe isRunning() === false
      // and may call start() again from inside the handler.
      this.emit("exit", { code, signal });
    });

    try {
      await this.invoke("sidecar.ping", {}, { timeoutMs: readyTimeoutMs });
    } catch (err) {
      // The ping failure is the error the caller needs; a secondary failure
      // while stopping the half-started child is deliberately not surfaced.
      await this.stop().catch(() => {});
      throw err;
    }
    this._scheduleHealthCheck();
  }

  /**
   * Invoke a sidecar method.
   *
   * @param {string} method
   * @param {object} params
   * @param {object} [opts]
   * @param {number} [opts.timeoutMs]
   * @param {(data: object) => void} [opts.onProgress] - invoked on progress envelopes
   * @param {(data: object) => void} [opts.onChunk] - invoked on chunk envelopes
   * @returns {Promise<object>} resolves with the final `result.data`; rejects with
   *   SidecarNotRunningError, SidecarTimeoutError, SidecarMethodError, or the
   *   process error/exit reason.
   */
  invoke(method, params = {}, opts = {}) {
    if (!this._proc || this._proc.killed || this._stopping) {
      return Promise.reject(new SidecarNotRunningError(method));
    }
    const timeoutMs = opts.timeoutMs ?? this._defaultTimeoutMs;
    const id =
      typeof crypto.randomUUID === "function"
        ? crypto.randomUUID()
        : `req-${Date.now()}-${Math.random().toString(16).slice(2)}`;

    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this._pending.delete(id);
        // Best-effort cancel on the sidecar side; don't await the response.
        this._writeLine({
          id: `cancel-${id}`,
          method: "request.cancel",
          params: { id },
        }).catch(() => {});
        reject(new SidecarTimeoutError(method, timeoutMs));
      }, timeoutMs);
      // Allow the process to exit even if a sidecar invocation timer is pending.
      if (typeof timer.unref === "function") timer.unref();

      this._pending.set(id, {
        resolve,
        reject,
        timer,
        onProgress: opts.onProgress,
        onChunk: opts.onChunk,
        method,
      });

      const envelope = { id, method, params, timeout_ms: timeoutMs };
      this._writeLine(envelope).catch((err) => {
        clearTimeout(timer);
        this._pending.delete(id);
        reject(err);
      });
    });
  }

  /**
   * Stop the sidecar: close stdin, SIGTERM → wait grace → SIGKILL.
   * Invocations still pending when the child exits are rejected with the
   * "sidecar exited" error produced by the exit handler.
   *
   * @param {object} [options]
   * @param {number} [options.graceMs] - How long to wait after SIGTERM before SIGKILL.
   * @returns {Promise<void>}
   */
  async stop({ graceMs = STOP_GRACE_MS } = {}) {
    if (!this._proc || this._proc.killed) {
      this._teardown();
      return;
    }
    this._stopping = true;
    if (this._healthTimer) {
      clearInterval(this._healthTimer);
      this._healthTimer = null;
    }
    const proc = this._proc;
    const exited = new Promise((res) => proc.once("exit", res));
    try {
      proc.stdin?.end();
    } catch (_err) {
      /* already closed */
    }
    proc.kill("SIGTERM");
    let graceTimer;
    const exitedInTime = await Promise.race([
      exited.then(() => true),
      new Promise((res) => {
        graceTimer = setTimeout(() => res(false), graceMs);
        if (typeof graceTimer.unref === "function") graceTimer.unref();
      }),
    ]);
    clearTimeout(graceTimer);
    if (!exitedInTime) {
      proc.kill("SIGKILL");
      await exited;
    }
    // Do not tear down a newer child that an "exit" listener may have started.
    if (!this._proc || this._proc === proc) {
      this._teardown();
    }
  }

  /**
   * @returns {boolean} true while a child is tracked, has not been sent a
   *   kill signal, and stop() is not in progress.
   */
  isRunning() {
    return Boolean(this._proc) && !this._proc.killed && !this._stopping;
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /** Attach line readers to the child's stdout/stderr and guard its stdin. */
  _wireStreams() {
    // A failed write (e.g. EPIPE after the child died) is reported through
    // the write callback AND as an 'error' event on the stream; without a
    // listener that event would be thrown as an uncaught exception.
    this._proc.stdin?.on("error", (err) => {
      this.emit("stdinError", err);
    });

    this._stdoutReader = readline.createInterface({
      input: this._proc.stdout,
      crlfDelay: Infinity,
    });
    this._stdoutReader.on("line", (line) => {
      if (!line) return;
      let envelope;
      try {
        envelope = JSON.parse(line);
      } catch (err) {
        this.emit("error", new Error(`invalid envelope from sidecar: ${line}`));
        return;
      }
      this._handleEnvelope(envelope);
    });

    this._stderrReader = readline.createInterface({
      input: this._proc.stderr,
      crlfDelay: Infinity,
    });
    this._stderrReader.on("line", (line) => {
      if (!line) return;
      // Sidecar logs pino-style JSON; pass through for hub audit.
      this.emit("log", line);
    });
  }

  /**
   * Route one parsed stdout envelope to the pending request it belongs to.
   * Progress/chunk frames invoke the callbacks and keep the request open;
   * any other frame settles the request and removes it.
   *
   * @param {*} env - Parsed JSON value read from the sidecar.
   */
  _handleEnvelope(env) {
    // Valid JSON is not necessarily an object (`null`, `42`, `"x"`); treat
    // such values, and envelopes with no id (e.g. INVALID_JSON parse
    // failures), as orphans instead of dereferencing them.
    if (
      env === null ||
      typeof env !== "object" ||
      env.id === null ||
      env.id === undefined
    ) {
      this.emit("orphan", env);
      return;
    }
    const pending = this._pending.get(env.id);
    if (!pending) {
      this.emit("orphan", env);
      return;
    }

    if (env.type === "progress") {
      try {
        pending.onProgress?.(env.data);
      } catch (err) {
        this.emit("error", err);
      }
      return;
    }
    if (env.type === "chunk") {
      try {
        pending.onChunk?.(env.data);
      } catch (err) {
        this.emit("error", err);
      }
      return;
    }
    // Terminal frames remove the pending entry.
    clearTimeout(pending.timer);
    this._pending.delete(env.id);

    if (env.type === "result") {
      pending.resolve(env.data);
    } else if (env.type === "error") {
      pending.reject(new SidecarMethodError(env.error || { code: "UNKNOWN" }));
    } else {
      pending.reject(
        new SidecarMethodError({
          code: "UNKNOWN_ENVELOPE_TYPE",
          msg: `unexpected envelope type: ${env.type}`,
        }),
      );
    }
  }

  /**
   * Serialize an envelope as one JSON line and write it to the child's stdin.
   *
   * @param {object} envelope
   * @returns {Promise<void>} rejects with SidecarNotRunningError when stdin is
   *   unavailable, or with the write/serialization error.
   */
  _writeLine(envelope) {
    return new Promise((resolve, reject) => {
      if (!this._proc || !this._proc.stdin || this._proc.stdin.destroyed) {
        reject(new SidecarNotRunningError(envelope.method || "<unknown>"));
        return;
      }
      const line = JSON.stringify(envelope) + "\n";
      this._proc.stdin.write(line, "utf8", (err) => {
        if (err) reject(err);
        else resolve();
      });
    });
  }

  /** (Re)arm the periodic `sidecar.ping`; does nothing when the interval is 0. */
  _scheduleHealthCheck() {
    if (!this._healthCheckIntervalMs) return;
    if (this._healthTimer) clearInterval(this._healthTimer);
    this._healthTimer = setInterval(() => {
      this.invoke("sidecar.ping", {}, { timeoutMs: 3_000 }).catch((err) => {
        this.emit("healthCheckFailed", err);
      });
    }, this._healthCheckIntervalMs);
    // Health checks alone must not keep the host process alive.
    if (typeof this._healthTimer.unref === "function") {
      this._healthTimer.unref();
    }
  }

  /**
   * Reject every pending request with `err` and forget them all.
   *
   * @param {Error} err
   */
  _failAllPending(err) {
    for (const [, pending] of this._pending) {
      clearTimeout(pending.timer);
      pending.reject(err);
    }
    this._pending.clear();
  }

  /** Release the health timer, both line readers, and the process handle. */
  _teardown() {
    if (this._healthTimer) {
      clearInterval(this._healthTimer);
      this._healthTimer = null;
    }
    this._stdoutReader?.close();
    this._stderrReader?.close();
    this._stdoutReader = null;
    this._stderrReader = null;
    this._proc = null;
  }
}
|
|
353
|
+
|
|
354
|
+
module.exports = {
|
|
355
|
+
SidecarSupervisor,
|
|
356
|
+
SidecarTimeoutError,
|
|
357
|
+
SidecarMethodError,
|
|
358
|
+
SidecarNotRunningError,
|
|
359
|
+
};
|
package/lib/vault.js
CHANGED
|
@@ -37,6 +37,13 @@ const DEFAULT_CIPHER_PAGE_SIZE = 4096;
|
|
|
37
37
|
|
|
38
38
|
// ─── Helpers ─────────────────────────────────────────────────────────────
|
|
39
39
|
|
|
40
|
+
/**
 * Build an id for a merge_groups row: `mg-<random hex>-<base36 timestamp>`.
 * Not cryptographically strong by design; the id only has to be unique
 * within a single user's vault.
 *
 * @returns {string}
 */
function newGroupId() {
  const hexChunk = () => Math.random().toString(16).slice(2, 10);
  const randomPart = hexChunk() + hexChunk();
  const timePart = Date.now().toString(36);
  return `mg-${randomPart}-${timePart}`;
}
|
|
46
|
+
|
|
40
47
|
function loadDriver() {
|
|
41
48
|
// Lazy require so consumers that only need schemas don't pay for the
|
|
42
49
|
// native binding load. Errors surface here with a precise message.
|
|
@@ -716,6 +723,9 @@ class LocalVault {
|
|
|
716
723
|
stats() {
|
|
717
724
|
const db = this._requireOpen();
|
|
718
725
|
const count = (tbl) => db.prepare(`SELECT COUNT(*) as n FROM ${tbl}`).get().n;
|
|
726
|
+
const safeCount = (tbl) => {
|
|
727
|
+
try { return count(tbl); } catch (_e) { return 0; }
|
|
728
|
+
};
|
|
719
729
|
return {
|
|
720
730
|
schemaVersion: getSchemaVersion(db),
|
|
721
731
|
events: count("events"),
|
|
@@ -726,9 +736,265 @@ class LocalVault {
|
|
|
726
736
|
rawEvents: count("raw_events"),
|
|
727
737
|
auditLog: count("audit_log"),
|
|
728
738
|
watermarks: count("sync_watermarks"),
|
|
739
|
+
// Phase 8 — EntityResolver tables (safeCount because v1 vaults
|
|
740
|
+
// don't have these yet until migrate).
|
|
741
|
+
mergeGroups: safeCount("merge_groups"),
|
|
742
|
+
mergeMembers: safeCount("merge_members"),
|
|
743
|
+
resolveQueue: safeCount("resolve_queue"),
|
|
744
|
+
reviewQueue: safeCount("review_queue"),
|
|
745
|
+
resolveDecisions: safeCount("resolve_decisions"),
|
|
729
746
|
};
|
|
730
747
|
}
|
|
731
748
|
|
|
749
|
+
// ─── Phase 8 EntityResolver helpers ───────────────────────────────────
|
|
750
|
+
|
|
751
|
+
/**
|
|
752
|
+
* Insert a new pending row into resolve_queue. Idempotent — already-
|
|
753
|
+
* pending rows for the same person are not duplicated. Returns the
|
|
754
|
+
* row id (existing or newly inserted).
|
|
755
|
+
*/
|
|
756
|
+
enqueueResolve(personId) {
|
|
757
|
+
if (typeof personId !== "string" || personId.length === 0) {
|
|
758
|
+
throw new Error("enqueueResolve: personId required");
|
|
759
|
+
}
|
|
760
|
+
const db = this._requireOpen();
|
|
761
|
+
const existing = db.prepare(
|
|
762
|
+
"SELECT id FROM resolve_queue WHERE person_id = ? AND status IN ('pending','in-progress')"
|
|
763
|
+
).get(personId);
|
|
764
|
+
if (existing) return existing.id;
|
|
765
|
+
const info = db.prepare(
|
|
766
|
+
"INSERT INTO resolve_queue (person_id, enqueued_at, status) VALUES (?, ?, 'pending')"
|
|
767
|
+
).run(personId, Date.now());
|
|
768
|
+
return info.lastInsertRowid;
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
/**
|
|
772
|
+
* Pull up to `limit` pending rows + atomically mark them in-progress.
|
|
773
|
+
* Returns [{id, person_id, attempts}, ...].
|
|
774
|
+
*/
|
|
775
|
+
claimResolveBatch(limit = 50) {
|
|
776
|
+
const db = this._requireOpen();
|
|
777
|
+
const tx = db.transaction(() => {
|
|
778
|
+
const rows = db.prepare(
|
|
779
|
+
"SELECT id, person_id, attempts FROM resolve_queue WHERE status = 'pending' ORDER BY enqueued_at LIMIT ?"
|
|
780
|
+
).all(limit);
|
|
781
|
+
if (rows.length === 0) return [];
|
|
782
|
+
const stmt = db.prepare(
|
|
783
|
+
"UPDATE resolve_queue SET status = 'in-progress', attempts = attempts + 1 WHERE id = ?"
|
|
784
|
+
);
|
|
785
|
+
for (const r of rows) stmt.run(r.id);
|
|
786
|
+
return rows;
|
|
787
|
+
});
|
|
788
|
+
return tx();
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
/**
|
|
792
|
+
* Mark a resolve_queue row as done (success path).
|
|
793
|
+
*/
|
|
794
|
+
completeResolve(queueId) {
|
|
795
|
+
const db = this._requireOpen();
|
|
796
|
+
db.prepare("UPDATE resolve_queue SET status = 'done' WHERE id = ?").run(queueId);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
/**
|
|
800
|
+
* Mark a resolve_queue row as errored (retry-eligible if attempts < 3).
|
|
801
|
+
*/
|
|
802
|
+
errorResolve(queueId, errMsg) {
|
|
803
|
+
const db = this._requireOpen();
|
|
804
|
+
// If attempts < 3, leave status 'pending' for retry; else 'error'
|
|
805
|
+
db.prepare(
|
|
806
|
+
`UPDATE resolve_queue
|
|
807
|
+
SET status = CASE WHEN attempts >= 3 THEN 'error' ELSE 'pending' END,
|
|
808
|
+
last_error = ?
|
|
809
|
+
WHERE id = ?`
|
|
810
|
+
).run(errMsg || "unknown", queueId);
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
/**
|
|
814
|
+
* Record a resolve_decisions row. Lex-orders the two ids so each pair
|
|
815
|
+
* is stored only once. Returns inserted-or-updated row.
|
|
816
|
+
*/
|
|
817
|
+
recordResolveDecision({ aId, bId, verdict, confidence, decidedBy, reason }) {
|
|
818
|
+
const db = this._requireOpen();
|
|
819
|
+
const [lo, hi] = aId < bId ? [aId, bId] : [bId, aId];
|
|
820
|
+
db.prepare(
|
|
821
|
+
`INSERT INTO resolve_decisions
|
|
822
|
+
(a_person_id, b_person_id, verdict, confidence, decided_at, decided_by, reason)
|
|
823
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
824
|
+
ON CONFLICT(a_person_id, b_person_id) DO UPDATE SET
|
|
825
|
+
verdict = excluded.verdict,
|
|
826
|
+
confidence = excluded.confidence,
|
|
827
|
+
decided_at = excluded.decided_at,
|
|
828
|
+
decided_by = excluded.decided_by,
|
|
829
|
+
reason = excluded.reason`
|
|
830
|
+
).run(lo, hi, verdict, confidence, Date.now(), decidedBy || "rule", reason || null);
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
getResolveDecision(aId, bId) {
|
|
834
|
+
const db = this._requireOpen();
|
|
835
|
+
const [lo, hi] = aId < bId ? [aId, bId] : [bId, aId];
|
|
836
|
+
return db.prepare(
|
|
837
|
+
"SELECT * FROM resolve_decisions WHERE a_person_id = ? AND b_person_id = ?"
|
|
838
|
+
).get(lo, hi);
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
/**
|
|
842
|
+
* Merge a pair into a merge_group. If either side already belongs to a
|
|
843
|
+
* group, the other side joins it (and the two groups merge if both
|
|
844
|
+
* already existed). Returns the resulting group_id.
|
|
845
|
+
*/
|
|
846
|
+
mergePair({ aId, bId, joinedBy = "rule" }) {
|
|
847
|
+
const db = this._requireOpen();
|
|
848
|
+
const tx = db.transaction(() => {
|
|
849
|
+
const aGroup = db.prepare("SELECT group_id FROM merge_members WHERE person_id = ?").get(aId);
|
|
850
|
+
const bGroup = db.prepare("SELECT group_id FROM merge_members WHERE person_id = ?").get(bId);
|
|
851
|
+
const now = Date.now();
|
|
852
|
+
|
|
853
|
+
if (aGroup && bGroup && aGroup.group_id === bGroup.group_id) {
|
|
854
|
+
return aGroup.group_id; // already same group
|
|
855
|
+
}
|
|
856
|
+
if (aGroup && bGroup) {
|
|
857
|
+
// Merge two existing groups → keep aGroup, move bGroup members in
|
|
858
|
+
db.prepare(
|
|
859
|
+
"UPDATE merge_members SET group_id = ? WHERE group_id = ?"
|
|
860
|
+
).run(aGroup.group_id, bGroup.group_id);
|
|
861
|
+
db.prepare("DELETE FROM merge_groups WHERE id = ?").run(bGroup.group_id);
|
|
862
|
+
db.prepare(
|
|
863
|
+
"UPDATE merge_groups SET member_count = (SELECT COUNT(*) FROM merge_members WHERE group_id = ?), last_updated = ? WHERE id = ?"
|
|
864
|
+
).run(aGroup.group_id, now, aGroup.group_id);
|
|
865
|
+
return aGroup.group_id;
|
|
866
|
+
}
|
|
867
|
+
if (aGroup) {
|
|
868
|
+
// Add b to a's group
|
|
869
|
+
db.prepare(
|
|
870
|
+
"INSERT INTO merge_members (group_id, person_id, joined_at, joined_by) VALUES (?, ?, ?, ?)"
|
|
871
|
+
).run(aGroup.group_id, bId, now, joinedBy);
|
|
872
|
+
db.prepare(
|
|
873
|
+
"UPDATE merge_groups SET member_count = member_count + 1, last_updated = ? WHERE id = ?"
|
|
874
|
+
).run(now, aGroup.group_id);
|
|
875
|
+
return aGroup.group_id;
|
|
876
|
+
}
|
|
877
|
+
if (bGroup) {
|
|
878
|
+
db.prepare(
|
|
879
|
+
"INSERT INTO merge_members (group_id, person_id, joined_at, joined_by) VALUES (?, ?, ?, ?)"
|
|
880
|
+
).run(bGroup.group_id, aId, now, joinedBy);
|
|
881
|
+
db.prepare(
|
|
882
|
+
"UPDATE merge_groups SET member_count = member_count + 1, last_updated = ? WHERE id = ?"
|
|
883
|
+
).run(now, bGroup.group_id);
|
|
884
|
+
return bGroup.group_id;
|
|
885
|
+
}
|
|
886
|
+
// Neither in any group — create new
|
|
887
|
+
const groupId = newGroupId();
|
|
888
|
+
db.prepare(
|
|
889
|
+
"INSERT INTO merge_groups (id, primary_id, member_count, created_at, last_updated) VALUES (?, ?, 2, ?, ?)"
|
|
890
|
+
).run(groupId, aId, now, now);
|
|
891
|
+
const ins = db.prepare(
|
|
892
|
+
"INSERT INTO merge_members (group_id, person_id, joined_at, joined_by) VALUES (?, ?, ?, ?)"
|
|
893
|
+
);
|
|
894
|
+
ins.run(groupId, aId, now, joinedBy);
|
|
895
|
+
ins.run(groupId, bId, now, joinedBy);
|
|
896
|
+
return groupId;
|
|
897
|
+
});
|
|
898
|
+
return tx();
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
/**
|
|
902
|
+
* Remove a person from its merge group (unmerge). If only one member
|
|
903
|
+
* remains, the group is deleted entirely.
|
|
904
|
+
*/
|
|
905
|
+
unmergePerson(personId) {
|
|
906
|
+
const db = this._requireOpen();
|
|
907
|
+
const tx = db.transaction(() => {
|
|
908
|
+
const row = db.prepare(
|
|
909
|
+
"SELECT group_id FROM merge_members WHERE person_id = ?"
|
|
910
|
+
).get(personId);
|
|
911
|
+
if (!row) return { ok: false, reason: "not in any group" };
|
|
912
|
+
const groupId = row.group_id;
|
|
913
|
+
db.prepare("DELETE FROM merge_members WHERE person_id = ?").run(personId);
|
|
914
|
+
const remaining = db.prepare(
|
|
915
|
+
"SELECT COUNT(*) as n FROM merge_members WHERE group_id = ?"
|
|
916
|
+
).get(groupId).n;
|
|
917
|
+
if (remaining < 2) {
|
|
918
|
+
// Group of 1 or 0 — delete the group + remaining member row
|
|
919
|
+
db.prepare("DELETE FROM merge_members WHERE group_id = ?").run(groupId);
|
|
920
|
+
db.prepare("DELETE FROM merge_groups WHERE id = ?").run(groupId);
|
|
921
|
+
} else {
|
|
922
|
+
db.prepare(
|
|
923
|
+
"UPDATE merge_groups SET member_count = ?, last_updated = ? WHERE id = ?"
|
|
924
|
+
).run(remaining, Date.now(), groupId);
|
|
925
|
+
}
|
|
926
|
+
return { ok: true, groupId, remaining };
|
|
927
|
+
});
|
|
928
|
+
return tx();
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
/**
|
|
932
|
+
* Get all person ids in the same merge group as the given person.
|
|
933
|
+
* Returns [personId, ...] including the input (whether or not it's in
|
|
934
|
+
* a group — a "group of 1" is just `[personId]`).
|
|
935
|
+
*/
|
|
936
|
+
getMergeGroupMembers(personId) {
|
|
937
|
+
const db = this._requireOpen();
|
|
938
|
+
const groupRow = db.prepare(
|
|
939
|
+
"SELECT group_id FROM merge_members WHERE person_id = ?"
|
|
940
|
+
).get(personId);
|
|
941
|
+
if (!groupRow) return [personId];
|
|
942
|
+
return db.prepare(
|
|
943
|
+
"SELECT person_id FROM merge_members WHERE group_id = ? ORDER BY joined_at"
|
|
944
|
+
).all(groupRow.group_id).map((r) => r.person_id);
|
|
945
|
+
}
|
|
946
|
+
|
|
947
|
+
/**
|
|
948
|
+
* Insert a row into review_queue when the LLM stage returns "maybe".
|
|
949
|
+
* UI lists these for user one-click decisions.
|
|
950
|
+
*/
|
|
951
|
+
enqueueReview({ aId, bId, embedSim, llmVerdict, llmReason, llmConfidence }) {
|
|
952
|
+
const db = this._requireOpen();
|
|
953
|
+
const [lo, hi] = aId < bId ? [aId, bId] : [bId, aId];
|
|
954
|
+
const info = db.prepare(
|
|
955
|
+
`INSERT INTO review_queue
|
|
956
|
+
(a_person_id, b_person_id, embed_sim, llm_verdict, llm_reason, llm_confidence, enqueued_at)
|
|
957
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
958
|
+
).run(lo, hi, embedSim || null, llmVerdict || null, llmReason || null, llmConfidence || null, Date.now());
|
|
959
|
+
return info.lastInsertRowid;
|
|
960
|
+
}
|
|
961
|
+
|
|
962
|
+
/**
|
|
963
|
+
* List pending review rows (oldest first).
|
|
964
|
+
*/
|
|
965
|
+
listReviewQueue({ limit = 50 } = {}) {
|
|
966
|
+
const db = this._requireOpen();
|
|
967
|
+
return db.prepare(
|
|
968
|
+
"SELECT * FROM review_queue WHERE reviewed_at IS NULL ORDER BY enqueued_at ASC LIMIT ?"
|
|
969
|
+
).all(Math.min(limit, 1000));
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
/**
|
|
973
|
+
* Mark a review row as decided by the user.
|
|
974
|
+
*/
|
|
975
|
+
recordReviewDecision({ reviewId, decision }) {
|
|
976
|
+
if (!["same", "different", "skip"].includes(decision)) {
|
|
977
|
+
throw new Error(`invalid review decision: ${decision}`);
|
|
978
|
+
}
|
|
979
|
+
const db = this._requireOpen();
|
|
980
|
+
const row = db.prepare("SELECT * FROM review_queue WHERE id = ?").get(reviewId);
|
|
981
|
+
if (!row) throw new Error(`review row ${reviewId} not found`);
|
|
982
|
+
db.prepare(
|
|
983
|
+
"UPDATE review_queue SET reviewed_at = ?, user_decision = ? WHERE id = ?"
|
|
984
|
+
).run(Date.now(), decision, reviewId);
|
|
985
|
+
return row;
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
resolveQueueStats() {
|
|
989
|
+
const db = this._requireOpen();
|
|
990
|
+
const rows = db.prepare(
|
|
991
|
+
"SELECT status, COUNT(*) as n FROM resolve_queue GROUP BY status"
|
|
992
|
+
).all();
|
|
993
|
+
const out = { pending: 0, "in-progress": 0, done: 0, error: 0 };
|
|
994
|
+
for (const r of rows) out[r.status] = r.n;
|
|
995
|
+
return out;
|
|
996
|
+
}
|
|
997
|
+
|
|
732
998
|
// ─── Key rotation ──────────────────────────────────────────────────────
|
|
733
999
|
|
|
734
1000
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|
|
@@ -26,7 +26,31 @@
|
|
|
26
26
|
"./bridges/cc-llm-adapter": "./lib/bridges/cc-llm-adapter.js",
|
|
27
27
|
"./bridges/cc-kg-sink": "./lib/bridges/cc-kg-sink.js",
|
|
28
28
|
"./bridges/cc-rag-sink": "./lib/bridges/cc-rag-sink.js",
|
|
29
|
-
"./adapters/email-imap": "./lib/adapters/email-imap/index.js"
|
|
29
|
+
"./adapters/email-imap": "./lib/adapters/email-imap/index.js",
|
|
30
|
+
"./adapters/alipay-bill": "./lib/adapters/alipay-bill/index.js",
|
|
31
|
+
"./adapters/system-data": "./lib/adapters/system-data/index.js",
|
|
32
|
+
"./entity-resolver": "./lib/entity-resolver/index.js",
|
|
33
|
+
"./analysis-skills": "./lib/analysis-skills/index.js",
|
|
34
|
+
"./mobile-extractor": "./lib/mobile-extractor/index.js",
|
|
35
|
+
"./adapters/wechat": "./lib/adapters/wechat/index.js",
|
|
36
|
+
"./adapters/ai-chat-history": "./lib/adapters/ai-chat-history/index.js",
|
|
37
|
+
"./adapters/travel-base": "./lib/adapters/travel-base/index.js",
|
|
38
|
+
"./adapters/travel-12306": "./lib/adapters/travel-12306/index.js",
|
|
39
|
+
"./adapters/travel-ctrip": "./lib/adapters/travel-ctrip/index.js",
|
|
40
|
+
"./adapters/travel-amap": "./lib/adapters/travel-amap/index.js",
|
|
41
|
+
"./adapters/travel-baidu-map": "./lib/adapters/travel-baidu-map/index.js",
|
|
42
|
+
"./adapters/shopping-base": "./lib/adapters/shopping-base/index.js",
|
|
43
|
+
"./adapters/shopping-taobao": "./lib/adapters/shopping-taobao/index.js",
|
|
44
|
+
"./adapters/shopping-jd": "./lib/adapters/shopping-jd/index.js",
|
|
45
|
+
"./adapters/shopping-meituan": "./lib/adapters/shopping-meituan/index.js",
|
|
46
|
+
"./adapters/social-bilibili": "./lib/adapters/social-bilibili/index.js",
|
|
47
|
+
"./adapters/social-weibo": "./lib/adapters/social-weibo/index.js",
|
|
48
|
+
"./adapters/social-douyin": "./lib/adapters/social-douyin/index.js",
|
|
49
|
+
"./adapters/social-xiaohongshu": "./lib/adapters/social-xiaohongshu/index.js",
|
|
50
|
+
"./adapters/messaging-qq": "./lib/adapters/messaging-qq/index.js",
|
|
51
|
+
"./adapters/messaging-telegram": "./lib/adapters/messaging-telegram/index.js",
|
|
52
|
+
"./adapters/messaging-whatsapp": "./lib/adapters/messaging-whatsapp/index.js",
|
|
53
|
+
"./sidecar": "./lib/sidecar/index.js"
|
|
30
54
|
},
|
|
31
55
|
"scripts": {
|
|
32
56
|
"test": "vitest run",
|
|
@@ -55,7 +79,9 @@
|
|
|
55
79
|
"mailparser": "^3.7.1"
|
|
56
80
|
},
|
|
57
81
|
"optionalDependencies": {
|
|
58
|
-
"imapflow": "^1.0.183"
|
|
82
|
+
"imapflow": "^1.0.183",
|
|
83
|
+
"adm-zip": "^0.5.16",
|
|
84
|
+
"iconv-lite": "^0.6.3"
|
|
59
85
|
},
|
|
60
86
|
"devDependencies": {
|
|
61
87
|
"vitest": "^4.1.5"
|