@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116):
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,359 @@
1
+ /**
2
+ * SidecarSupervisor — manages the forensics-bridge Python sidecar lifecycle.
3
+ *
4
+ * Protocol: docs/design/Personal_Data_Hub_Python_Sidecar.md §3 (JSON-lines).
5
+ * Counterpart: packages/personal-data-hub-bridge/forensics_bridge/ipc_server.py.
6
+ *
7
+ * Responsibilities:
8
+ * - Spawn / health-check / restart the sidecar subprocess
9
+ * - Frame stdin/stdout as JSON-lines (one envelope per line)
10
+ * - Correlate request id → pending promise; route progress/chunk callbacks
11
+ * - Forward stderr logs as events for hub audit logging
12
+ *
13
+ * Non-goals (this layer):
14
+ * - Persisting credentials (caller passes them per-invoke)
15
+ * - Audit logging (caller subscribes to "log" events)
16
+ * - Schema validation of params (Python side enforces)
17
+ */
18
+
19
+ "use strict";
20
+
21
+ const { spawn } = require("node:child_process");
22
+ const { EventEmitter } = require("node:events");
23
+ const crypto = require("node:crypto");
24
+ const readline = require("node:readline");
25
+
26
// Default per-invoke timeout, used when neither the constructor nor the
// individual invoke() call supplies one.
const DEFAULT_TIMEOUT_MS = 60 * 1000;
// Default period between background sidecar.ping health checks.
const DEFAULT_HEALTHCHECK_INTERVAL_MS = 30 * 1000;
// How long stop() waits after SIGTERM before escalating to SIGKILL.
const STOP_GRACE_MS = 5 * 1000;
29
+
30
/**
 * Raised when a sidecar method produced no terminal envelope within its
 * timeout. Marked retryable.
 */
class SidecarTimeoutError extends Error {
  /**
   * @param {string} method - Name of the method that timed out.
   * @param {number} timeoutMs - The timeout that elapsed, in milliseconds.
   */
  constructor(method, timeoutMs) {
    const message = `sidecar method '${method}' timed out after ${timeoutMs}ms`;
    super(message);
    Object.assign(this, {
      name: "SidecarTimeoutError",
      code: "TIMEOUT",
      retryable: true,
    });
  }
}
38
+
39
/**
 * Raised when the sidecar answers a request with an error envelope (or with
 * an envelope type the supervisor does not understand).
 */
class SidecarMethodError extends Error {
  /**
   * @param {object} [payload] - Error payload taken from the envelope.
   * @param {string} [payload.code] - Machine-readable code; "UNKNOWN" when absent.
   * @param {string} [payload.msg] - Human-readable message; falls back to the code.
   * @param {boolean} [payload.retryable] - Coerced to a boolean.
   */
  constructor(payload) {
    // Tolerate a missing or null payload: a malformed error envelope must
    // still surface as a SidecarMethodError, not as a TypeError raised while
    // destructuring.
    const { code, msg, retryable } = payload ?? {};
    const resolvedCode = code || "UNKNOWN";
    super(msg || resolvedCode);
    this.name = "SidecarMethodError";
    this.code = resolvedCode;
    this.retryable = Boolean(retryable);
  }
}
47
+
48
/**
 * Raised when a method is invoked (or a line is written) while no usable
 * sidecar process is attached. Marked retryable.
 */
class SidecarNotRunningError extends Error {
  /**
   * @param {string} method - Name of the method that could not be invoked.
   */
  constructor(method) {
    const message = `sidecar is not running; cannot invoke '${method}'`;
    super(message);
    Object.assign(this, {
      name: "SidecarNotRunningError",
      code: "SIDECAR_NOT_RUNNING",
      retryable: true,
    });
  }
}
56
+
57
/**
 * Supervises one sidecar subprocess speaking JSON-lines over stdin/stdout.
 *
 * Events:
 *   - "error"             (Error)  child-process errors, unparsable stdout
 *                                  lines, exceptions thrown by progress/chunk
 *                                  callbacks. As with any EventEmitter, an
 *                                  "error" with no listener is thrown.
 *   - "exit"              ({ code, signal }) the child exited.
 *   - "log"               (string) one non-empty stderr line.
 *   - "orphan"            (envelope) an envelope that matches no pending request.
 *   - "healthCheckFailed" (Error)  a background sidecar.ping failed.
 */
class SidecarSupervisor extends EventEmitter {
  /**
   * @param {object} opts
   * @param {string|string[]} opts.command - Executable (string) or [exec, ...args] for spawn.
   * @param {string[]} [opts.args] - Extra args appended after `command`.
   * @param {string} [opts.cwd] - Working directory for the child.
   * @param {object} [opts.env] - Env vars merged over process.env.
   * @param {number} [opts.healthCheckIntervalMs] - 0 to disable.
   * @param {number} [opts.defaultTimeoutMs] - Per-invoke default.
   * @throws {Error} when no executable is supplied.
   */
  constructor(opts) {
    super();
    if (!opts || !opts.command) {
      throw new Error("SidecarSupervisor requires opts.command");
    }
    const [first, ...rest] = Array.isArray(opts.command)
      ? opts.command
      : [opts.command];
    // An empty array is truthy, so it slips past the check above; reject it
    // here instead of failing later inside spawn().
    if (typeof first !== "string" || first.length === 0) {
      throw new Error("SidecarSupervisor requires opts.command");
    }
    this._exec = first;
    this._args = [...rest, ...(opts.args || [])];
    this._cwd = opts.cwd;
    this._env = opts.env;
    this._healthCheckIntervalMs =
      opts.healthCheckIntervalMs ?? DEFAULT_HEALTHCHECK_INTERVAL_MS;
    this._defaultTimeoutMs = opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS;

    this._proc = null;
    this._stdoutReader = null;
    this._stderrReader = null;
    this._pending = new Map(); // id → { resolve, reject, timer, onProgress, onChunk, method }
    this._healthTimer = null;
    this._stopping = false;
    this._startPromise = null; // in-flight start(), shared by concurrent callers
  }

  /**
   * Spawn the sidecar and verify it responds to sidecar.ping.
   *
   * Idempotent: returns immediately when a child is already attached, and
   * concurrent calls made while a start is in flight share its outcome.
   * If the readiness ping fails, the half-started child is stopped before
   * the error is rethrown.
   *
   * @param {object} [options]
   * @param {number} [options.readyTimeoutMs] - Timeout for the readiness ping.
   * @returns {Promise<void>}
   */
  async start({ readyTimeoutMs = 5_000 } = {}) {
    if (this._startPromise) {
      return this._startPromise;
    }
    if (this._proc && !this._proc.killed) {
      return; // already running
    }
    this._startPromise = this._spawnAndAwaitReady(readyTimeoutMs);
    try {
      await this._startPromise;
    } finally {
      this._startPromise = null;
    }
  }

  /**
   * Invoke a sidecar method.
   *
   * @param {string} method
   * @param {object} params
   * @param {object} [opts]
   * @param {number} [opts.timeoutMs]
   * @param {(data: object) => void} [opts.onProgress] - invoked on progress envelopes
   * @param {(data: object) => void} [opts.onChunk] - invoked on chunk envelopes
   * @returns {Promise<object>} resolves with the final `result.data`; rejects
   *   with SidecarNotRunningError, SidecarTimeoutError, SidecarMethodError,
   *   a write error, or the exit/child error that interrupted the request.
   */
  invoke(method, params = {}, opts = {}) {
    if (!this._proc || this._proc.killed || this._stopping) {
      return Promise.reject(new SidecarNotRunningError(method));
    }
    const timeoutMs = opts.timeoutMs ?? this._defaultTimeoutMs;
    const id =
      typeof crypto.randomUUID === "function"
        ? crypto.randomUUID()
        : `req-${Date.now()}-${Math.random().toString(16).slice(2)}`;

    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this._pending.delete(id);
        // Best-effort cancel on the sidecar side; don't await the response.
        this._writeLine({
          id: `cancel-${id}`,
          method: "request.cancel",
          params: { id },
        }).catch(() => {});
        reject(new SidecarTimeoutError(method, timeoutMs));
      }, timeoutMs);
      // Allow the process to exit even if a sidecar invocation timer is pending.
      if (typeof timer.unref === "function") timer.unref();

      this._pending.set(id, {
        resolve,
        reject,
        timer,
        onProgress: opts.onProgress,
        onChunk: opts.onChunk,
        method,
      });

      const envelope = { id, method, params, timeout_ms: timeoutMs };
      this._writeLine(envelope).catch((err) => {
        clearTimeout(timer);
        this._pending.delete(id);
        reject(err);
      });
    });
  }

  /**
   * Stop the sidecar: close stdin, SIGTERM → wait grace → SIGKILL.
   * Requests still pending when the child exits are rejected by the exit
   * handler with a generic "sidecar exited (...)" Error.
   *
   * @param {object} [options]
   * @param {number} [options.graceMs] - Wait after SIGTERM before SIGKILL.
   * @returns {Promise<void>}
   */
  async stop({ graceMs = STOP_GRACE_MS } = {}) {
    const proc = this._proc;
    // A child that never spawned has no pid and will never emit "exit";
    // waiting for one would hang forever, so just release local state.
    if (!proc || proc.killed || proc.pid === undefined) {
      this._teardown();
      return;
    }
    this._stopping = true;
    if (this._healthTimer) {
      clearInterval(this._healthTimer);
      this._healthTimer = null;
    }
    const exited = new Promise((res) => proc.once("exit", res));
    try {
      proc.stdin?.end();
    } catch (_err) {
      /* already closed */
    }
    proc.kill("SIGTERM");
    const killed = await Promise.race([
      exited.then(() => true),
      new Promise((res) => {
        const t = setTimeout(() => res(false), graceMs);
        if (typeof t.unref === "function") t.unref();
      }),
    ]);
    if (!killed) {
      proc.kill("SIGKILL");
      await exited;
    }
    this._teardown();
  }

  /** @returns {boolean} true while a child is attached and not being stopped. */
  isRunning() {
    return Boolean(this._proc) && !this._proc.killed && !this._stopping;
  }

  // -------------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------------

  /**
   * Spawn the child, wire its streams and lifecycle handlers, then wait for
   * the readiness ping. Used only by start().
   */
  async _spawnAndAwaitReady(readyTimeoutMs) {
    const env = {
      ...process.env,
      ...(this._env || {}),
      PYTHONIOENCODING: "utf-8",
      PYTHONUNBUFFERED: "1",
    };

    const proc = spawn(this._exec, this._args, {
      cwd: this._cwd,
      env,
      stdio: ["pipe", "pipe", "pipe"],
      windowsHide: true,
    });
    this._proc = proc;
    this._stopping = false;
    this._wireStreams();

    // Handlers only touch supervisor state while `proc` is still the
    // attached child (or nothing is attached). A late event from a replaced
    // child must not fail the new child's requests or detach it.
    const ownsState = () => !this._proc || this._proc === proc;

    proc.on("error", (err) => {
      if (ownsState()) {
        this._failAllPending(err);
        // No pid means the spawn itself failed: no "exit" will follow, so
        // release state now instead of reporting a dead child as running.
        if (proc.pid === undefined) this._teardown();
      }
      // Emit last so state is consistent even if a listener throws.
      this.emit("error", err);
    });

    proc.on("exit", (code, signal) => {
      if (ownsState()) {
        this._failAllPending(
          new Error(`sidecar exited (code=${code} signal=${signal})`),
        );
        this._teardown();
      }
      this.emit("exit", { code, signal });
    });

    try {
      await this.invoke("sidecar.ping", {}, { timeoutMs: readyTimeoutMs });
    } catch (err) {
      // Don't leave a half-started child attached: a later start() would
      // otherwise report "already running" without any readiness check.
      if (this._proc === proc) {
        // Best-effort cleanup; the readiness failure is the error to report.
        await this.stop().catch(() => {});
      }
      throw err;
    }
    this._scheduleHealthCheck();
  }

  /** Attach line readers to stdout (envelopes) and stderr (logs). */
  _wireStreams() {
    const proc = this._proc;

    // A failed write (EPIPE, write-after-end while stopping) makes the stdin
    // stream emit "error" in addition to failing the write callback. Without
    // a listener that event would be thrown as an uncaught exception. The
    // callback in _writeLine already rejects the affected request.
    proc.stdin?.on("error", () => {});

    this._stdoutReader = readline.createInterface({
      input: proc.stdout,
      crlfDelay: Infinity,
    });
    this._stdoutReader.on("line", (line) => {
      if (!line) return;
      let envelope;
      try {
        envelope = JSON.parse(line);
      } catch (err) {
        this.emit(
          "error",
          new Error(`invalid envelope from sidecar: ${line}`, { cause: err }),
        );
        return;
      }
      this._handleEnvelope(envelope);
    });

    this._stderrReader = readline.createInterface({
      input: proc.stderr,
      crlfDelay: Infinity,
    });
    this._stderrReader.on("line", (line) => {
      if (!line) return;
      // Sidecar logs pino-style JSON; pass through for hub audit.
      this.emit("log", line);
    });
  }

  /**
   * Route one parsed envelope: progress/chunk go to the request's callbacks,
   * result/error settle the request, anything unmatched is an "orphan".
   */
  _handleEnvelope(env) {
    // Valid JSON is not necessarily an object ("null", "42", ...); such
    // values cannot carry an id, so surface them instead of dereferencing.
    if (env === null || typeof env !== "object") {
      this.emit("orphan", env);
      return;
    }
    // Envelopes with no id (e.g. INVALID_JSON parse failures) surface as events.
    if (env.id === null || env.id === undefined) {
      this.emit("orphan", env);
      return;
    }
    const pending = this._pending.get(env.id);
    if (!pending) {
      this.emit("orphan", env);
      return;
    }

    if (env.type === "progress") {
      try {
        pending.onProgress?.(env.data);
      } catch (err) {
        this.emit("error", err);
      }
      return;
    }
    if (env.type === "chunk") {
      try {
        pending.onChunk?.(env.data);
      } catch (err) {
        this.emit("error", err);
      }
      return;
    }
    // Terminal frames remove the pending entry.
    clearTimeout(pending.timer);
    this._pending.delete(env.id);

    if (env.type === "result") {
      pending.resolve(env.data);
    } else if (env.type === "error") {
      pending.reject(new SidecarMethodError(env.error || { code: "UNKNOWN" }));
    } else {
      pending.reject(
        new SidecarMethodError({
          code: "UNKNOWN_ENVELOPE_TYPE",
          msg: `unexpected envelope type: ${env.type}`,
        }),
      );
    }
  }

  /**
   * Serialize an envelope as one JSON line on the child's stdin.
   * @returns {Promise<void>} rejects with SidecarNotRunningError when stdin
   *   is unavailable, or with the write/serialization error.
   */
  _writeLine(envelope) {
    return new Promise((resolve, reject) => {
      const stdin = this._proc?.stdin;
      // writableEnded covers the window after stop() has closed stdin but
      // before the child has exited.
      if (!stdin || stdin.destroyed || stdin.writableEnded) {
        reject(new SidecarNotRunningError(envelope.method || "<unknown>"));
        return;
      }
      const line = JSON.stringify(envelope) + "\n";
      stdin.write(line, "utf8", (err) => {
        if (err) reject(err);
        else resolve();
      });
    });
  }

  /** (Re)start the periodic sidecar.ping; a zero interval disables it. */
  _scheduleHealthCheck() {
    if (!this._healthCheckIntervalMs) return;
    if (this._healthTimer) clearInterval(this._healthTimer);
    this._healthTimer = setInterval(() => {
      this.invoke("sidecar.ping", {}, { timeoutMs: 3_000 }).catch((err) => {
        this.emit("healthCheckFailed", err);
      });
    }, this._healthCheckIntervalMs);
    if (typeof this._healthTimer.unref === "function") {
      this._healthTimer.unref();
    }
  }

  /** Reject every pending request with `err` and clear their timers. */
  _failAllPending(err) {
    for (const [, pending] of this._pending) {
      clearTimeout(pending.timer);
      pending.reject(err);
    }
    this._pending.clear();
  }

  /** Release timers, readers and the child reference. Safe to call repeatedly. */
  _teardown() {
    if (this._healthTimer) {
      clearInterval(this._healthTimer);
      this._healthTimer = null;
    }
    this._stdoutReader?.close();
    this._stderrReader?.close();
    this._stdoutReader = null;
    this._stderrReader = null;
    this._proc = null;
  }
}
353
+
354
// Public API of this module: the supervisor class plus the three error
// types it rejects with, so callers can branch on them.
module.exports = {
  SidecarSupervisor,
  SidecarTimeoutError,
  SidecarMethodError,
  SidecarNotRunningError,
};
package/lib/vault.js CHANGED
@@ -37,6 +37,13 @@ const DEFAULT_CIPHER_PAGE_SIZE = 4096;
37
37
 
38
38
  // ─── Helpers ─────────────────────────────────────────────────────────────
39
39
 
40
/**
 * Build an id for a merge_groups row: `mg-<16 hex digits>-<base36 timestamp>`.
 *
 * Lightweight uuid v4-ish value. Doesn't need crypto strength — uniqueness
 * within one user's vault is enough.
 *
 * @returns {string}
 */
function newGroupId() {
  // Math.random().toString(16) can have fewer than 8 fractional hex digits
  // (0.5 → "0.8", 0 → "0"), so pad each segment to keep the id fixed-width.
  const r = () => Math.random().toString(16).slice(2, 10).padEnd(8, "0");
  return `mg-${r()}${r()}-${Date.now().toString(36)}`;
}
46
+
40
47
  function loadDriver() {
41
48
  // Lazy require so consumers that only need schemas don't pay for the
42
49
  // native binding load. Errors surface here with a precise message.
@@ -716,6 +723,9 @@ class LocalVault {
716
723
  stats() {
717
724
  const db = this._requireOpen();
718
725
  const count = (tbl) => db.prepare(`SELECT COUNT(*) as n FROM ${tbl}`).get().n;
726
+ const safeCount = (tbl) => {
727
+ try { return count(tbl); } catch (_e) { return 0; }
728
+ };
719
729
  return {
720
730
  schemaVersion: getSchemaVersion(db),
721
731
  events: count("events"),
@@ -726,9 +736,265 @@ class LocalVault {
726
736
  rawEvents: count("raw_events"),
727
737
  auditLog: count("audit_log"),
728
738
  watermarks: count("sync_watermarks"),
739
+ // Phase 8 — EntityResolver tables (safeCount because v1 vaults
740
+ // don't have these yet until migrate).
741
+ mergeGroups: safeCount("merge_groups"),
742
+ mergeMembers: safeCount("merge_members"),
743
+ resolveQueue: safeCount("resolve_queue"),
744
+ reviewQueue: safeCount("review_queue"),
745
+ resolveDecisions: safeCount("resolve_decisions"),
729
746
  };
730
747
  }
731
748
 
749
+ // ─── Phase 8 EntityResolver helpers ───────────────────────────────────
750
+
751
+ /**
752
+ * Insert a new pending row into resolve_queue. Idempotent — already-
753
+ * pending rows for the same person are not duplicated. Returns the
754
+ * row id (existing or newly inserted).
755
+ */
756
+ enqueueResolve(personId) {
757
+ if (typeof personId !== "string" || personId.length === 0) {
758
+ throw new Error("enqueueResolve: personId required");
759
+ }
760
+ const db = this._requireOpen();
761
+ const existing = db.prepare(
762
+ "SELECT id FROM resolve_queue WHERE person_id = ? AND status IN ('pending','in-progress')"
763
+ ).get(personId);
764
+ if (existing) return existing.id;
765
+ const info = db.prepare(
766
+ "INSERT INTO resolve_queue (person_id, enqueued_at, status) VALUES (?, ?, 'pending')"
767
+ ).run(personId, Date.now());
768
+ return info.lastInsertRowid;
769
+ }
770
+
771
+ /**
772
+ * Pull up to `limit` pending rows + atomically mark them in-progress.
773
+ * Returns [{id, person_id, attempts}, ...].
774
+ */
775
+ claimResolveBatch(limit = 50) {
776
+ const db = this._requireOpen();
777
+ const tx = db.transaction(() => {
778
+ const rows = db.prepare(
779
+ "SELECT id, person_id, attempts FROM resolve_queue WHERE status = 'pending' ORDER BY enqueued_at LIMIT ?"
780
+ ).all(limit);
781
+ if (rows.length === 0) return [];
782
+ const stmt = db.prepare(
783
+ "UPDATE resolve_queue SET status = 'in-progress', attempts = attempts + 1 WHERE id = ?"
784
+ );
785
+ for (const r of rows) stmt.run(r.id);
786
+ return rows;
787
+ });
788
+ return tx();
789
+ }
790
+
791
+ /**
792
+ * Mark a resolve_queue row as done (success path).
793
+ */
794
+ completeResolve(queueId) {
795
+ const db = this._requireOpen();
796
+ db.prepare("UPDATE resolve_queue SET status = 'done' WHERE id = ?").run(queueId);
797
+ }
798
+
799
+ /**
800
+ * Mark a resolve_queue row as errored (retry-eligible if attempts < 3).
801
+ */
802
+ errorResolve(queueId, errMsg) {
803
+ const db = this._requireOpen();
804
+ // If attempts < 3, leave status 'pending' for retry; else 'error'
805
+ db.prepare(
806
+ `UPDATE resolve_queue
807
+ SET status = CASE WHEN attempts >= 3 THEN 'error' ELSE 'pending' END,
808
+ last_error = ?
809
+ WHERE id = ?`
810
+ ).run(errMsg || "unknown", queueId);
811
+ }
812
+
813
+ /**
814
+ * Record a resolve_decisions row. Lex-orders the two ids so each pair
815
+ * is stored only once. Returns inserted-or-updated row.
816
+ */
817
+ recordResolveDecision({ aId, bId, verdict, confidence, decidedBy, reason }) {
818
+ const db = this._requireOpen();
819
+ const [lo, hi] = aId < bId ? [aId, bId] : [bId, aId];
820
+ db.prepare(
821
+ `INSERT INTO resolve_decisions
822
+ (a_person_id, b_person_id, verdict, confidence, decided_at, decided_by, reason)
823
+ VALUES (?, ?, ?, ?, ?, ?, ?)
824
+ ON CONFLICT(a_person_id, b_person_id) DO UPDATE SET
825
+ verdict = excluded.verdict,
826
+ confidence = excluded.confidence,
827
+ decided_at = excluded.decided_at,
828
+ decided_by = excluded.decided_by,
829
+ reason = excluded.reason`
830
+ ).run(lo, hi, verdict, confidence, Date.now(), decidedBy || "rule", reason || null);
831
+ }
832
+
833
+ getResolveDecision(aId, bId) {
834
+ const db = this._requireOpen();
835
+ const [lo, hi] = aId < bId ? [aId, bId] : [bId, aId];
836
+ return db.prepare(
837
+ "SELECT * FROM resolve_decisions WHERE a_person_id = ? AND b_person_id = ?"
838
+ ).get(lo, hi);
839
+ }
840
+
841
+ /**
842
+ * Merge a pair into a merge_group. If either side already belongs to a
843
+ * group, the other side joins it (and the two groups merge if both
844
+ * already existed). Returns the resulting group_id.
845
+ */
846
+ mergePair({ aId, bId, joinedBy = "rule" }) {
847
+ const db = this._requireOpen();
848
+ const tx = db.transaction(() => {
849
+ const aGroup = db.prepare("SELECT group_id FROM merge_members WHERE person_id = ?").get(aId);
850
+ const bGroup = db.prepare("SELECT group_id FROM merge_members WHERE person_id = ?").get(bId);
851
+ const now = Date.now();
852
+
853
+ if (aGroup && bGroup && aGroup.group_id === bGroup.group_id) {
854
+ return aGroup.group_id; // already same group
855
+ }
856
+ if (aGroup && bGroup) {
857
+ // Merge two existing groups → keep aGroup, move bGroup members in
858
+ db.prepare(
859
+ "UPDATE merge_members SET group_id = ? WHERE group_id = ?"
860
+ ).run(aGroup.group_id, bGroup.group_id);
861
+ db.prepare("DELETE FROM merge_groups WHERE id = ?").run(bGroup.group_id);
862
+ db.prepare(
863
+ "UPDATE merge_groups SET member_count = (SELECT COUNT(*) FROM merge_members WHERE group_id = ?), last_updated = ? WHERE id = ?"
864
+ ).run(aGroup.group_id, now, aGroup.group_id);
865
+ return aGroup.group_id;
866
+ }
867
+ if (aGroup) {
868
+ // Add b to a's group
869
+ db.prepare(
870
+ "INSERT INTO merge_members (group_id, person_id, joined_at, joined_by) VALUES (?, ?, ?, ?)"
871
+ ).run(aGroup.group_id, bId, now, joinedBy);
872
+ db.prepare(
873
+ "UPDATE merge_groups SET member_count = member_count + 1, last_updated = ? WHERE id = ?"
874
+ ).run(now, aGroup.group_id);
875
+ return aGroup.group_id;
876
+ }
877
+ if (bGroup) {
878
+ db.prepare(
879
+ "INSERT INTO merge_members (group_id, person_id, joined_at, joined_by) VALUES (?, ?, ?, ?)"
880
+ ).run(bGroup.group_id, aId, now, joinedBy);
881
+ db.prepare(
882
+ "UPDATE merge_groups SET member_count = member_count + 1, last_updated = ? WHERE id = ?"
883
+ ).run(now, bGroup.group_id);
884
+ return bGroup.group_id;
885
+ }
886
+ // Neither in any group — create new
887
+ const groupId = newGroupId();
888
+ db.prepare(
889
+ "INSERT INTO merge_groups (id, primary_id, member_count, created_at, last_updated) VALUES (?, ?, 2, ?, ?)"
890
+ ).run(groupId, aId, now, now);
891
+ const ins = db.prepare(
892
+ "INSERT INTO merge_members (group_id, person_id, joined_at, joined_by) VALUES (?, ?, ?, ?)"
893
+ );
894
+ ins.run(groupId, aId, now, joinedBy);
895
+ ins.run(groupId, bId, now, joinedBy);
896
+ return groupId;
897
+ });
898
+ return tx();
899
+ }
900
+
901
+ /**
902
+ * Remove a person from its merge group (unmerge). If only one member
903
+ * remains, the group is deleted entirely.
904
+ */
905
+ unmergePerson(personId) {
906
+ const db = this._requireOpen();
907
+ const tx = db.transaction(() => {
908
+ const row = db.prepare(
909
+ "SELECT group_id FROM merge_members WHERE person_id = ?"
910
+ ).get(personId);
911
+ if (!row) return { ok: false, reason: "not in any group" };
912
+ const groupId = row.group_id;
913
+ db.prepare("DELETE FROM merge_members WHERE person_id = ?").run(personId);
914
+ const remaining = db.prepare(
915
+ "SELECT COUNT(*) as n FROM merge_members WHERE group_id = ?"
916
+ ).get(groupId).n;
917
+ if (remaining < 2) {
918
+ // Group of 1 or 0 — delete the group + remaining member row
919
+ db.prepare("DELETE FROM merge_members WHERE group_id = ?").run(groupId);
920
+ db.prepare("DELETE FROM merge_groups WHERE id = ?").run(groupId);
921
+ } else {
922
+ db.prepare(
923
+ "UPDATE merge_groups SET member_count = ?, last_updated = ? WHERE id = ?"
924
+ ).run(remaining, Date.now(), groupId);
925
+ }
926
+ return { ok: true, groupId, remaining };
927
+ });
928
+ return tx();
929
+ }
930
+
931
+ /**
932
+ * Get all person ids in the same merge group as the given person.
933
+ * Returns [personId, ...] including the input (whether or not it's in
934
+ * a group — a "group of 1" is just `[personId]`).
935
+ */
936
+ getMergeGroupMembers(personId) {
937
+ const db = this._requireOpen();
938
+ const groupRow = db.prepare(
939
+ "SELECT group_id FROM merge_members WHERE person_id = ?"
940
+ ).get(personId);
941
+ if (!groupRow) return [personId];
942
+ return db.prepare(
943
+ "SELECT person_id FROM merge_members WHERE group_id = ? ORDER BY joined_at"
944
+ ).all(groupRow.group_id).map((r) => r.person_id);
945
+ }
946
+
947
+ /**
948
+ * Insert a row into review_queue when the LLM stage returns "maybe".
949
+ * UI lists these for user one-click decisions.
950
+ */
951
+ enqueueReview({ aId, bId, embedSim, llmVerdict, llmReason, llmConfidence }) {
952
+ const db = this._requireOpen();
953
+ const [lo, hi] = aId < bId ? [aId, bId] : [bId, aId];
954
+ const info = db.prepare(
955
+ `INSERT INTO review_queue
956
+ (a_person_id, b_person_id, embed_sim, llm_verdict, llm_reason, llm_confidence, enqueued_at)
957
+ VALUES (?, ?, ?, ?, ?, ?, ?)`
958
+ ).run(lo, hi, embedSim || null, llmVerdict || null, llmReason || null, llmConfidence || null, Date.now());
959
+ return info.lastInsertRowid;
960
+ }
961
+
962
+ /**
963
+ * List pending review rows (oldest first).
964
+ */
965
+ listReviewQueue({ limit = 50 } = {}) {
966
+ const db = this._requireOpen();
967
+ return db.prepare(
968
+ "SELECT * FROM review_queue WHERE reviewed_at IS NULL ORDER BY enqueued_at ASC LIMIT ?"
969
+ ).all(Math.min(limit, 1000));
970
+ }
971
+
972
+ /**
973
+ * Mark a review row as decided by the user.
974
+ */
975
+ recordReviewDecision({ reviewId, decision }) {
976
+ if (!["same", "different", "skip"].includes(decision)) {
977
+ throw new Error(`invalid review decision: ${decision}`);
978
+ }
979
+ const db = this._requireOpen();
980
+ const row = db.prepare("SELECT * FROM review_queue WHERE id = ?").get(reviewId);
981
+ if (!row) throw new Error(`review row ${reviewId} not found`);
982
+ db.prepare(
983
+ "UPDATE review_queue SET reviewed_at = ?, user_decision = ? WHERE id = ?"
984
+ ).run(Date.now(), decision, reviewId);
985
+ return row;
986
+ }
987
+
988
+ resolveQueueStats() {
989
+ const db = this._requireOpen();
990
+ const rows = db.prepare(
991
+ "SELECT status, COUNT(*) as n FROM resolve_queue GROUP BY status"
992
+ ).all();
993
+ const out = { pending: 0, "in-progress": 0, done: 0, error: 0 };
994
+ for (const r of rows) out[r.status] = r.n;
995
+ return out;
996
+ }
997
+
732
998
  // ─── Key rotation ──────────────────────────────────────────────────────
733
999
 
734
1000
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",
@@ -26,7 +26,31 @@
26
26
  "./bridges/cc-llm-adapter": "./lib/bridges/cc-llm-adapter.js",
27
27
  "./bridges/cc-kg-sink": "./lib/bridges/cc-kg-sink.js",
28
28
  "./bridges/cc-rag-sink": "./lib/bridges/cc-rag-sink.js",
29
- "./adapters/email-imap": "./lib/adapters/email-imap/index.js"
29
+ "./adapters/email-imap": "./lib/adapters/email-imap/index.js",
30
+ "./adapters/alipay-bill": "./lib/adapters/alipay-bill/index.js",
31
+ "./adapters/system-data": "./lib/adapters/system-data/index.js",
32
+ "./entity-resolver": "./lib/entity-resolver/index.js",
33
+ "./analysis-skills": "./lib/analysis-skills/index.js",
34
+ "./mobile-extractor": "./lib/mobile-extractor/index.js",
35
+ "./adapters/wechat": "./lib/adapters/wechat/index.js",
36
+ "./adapters/ai-chat-history": "./lib/adapters/ai-chat-history/index.js",
37
+ "./adapters/travel-base": "./lib/adapters/travel-base/index.js",
38
+ "./adapters/travel-12306": "./lib/adapters/travel-12306/index.js",
39
+ "./adapters/travel-ctrip": "./lib/adapters/travel-ctrip/index.js",
40
+ "./adapters/travel-amap": "./lib/adapters/travel-amap/index.js",
41
+ "./adapters/travel-baidu-map": "./lib/adapters/travel-baidu-map/index.js",
42
+ "./adapters/shopping-base": "./lib/adapters/shopping-base/index.js",
43
+ "./adapters/shopping-taobao": "./lib/adapters/shopping-taobao/index.js",
44
+ "./adapters/shopping-jd": "./lib/adapters/shopping-jd/index.js",
45
+ "./adapters/shopping-meituan": "./lib/adapters/shopping-meituan/index.js",
46
+ "./adapters/social-bilibili": "./lib/adapters/social-bilibili/index.js",
47
+ "./adapters/social-weibo": "./lib/adapters/social-weibo/index.js",
48
+ "./adapters/social-douyin": "./lib/adapters/social-douyin/index.js",
49
+ "./adapters/social-xiaohongshu": "./lib/adapters/social-xiaohongshu/index.js",
50
+ "./adapters/messaging-qq": "./lib/adapters/messaging-qq/index.js",
51
+ "./adapters/messaging-telegram": "./lib/adapters/messaging-telegram/index.js",
52
+ "./adapters/messaging-whatsapp": "./lib/adapters/messaging-whatsapp/index.js",
53
+ "./sidecar": "./lib/sidecar/index.js"
30
54
  },
31
55
  "scripts": {
32
56
  "test": "vitest run",
@@ -55,7 +79,9 @@
55
79
  "mailparser": "^3.7.1"
56
80
  },
57
81
  "optionalDependencies": {
58
- "imapflow": "^1.0.183"
82
+ "imapflow": "^1.0.183",
83
+ "adm-zip": "^0.5.16",
84
+ "iconv-lite": "^0.6.3"
59
85
  },
60
86
  "devDependencies": {
61
87
  "vitest": "^4.1.5"