@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,312 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Full system-data smoke / real-device runner — Phase 4.5.7.
4
+ *
5
+ * Drives the full SystemDataAdapter end-to-end across all 4 sources
6
+ * (contacts / calllog / sms / wifi), exercising:
7
+ *
8
+ * - PythonSidecarAdapter base class
9
+ * - SidecarSupervisor lifecycle
10
+ * - SystemDataAdapter.authenticate
11
+ * - 4 sidecar parse_* methods
12
+ * - per-entity hub-side UnifiedSchema validation
13
+ * - dataDisclosure metadata sanitization
14
+ *
15
+ * Replaces the contacts-only `smoke-system-data-contacts.js`. The older
16
+ * script remains for users who only want to exercise the Phase 4.5.2 slice.
17
+ *
18
+ * Usage:
19
+ *
20
+ * # Offline mode — pre-extracted host files (no ADB)
21
+ * node scripts/smoke-system-data.js \
22
+ * --contacts-db ./fixtures/contacts2.db \
23
+ * --calllog-db ./fixtures/contacts2.db \
24
+ * --wifi-dir ./fixtures/wifi/
25
+ *
26
+ * # Live device, /sdcard workaround (non-root)
27
+ * node scripts/smoke-system-data.js \
28
+ * --serial 24115RA8ECabc123 --extract-mode sdcard
29
+ *
30
+ * # Live device with adb root
31
+ * node scripts/smoke-system-data.js --serial 24115RA8ECabc123
32
+ *
33
+ * # Include SMS (default off — explicit opt-in for legality)
34
+ * node scripts/smoke-system-data.js --serial XYZ --extract-mode sdcard --include sms
35
+ *
36
+ * # Disable contacts but include sms
37
+ * node scripts/smoke-system-data.js --sms-db ./fixtures/mmssms.db --include sms --exclude contacts
38
+ *
39
+ * Exit codes:
40
+ * 0 success
41
+ * 1 sidecar / hub error
42
+ * 2 invalid entities (schema validation failed) or invalid command-line flags
43
+ */
44
+
45
+ "use strict";
46
+
47
+ const path = require("node:path");
48
+ const fs = require("node:fs");
49
+ const os = require("node:os");
50
+
51
+ const { SidecarSupervisor } = require("../lib/sidecar");
52
+ const {
53
+ SystemDataAdapter,
54
+ sanitizeInclude,
55
+ DEFAULT_INCLUDE,
56
+ } = require("../lib/adapters/system-data");
57
+ const { validate } = require("../lib/schemas");
58
+
59
// Directory named `personal-data-hub-bridge`, resolved two levels above this
// script's directory. main() uses it as both the sidecar's working directory
// and its PYTHONPATH.
+ const SIDECAR_ROOT = path.resolve(__dirname, "..", "..", "personal-data-hub-bridge");
60
// Command used to launch the sidecar: the FORENSICS_BRIDGE_PYTHON environment
// variable when it is set to a non-empty value, otherwise plain "python".
+ const PYTHON = process.env.FORENSICS_BRIDGE_PYTHON || "python";
61
+
62
+ // ---------------------------------------------------------------------------
63
+
64
/**
 * Parse the command-line tokens of the smoke runner into an options object.
 *
 * Tokens that do not start with `--` and are not a known flag are ignored.
 *
 * @param {string[]} argv - CLI tokens, without the node binary and script path.
 * @returns {{serial: (string|null), extractMode: string, dataPaths: Object,
 *   include: string[], exclude: string[], list: boolean,
 *   outDir: (string|null), help: boolean}} Parsed options. Paths given via
 *   `--contacts-db` / `--calllog-db` / `--sms-db` / `--wifi-dir` are resolved
 *   to absolute paths and stored under `dataPaths`.
 * @throws {Error} On an unknown `--flag`, or when a value-taking flag is the
 *   last token and therefore has no value.
 */
function parseArgs(argv) {
  const out = {
    serial: null,
    extractMode: "normal",
    dataPaths: {},
    include: [],
    exclude: [],
    list: false,
    outDir: null,
    help: false,
  };
  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];
    // Consume the token following a value-taking flag. Without this check a
    // trailing flag would hand `undefined` to path.resolve() (opaque
    // TypeError) or silently store `undefined` as the option value.
    const takeValue = () => {
      i += 1;
      const value = argv[i];
      if (value === undefined) {
        throw new Error(`missing value for ${a}`);
      }
      return value;
    };
    switch (a) {
      case "--serial": out.serial = takeValue(); break;
      case "--extract-mode": out.extractMode = takeValue(); break;
      case "--contacts-db": out.dataPaths.contacts = path.resolve(takeValue()); break;
      case "--calllog-db": out.dataPaths.calllog = path.resolve(takeValue()); break;
      case "--sms-db": out.dataPaths.sms = path.resolve(takeValue()); break;
      case "--wifi-dir": out.dataPaths.wifi = path.resolve(takeValue()); break;
      case "--include": out.include.push(takeValue()); break;
      case "--exclude": out.exclude.push(takeValue()); break;
      case "--list": out.list = true; break;
      case "--out": out.outDir = takeValue(); break;
      case "-h": case "--help": out.help = true; break;
      default:
        if (a.startsWith("--")) throw new Error(`unknown flag: ${a}`);
    }
  }
  return out;
}
95
+
96
/**
 * Write the usage text for this script to stdout.
 *
 * Only prints; it does not exit the process or set an exit code, so callers
 * decide what happens next.
 */
+ function printHelp() {
97
+ process.stdout.write(`
98
+ smoke-system-data — drive SystemDataAdapter end-to-end for all 4 sources.
99
+
100
+ Modes (mutually exclusive):
101
+ Pre-extracted (offline):
102
+ --contacts-db <path> contacts2.db on disk
103
+ --calllog-db <path> calls db on disk (may be same as --contacts-db)
104
+ --sms-db <path> mmssms.db on disk
105
+ --wifi-dir <path> directory with WifiConfigStore.xml / wpa_supplicant.conf
106
+
107
+ Live device (ADB):
108
+ --serial <serial> target device
109
+ --extract-mode normal pull from /data/data (requires adb root)
110
+ --extract-mode sdcard pull from /sdcard/Download/ (Termux+tsu workaround)
111
+
112
+ Source gating:
113
+ --include <key> force-enable a source (key: contacts/calllog/sms/wifi)
114
+ --exclude <key> force-disable a source
115
+ (defaults per adapter.dataDisclosure: contacts=on / calllog=on / sms=OFF / wifi=on)
116
+
117
+ Misc:
118
+ --list list ADB devices and exit
119
+ --out <dir> output directory (default: ./out/<timestamp>)
120
+ -h, --help show this help
121
+
122
+ Env:
123
+ FORENSICS_BRIDGE_PYTHON python interpreter (default: python)
124
+ `);
125
+ }
126
+
127
/**
 * Format a date as a filesystem-friendly `YYYYMMDD-HHMMSS` slug using the
 * local time zone.
 *
 * @param {Date} [d=new Date()] - Instant to format; defaults to "now", which
 *   is what existing zero-argument callers get.
 * @returns {string} Slug such as `20240105-030409`.
 */
function timestampSlug(d = new Date()) {
  // Zero-pad every component to two digits so slugs sort chronologically.
  const z = (n) => String(n).padStart(2, "0");
  // getMonth() is zero-based, hence the +1.
  const datePart = `${d.getFullYear()}${z(d.getMonth() + 1)}${z(d.getDate())}`;
  const timePart = `${z(d.getHours())}${z(d.getMinutes())}${z(d.getSeconds())}`;
  return `${datePart}-${timePart}`;
}
132
+
133
/**
 * Emit one structured log record as a single JSON line.
 *
 * Records with level "error" go to stderr; every other level goes to stdout.
 *
 * @param {string} level - Severity label, e.g. "info" or "error".
 * @param {string} msg - Human-readable message.
 * @param {Object} [extra={}] - Additional fields merged into the record.
 *   Keys that collide with the reserved fields (`ts`, `level`, `msg`) do not
 *   overwrite them; they are emitted as `extra_<key>` instead, so a payload
 *   that happens to carry e.g. its own `msg` cannot corrupt the record.
 * @returns {void}
 */
function log(level, msg, extra = {}) {
  const reserved = ["ts", "level", "msg"];
  const record = { ts: new Date().toISOString(), level, msg };
  for (const [key, value] of Object.entries(extra || {})) {
    const target = reserved.includes(key) ? `extra_${key}` : key;
    record[target] = value;
  }
  const line = JSON.stringify(record);
  if (level === "error") process.stderr.write(line + "\n");
  else process.stdout.write(line + "\n");
}
138
+
139
/**
 * Build the effective per-source include map for this run.
 *
 * Starts from a copy of DEFAULT_INCLUDE, switches on every key named by
 * `--include`, then switches off every key named by `--exclude` (so exclude
 * wins when a key appears in both), and passes the result through
 * sanitizeInclude.
 *
 * @param {{include: string[], exclude: string[]}} args - Parsed CLI options.
 * @returns {Object} The value returned by sanitizeInclude for the merged map.
 * @throws {Error} When a named key is not an own property of DEFAULT_INCLUDE.
 */
function resolveInclude(args) {
  const selection = Object.assign({}, DEFAULT_INCLUDE);

  // Apply one flag's keys to the selection, rejecting unknown source names.
  const applyFlag = (flag, keys, enabled) => {
    for (const key of keys) {
      const known = Object.prototype.hasOwnProperty.call(selection, key);
      if (!known) {
        throw new Error(`unknown source for ${flag}: ${key}`);
      }
      selection[key] = enabled;
    }
  };

  applyFlag("--include", args.include, true);
  applyFlag("--exclude", args.exclude, false);

  return sanitizeInclude(selection);
}
155
+
156
+ // ---------------------------------------------------------------------------
157
+
158
/**
 * Run the system-data smoke test end-to-end.
 *
 * Parses the CLI flags, prepares the output directory, starts the Python
 * sidecar, authenticates the SystemDataAdapter, drains `adapter.sync()`
 * while validating every payload with `validate`, and writes a JSON dump
 * (plus `validation-errors.json` when any payload is invalid) into the
 * output directory. With `--list` it only prints the result of the
 * `android.list_devices` sidecar call.
 *
 * Failures are reported through `process.exitCode` rather than
 * `process.exit()`, so the `finally` block always stops the sidecar and
 * pending stdout/stderr writes are not cut short:
 *   - 1: authentication failed
 *   - 2: invalid command-line flags, or at least one invalid entity
 *
 * @param {string[]} rawArgs - CLI tokens (typically `process.argv.slice(2)`).
 * @returns {Promise<void>} Resolves when the run is finished. Errors thrown
 *   by the sidecar, the adapter or the filesystem propagate to the caller.
 */
async function main(rawArgs) {
  let args;
  try {
    args = parseArgs(rawArgs);
  } catch (err) {
    console.error(err.message);
    printHelp();
    // Nothing has been started yet; returning with exitCode set lets the
    // help text flush fully even when stdout is a pipe.
    process.exitCode = 2;
    return;
  }
  if (args.help) {
    printHelp();
    return;
  }

  const outDir = path.resolve(args.outDir || path.join(process.cwd(), "out", timestampSlug()));
  fs.mkdirSync(outDir, { recursive: true });
  log("info", "output directory ready", { outDir });

  const supervisor = new SidecarSupervisor({
    command: PYTHON,
    args: ["-u", "-m", "forensics_bridge.ipc_server"],
    cwd: SIDECAR_ROOT,
    healthCheckIntervalMs: 0,
    env: { PYTHONPATH: SIDECAR_ROOT },
  });
  // Relay sidecar log lines to stderr so stdout stays machine-readable JSON.
  supervisor.on("log", (line) => process.stderr.write(`[sidecar] ${line}\n`));

  await supervisor.start({ readyTimeoutMs: 10_000 });
  log("info", "sidecar ready");

  const adapter = new SystemDataAdapter({ supervisor });

  try {
    if (args.list) {
      const devices = await supervisor.invoke("android.list_devices");
      console.log(JSON.stringify(devices, null, 2));
      return;
    }

    const include = resolveInclude(args);
    log("info", "include resolved", include);

    // An empty dataPaths map / missing serial is passed as `undefined`.
    const dataPaths = Object.keys(args.dataPaths).length ? args.dataPaths : undefined;
    const serial = args.serial || undefined;

    // 1. authenticate
    const auth = await adapter.authenticate({ dataPaths, serial });
    log("info", "authenticated", auth);
    if (!auth.ok) {
      log("error", "authentication failed", auth);
      // Do not call process.exit() here: it would skip the `finally` below
      // and leave the sidecar process running.
      process.exitCode = 1;
      return;
    }

    // 2. drain sync stream + validate every entity
    const entitiesByType = { person: [], event: [], place: [], item: [], topic: [] };
    const invalid = [];
    let total = 0;
    const t0 = Date.now();

    const scratchDir = path.join(outDir, "scratch");
    fs.mkdirSync(scratchDir, { recursive: true });

    const progressEvents = [];
    for await (const raw of adapter.sync({
      include,
      serial,
      extractMode: args.extractMode,
      dataPaths,
      scratchDir,
      onProgress: (msg) => {
        progressEvents.push(msg);
        if (msg.phase === "progress") return; // too chatty
        log("info", `adapter:${msg.source}`, msg);
      },
    })) {
      total += 1;
      // Entities of an unrecognised type still count towards `total` but
      // are not bucketed.
      const bucket = entitiesByType[raw.entityType];
      if (bucket) bucket.push(raw.payload);

      // Cross-source schema validation
      const v = validate(raw.payload);
      if (!v.valid) {
        invalid.push({
          id: raw.payload && raw.payload.id,
          entityType: raw.entityType,
          errors: v.errors,
        });
      }
    }
    const wallMs = Date.now() - t0;

    log("info", "sync drained", {
      wallMs,
      total,
      persons: entitiesByType.person.length,
      events: entitiesByType.event.length,
      places: entitiesByType.place.length,
      invalidCount: invalid.length,
    });

    if (invalid.length) {
      log("error", "validation failed", { count: invalid.length });
      fs.writeFileSync(
        path.join(outDir, "validation-errors.json"),
        JSON.stringify(invalid, null, 2),
      );
      // Keep going so the dump is still written for inspection.
      process.exitCode = 2;
    } else {
      log("info", "all entities passed UnifiedSchema validation");
    }

    // 3. write a single combined JSON dump (persons / events / places) for inspection
    const dump = {
      schemaVersion: "0.1.0",
      generatedAt: new Date().toISOString(),
      adapter: "system-data",
      adapterVersion: adapter.version,
      include,
      input: {
        serial: args.serial || null,
        extractMode: args.extractMode,
        dataPaths: args.dataPaths,
      },
      wallMs,
      totals: {
        persons: entitiesByType.person.length,
        events: entitiesByType.event.length,
        places: entitiesByType.place.length,
        invalid: invalid.length,
      },
      progressEvents,
      persons: entitiesByType.person,
      events: entitiesByType.event,
      places: entitiesByType.place,
    };
    const dumpPath = path.join(outDir, "system-data-batch.json");
    fs.writeFileSync(dumpPath, JSON.stringify(dump, null, 2));
    log("info", "wrote dump", { dumpPath, bytes: fs.statSync(dumpPath).size });

    log("info", "summary", {
      total,
      persons: entitiesByType.person.length,
      events: entitiesByType.event.length,
      places: entitiesByType.place.length,
      invalid: invalid.length,
      wallMs,
      outDir,
    });
  } finally {
    await supervisor.stop({ graceMs: 2000 });
  }
}
307
+
308
/**
 * Last-resort handler for a rejected `main()`: logs a structured "fatal"
 * record, prints the stack trace (when there is one) to stderr, and exits
 * with code 1.
 *
 * @param {Error} err - The rejection reason.
 */
function reportFatalAndExit(err) {
  const details = { name: err.name, message: err.message, code: err.code };
  log("error", "fatal", details);
  if (err.stack) {
    process.stderr.write(err.stack + "\n");
  }
  process.exit(1);
}

// Script entry point: run with the user-supplied CLI tokens.
main(process.argv.slice(2)).catch(reportFatalAndExit);