@chainlesschain/personal-data-hub 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
  2. package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
  3. package/__tests__/adapters/ai-chat-history.test.js +396 -0
  4. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  5. package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
  6. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  7. package/__tests__/adapters/email-adapter.test.js +138 -1
  8. package/__tests__/adapters/email-classifier.test.js +347 -0
  9. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  10. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  11. package/__tests__/adapters/email-templates.test.js +699 -0
  12. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
  13. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  14. package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
  15. package/__tests__/adapters/system-data-android.test.js +387 -0
  16. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  17. package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
  18. package/__tests__/adapters/wechat-env-probe.test.js +162 -0
  19. package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
  20. package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
  21. package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
  22. package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
  23. package/__tests__/analysis-skills.test.js +556 -0
  24. package/__tests__/analysis.test.js +329 -1
  25. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
  26. package/__tests__/e2e/full-user-journey.test.js +188 -0
  27. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  28. package/__tests__/entity-resolver-stages.test.js +411 -0
  29. package/__tests__/entity-resolver-vault.test.js +246 -0
  30. package/__tests__/entity-resolver.test.js +526 -0
  31. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  32. package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
  33. package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
  34. package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
  35. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
  36. package/__tests__/longtail-adapters.test.js +217 -0
  37. package/__tests__/mobile-extractor.test.js +288 -0
  38. package/__tests__/registry.test.js +4 -2
  39. package/__tests__/shopping-adapters.test.js +296 -0
  40. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  41. package/__tests__/sidecar-supervisor.test.js +120 -0
  42. package/__tests__/social-adapters.test.js +206 -0
  43. package/__tests__/travel-adapters.test.js +325 -0
  44. package/__tests__/vault.test.js +3 -3
  45. package/__tests__/wechat-adapter.test.js +476 -0
  46. package/__tests__/whatsapp-adapter.test.js +135 -0
  47. package/lib/adapter-spec.js +12 -0
  48. package/lib/adapters/_python-sidecar-base.js +207 -0
  49. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
  50. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  51. package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
  52. package/lib/adapters/ai-chat-history/health-checker.js +210 -0
  53. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  54. package/lib/adapters/ai-chat-history/index.js +28 -0
  55. package/lib/adapters/ai-chat-history/schema-map.js +258 -0
  56. package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
  57. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  58. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  59. package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
  60. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  61. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  62. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  63. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  64. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  65. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  66. package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
  67. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
  68. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  69. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  70. package/lib/adapters/alipay-bill/index.js +41 -0
  71. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  72. package/lib/adapters/email-imap/classifier.js +495 -0
  73. package/lib/adapters/email-imap/email-adapter.js +419 -8
  74. package/lib/adapters/email-imap/index.js +42 -0
  75. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  76. package/lib/adapters/email-imap/templates/bill.js +232 -0
  77. package/lib/adapters/email-imap/templates/government.js +120 -0
  78. package/lib/adapters/email-imap/templates/index.js +78 -0
  79. package/lib/adapters/email-imap/templates/order.js +186 -0
  80. package/lib/adapters/email-imap/templates/other.js +114 -0
  81. package/lib/adapters/email-imap/templates/register.js +113 -0
  82. package/lib/adapters/email-imap/templates/travel.js +157 -0
  83. package/lib/adapters/email-imap/templates/utils.js +275 -0
  84. package/lib/adapters/email-imap/transactions.js +234 -0
  85. package/lib/adapters/messaging-qq/index.js +158 -0
  86. package/lib/adapters/messaging-telegram/index.js +142 -0
  87. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  88. package/lib/adapters/shopping-base/index.js +208 -0
  89. package/lib/adapters/shopping-jd/index.js +150 -0
  90. package/lib/adapters/shopping-meituan/index.js +154 -0
  91. package/lib/adapters/shopping-taobao/index.js +176 -0
  92. package/lib/adapters/social-bilibili/index.js +171 -0
  93. package/lib/adapters/social-douyin/index.js +116 -0
  94. package/lib/adapters/social-kuaishou/index.js +237 -0
  95. package/lib/adapters/social-toutiao/index.js +236 -0
  96. package/lib/adapters/social-weibo/index.js +164 -0
  97. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  98. package/lib/adapters/system-data/disclosure.js +166 -0
  99. package/lib/adapters/system-data/index.js +34 -0
  100. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  101. package/lib/adapters/system-data-android/adapter.js +348 -0
  102. package/lib/adapters/system-data-android/index.js +76 -0
  103. package/lib/adapters/travel-12306/index.js +151 -0
  104. package/lib/adapters/travel-amap/index.js +164 -0
  105. package/lib/adapters/travel-baidu-map/index.js +162 -0
  106. package/lib/adapters/travel-base/index.js +240 -0
  107. package/lib/adapters/travel-ctrip/index.js +151 -0
  108. package/lib/adapters/wechat/bootstrap.js +146 -0
  109. package/lib/adapters/wechat/content-parser.js +326 -0
  110. package/lib/adapters/wechat/db-reader.js +209 -0
  111. package/lib/adapters/wechat/env-probe.js +218 -0
  112. package/lib/adapters/wechat/frida-agent/loader.js +67 -0
  113. package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
  114. package/lib/adapters/wechat/index.js +37 -0
  115. package/lib/adapters/wechat/key-extractor.js +158 -0
  116. package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
  117. package/lib/adapters/wechat/key-providers/index.js +22 -0
  118. package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
  119. package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
  120. package/lib/adapters/wechat/normalize.js +220 -0
  121. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  122. package/lib/analysis-skills/base.js +113 -0
  123. package/lib/analysis-skills/footprint.js +167 -0
  124. package/lib/analysis-skills/index.js +58 -0
  125. package/lib/analysis-skills/interests.js +161 -0
  126. package/lib/analysis-skills/relations.js +226 -0
  127. package/lib/analysis-skills/spending.js +219 -0
  128. package/lib/analysis-skills/timeline.js +167 -0
  129. package/lib/analysis.js +191 -2
  130. package/lib/entity-resolver/embedding-stage.js +198 -0
  131. package/lib/entity-resolver/entity-resolver.js +384 -0
  132. package/lib/entity-resolver/index.js +42 -0
  133. package/lib/entity-resolver/llm-stage.js +191 -0
  134. package/lib/entity-resolver/rule-stage.js +208 -0
  135. package/lib/entity-resolver/worker.js +149 -0
  136. package/lib/index.js +131 -0
  137. package/lib/migrations.js +73 -0
  138. package/lib/mobile-extractor/android.js +193 -0
  139. package/lib/mobile-extractor/index.js +9 -0
  140. package/lib/mobile-extractor/ios.js +223 -0
  141. package/lib/prompt-builder.js +11 -1
  142. package/lib/query-parser.js +7 -1
  143. package/lib/registry.js +42 -0
  144. package/lib/sidecar/index.js +15 -0
  145. package/lib/sidecar/supervisor.js +359 -0
  146. package/lib/vault.js +343 -0
  147. package/package.json +36 -3
  148. package/scripts/_make-fixture-all.js +126 -0
  149. package/scripts/_make-fixture-contacts.js +84 -0
  150. package/scripts/evaluate-entity-resolver.js +213 -0
  151. package/scripts/smoke-phase-5-5.js +196 -0
  152. package/scripts/smoke-phase-5-7.js +181 -0
  153. package/scripts/smoke-system-data-contacts.js +309 -0
  154. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,309 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Smoke / real-device runner for Phase 4.5.2 — Contacts extraction.
4
+ *
5
+ * Drives the full vertical:
6
+ *
7
+ * ┌──────────────────────────────────────────────────────────────┐
8
+ * │ 1. (optional) android.list_devices │
9
+ * │ 2. (optional) android.pull_file /data/.../contacts2.db │
10
+ * │ 3. system.parse_contacts → Persons │
11
+ * │ 4. hub-side UnifiedSchema validatePerson() on every row │
12
+ * │ 5. write NormalizedBatch JSON to ./out/<timestamp>/ │
13
+ * └──────────────────────────────────────────────────────────────┘
14
+ *
15
+ * Three modes:
16
+ *
17
+ * --db <path> Skip ADB entirely; parse a contacts2.db already
18
+ * on disk. Best for first-run sanity on the dev box.
19
+ *
20
+ * --serial <serial> Run `adb pull` first. Requires `adb root` (most
21
+ * retail builds reject this) OR a userdebug build.
22
+ * On a stock Redmi 24115RA8EC use --workaround.
23
+ *
24
+ * --workaround sdcard Look for a contacts2.db copy at
25
+ * /sdcard/Download/contacts2.db (you copied it
26
+ * out via Termux + tsu, or via Mi cloud export,
27
+ * per docs/design/Adapter_System_Data.md §2.1).
28
+ *
29
+ * Usage examples:
30
+ *
31
+ * # Local fixture
32
+ * node scripts/smoke-system-data-contacts.js --db ./fixtures/contacts2.db
33
+ *
34
+ * # List attached ADB devices and exit
35
+ * node scripts/smoke-system-data-contacts.js --list
36
+ *
37
+ * # Real device with /sdcard workaround
38
+ * node scripts/smoke-system-data-contacts.js \
39
+ * --serial 24115RA8ECabc123 --workaround sdcard
40
+ *
41
+ * Exits non-zero on any sidecar error or schema validation failure.
42
+ */
43
+
44
+ "use strict";
45
+
46
+ const path = require("node:path");
47
+ const fs = require("node:fs");
48
+ const os = require("node:os");
49
+
50
+ const { SidecarSupervisor } = require("../lib/sidecar");
51
+ const { validatePerson } = require("../lib/schemas");
52
+
53
// Directory handed to the sidecar as both its working directory and PYTHONPATH:
// the `personal-data-hub-bridge` folder two levels above this script.
const SIDECAR_ROOT = path.join(__dirname, "..", "..", "personal-data-hub-bridge");

// Interpreter used to launch the sidecar; FORENSICS_BRIDGE_PYTHON overrides it.
const PYTHON = process.env.FORENSICS_BRIDGE_PYTHON || "python";

// Device-side location read when `--workaround sdcard` is given.
const SDCARD_WORKAROUND_PATH = "/sdcard/Download/contacts2.db";

// Device-side location read otherwise.
const SYSTEM_PROVIDER_PATH = "/data/data/com.android.providers.contacts/databases/contacts2.db";
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // CLI parsing — kept dependency-free
62
+ // ---------------------------------------------------------------------------
63
+
64
/**
 * Parse the command-line arguments of the contacts smoke runner.
 *
 * Value flags: `--db`, `--serial`, `--workaround`, `--out`.
 * Boolean switches: `--list`, `-h` / `--help`.
 * Tokens that are neither a known flag nor start with `--` are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{db: ?string, serial: ?string, workaround: ?string,
 *            list: boolean, outDir: ?string, help: boolean}} Parsed options.
 * @throws {Error} On an unknown `--` flag, or when a value flag is the last
 *   token or is directly followed by another `--` flag.
 */
function parseArgs(argv) {
  const out = {
    db: null,
    serial: null,
    workaround: null,
    list: false,
    outDir: null,
    help: false,
  };

  // Flags that consume the next token, mapped to the option they populate.
  const valueFlags = new Map([
    ["--db", "db"],
    ["--serial", "serial"],
    ["--workaround", "workaround"],
    ["--out", "outDir"],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];

    if (valueFlags.has(a)) {
      const value = argv[i + 1];
      // Without this check a trailing `--db` stored `undefined`, and
      // `--serial --list` swallowed `--list` as the serial.
      if (value === undefined || value.startsWith("--")) {
        throw new Error(`flag ${a} requires a value`);
      }
      out[valueFlags.get(a)] = value;
      i += 1;
      continue;
    }

    if (a === "--list") {
      out.list = true;
    } else if (a === "-h" || a === "--help") {
      out.help = true;
    } else if (a.startsWith("--")) {
      throw new Error(`unknown flag: ${a}`);
    }
  }
  return out;
}
103
+
104
/**
 * Write the usage text for the contacts smoke runner to stdout.
 *
 * The exit-code table also lists CLI usage errors under code 2, because
 * `main` exits with 2 when argument parsing fails.
 *
 * @returns {void}
 */
function printHelp() {
  const lines = [
    "",
    "smoke-system-data-contacts — drive sidecar end-to-end for contacts.",
    "",
    "--db <path> Parse a contacts2.db already on disk (skip ADB).",
    "--serial <serial> Target this ADB device for the pull step.",
    `--workaround sdcard Pull from ${SDCARD_WORKAROUND_PATH} instead of /data/data.`,
    "Required on stock Android (no adb root).",
    "--list Just list ADB devices and exit.",
    "--out <dir> Write NormalizedBatch JSON here. Default: ./out/<ts>.",
    "-h, --help Show this help.",
    "",
    "Env:",
    "FORENSICS_BRIDGE_PYTHON override Python interpreter (default: python).",
    "",
    "Exit codes:",
    "0 success",
    "1 sidecar / hub error",
    "2 invalid Persons (schema validation failed) or invalid CLI usage",
    "",
  ];
  process.stdout.write(lines.join("\n"));
}
125
+
126
+ // ---------------------------------------------------------------------------
127
+ // Helpers
128
+ // ---------------------------------------------------------------------------
129
+
130
/**
 * Build a local-time slug of the form `YYYYMMDD-HHMMSS`.
 *
 * Every component except the year is left-padded to two digits.
 *
 * @returns {string} The slug for the current moment.
 */
function timestampSlug() {
  const now = new Date();
  const pad2 = (value) => String(value).padStart(2, "0");
  const datePart = [
    now.getFullYear(),
    pad2(now.getMonth() + 1),
    pad2(now.getDate()),
  ].join("");
  const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map((value) => pad2(value))
    .join("");
  return `${datePart}-${timePart}`;
}
138
+
139
/**
 * Create the supervisor that launches the Python sidecar.
 *
 * The sidecar runs `<PYTHON> -u -m forensics_bridge.ipc_server` with
 * SIDECAR_ROOT as both working directory and PYTHONPATH, and with
 * `healthCheckIntervalMs` set to 0.
 *
 * @returns {SidecarSupervisor} A configured supervisor; this function does not start it.
 */
function makeSupervisor() {
  const options = {
    command: PYTHON,
    args: ["-u", "-m", "forensics_bridge.ipc_server"],
    cwd: SIDECAR_ROOT,
    healthCheckIntervalMs: 0,
    env: { PYTHONPATH: SIDECAR_ROOT },
  };
  return new SidecarSupervisor(options);
}
148
+
149
/**
 * Emit one newline-terminated JSON log record.
 *
 * The record holds `ts` (ISO timestamp), `level` and `msg`, followed by the
 * properties of `extra`; an `extra` key with one of those names replaces it.
 * Records whose `level` argument is "error" go to stderr, all others to stdout.
 *
 * @param {string} level - Severity label; only "error" changes the stream.
 * @param {string} msg - Message text.
 * @param {object} [extra={}] - Additional fields merged into the record.
 * @returns {void}
 */
function log(level, msg, extra = {}) {
  const record = { ts: new Date().toISOString(), level, msg, ...extra };
  const sink = level === "error" ? process.stderr : process.stdout;
  sink.write(JSON.stringify(record) + "\n");
}
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // Main
162
+ // ---------------------------------------------------------------------------
163
+
164
/**
 * Run the contacts smoke test end-to-end.
 *
 * Steps: parse CLI flags, create the output directory, start the sidecar,
 * then either list ADB devices (`--list`) or locate `contacts2.db` (from
 * `--db`, or pulled from the device named by `--serial`), parse it through
 * `system.parse_contacts`, validate every person with `validatePerson`, and
 * write the results as JSON into the output directory.
 *
 * Exit behavior: invalid CLI usage prints help and exits with code 2;
 * schema validation failures set `process.exitCode = 2` but the dump is
 * still written.
 *
 * @param {string[]} rawArgs - Command-line arguments after the script name.
 * @returns {Promise<void>} Resolves when the run has finished and the sidecar
 *   has been asked to stop.
 * @throws {Error} When no input source is given, the pull yields no local
 *   file, the database is missing, or the sidecar fails.
 */
async function main(rawArgs) {
  let args;
  try {
    args = parseArgs(rawArgs);
    // Only "sdcard" is implemented. Any other value used to fall back
    // silently to the /data/data path, which needs adb root.
    if (args.workaround != null && args.workaround !== "sdcard") {
      throw new Error(`unsupported --workaround value: ${args.workaround}`);
    }
  } catch (err) {
    console.error(err.message);
    printHelp();
    process.exit(2);
  }
  if (args.help) {
    printHelp();
    return;
  }

  const outDir = path.resolve(
    args.outDir || path.join(process.cwd(), "out", timestampSlug()),
  );
  fs.mkdirSync(outDir, { recursive: true });
  log("info", "output directory ready", { outDir });

  const sup = makeSupervisor();
  // Forward the sidecar's own log lines to stderr so the user sees timing.
  sup.on("log", (line) => process.stderr.write(`[sidecar] ${line}\n`));

  let failed = false;
  try {
    // Started inside the try so that a failed or timed-out start still
    // reaches the stop() call below.
    await sup.start({ readyTimeoutMs: 10_000 });
    log("info", "sidecar ready");

    // ---------- list-only path ----------
    if (args.list) {
      const devices = await sup.invoke("android.list_devices");
      log("info", "adb devices", devices);
      console.log(JSON.stringify(devices, null, 2));
      return;
    }

    // ---------- choose source for contacts2.db ----------
    let dbPath = args.db ? path.resolve(args.db) : null;

    if (!dbPath) {
      if (!args.serial) {
        throw new Error(
          "neither --db nor --serial provided; nothing to extract",
        );
      }
      const remotePath =
        args.workaround === "sdcard" ? SDCARD_WORKAROUND_PATH : SYSTEM_PROVIDER_PATH;
      log("info", "pulling from device", { serial: args.serial, remotePath });
      const pulled = await sup.invoke(
        "android.pull_file",
        {
          serial: args.serial,
          remote_path: remotePath,
          local_dir: outDir,
        },
        { timeoutMs: 60_000 },
      );
      log("info", "pull completed", pulled);
      if (typeof pulled?.local !== "string" || pulled.local === "") {
        throw new Error("android.pull_file did not return a local file path");
      }
      dbPath = pulled.local;
    }

    if (!fs.existsSync(dbPath)) {
      throw new Error(`contacts db not found at ${dbPath}`);
    }

    // ---------- parse + validate ----------
    const persons = [];
    let chunks = 0;
    const t0 = Date.now();
    const parseResult = await sup.invoke(
      "system.parse_contacts",
      {
        data_path: dbPath,
        device_serial: args.serial || null,
      },
      {
        timeoutMs: 120_000,
        onProgress: (p) => log("info", "progress", p),
        onChunk: (batch) => {
          chunks += 1;
          persons.push(...(batch.persons || []));
        },
      },
    );
    const wallMs = Date.now() - t0;
    log("info", "parse completed", {
      ...parseResult,
      chunks,
      wallMs,
      personsCollected: persons.length,
    });

    // ---------- hub-side schema check ----------
    const invalid = [];
    for (const p of persons) {
      const v = validatePerson(p);
      if (!v.valid) invalid.push({ id: p.id, errors: v.errors });
    }
    if (invalid.length) {
      log("error", "validation failed", { count: invalid.length });
      fs.writeFileSync(
        path.join(outDir, "validation-errors.json"),
        JSON.stringify(invalid, null, 2),
      );
      // Keep going so the dump is still written for inspection.
      process.exitCode = 2;
    } else {
      log("info", "all persons passed UnifiedSchema validation");
    }

    // ---------- persist for inspection ----------
    const dump = {
      schemaVersion: "0.1.0",
      generatedAt: new Date().toISOString(),
      sidecar: { pythonRoot: SIDECAR_ROOT },
      input: {
        dbPath,
        serial: args.serial || null,
        workaround: args.workaround || null,
      },
      parseResult,
      wallMs,
      persons,
    };
    const dumpPath = path.join(outDir, "contacts-normalized-batch.json");
    fs.writeFileSync(dumpPath, JSON.stringify(dump, null, 2));
    log("info", "wrote dump", { dumpPath, bytes: fs.statSync(dumpPath).size });

    // ---------- compact summary ----------
    // NOTE(review): the shape of parseResult is not visible here; optional
    // chaining keeps the summary from throwing if the call returned nothing.
    const stats = parseResult?.stats;
    log("info", "summary", {
      totalPersons: parseResult?.totalPersons,
      withPhone: stats?.with_phone,
      withEmail: stats?.with_email,
      starred: stats?.starred,
      invalidPersons: invalid.length,
      outDir,
    });
  } catch (err) {
    failed = true;
    throw err;
  } finally {
    try {
      await sup.stop({ graceMs: 2000 });
    } catch (stopErr) {
      // Never let a shutdown problem hide the error that aborted the run.
      if (!failed) throw stopErr;
      log("error", "sidecar stop failed", { message: stopErr.message });
    }
  }
}
304
+
305
// Script entry point: run main() with the user's arguments. Any rejection is
// logged as a "fatal" record, followed by the stack trace when one exists,
// and the process exits with code 1.
main(process.argv.slice(2)).catch((err) => {
  const fatalDetails = { name: err.name, message: err.message, code: err.code };
  log("error", "fatal", fatalDetails);
  if (err.stack) {
    process.stderr.write(`${err.stack}\n`);
  }
  process.exit(1);
});
@@ -0,0 +1,312 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Full system-data smoke / real-device runner — Phase 4.5.7.
4
+ *
5
+ * Drives the full SystemDataAdapter end-to-end across all 4 sources
6
+ * (contacts / calllog / sms / wifi), exercising:
7
+ *
8
+ * - PythonSidecarAdapter base class
9
+ * - SidecarSupervisor lifecycle
10
+ * - SystemDataAdapter.authenticate
11
+ * - 4 sidecar parse_* methods
12
+ * - per-entity hub-side UnifiedSchema validation
13
+ * - dataDisclosure metadata sanitization
14
+ *
15
+ * Replaces the contacts-only `smoke-system-data-contacts.js`. The older
16
+ * script remains for users who only want to exercise the Phase 4.5.2 slice.
17
+ *
18
+ * Usage:
19
+ *
20
+ * # Offline mode — pre-extracted host files (no ADB)
21
+ * node scripts/smoke-system-data.js \
22
+ * --contacts-db ./fixtures/contacts2.db \
23
+ * --calllog-db ./fixtures/contacts2.db \
24
+ * --wifi-dir ./fixtures/wifi/
25
+ *
26
+ * # Live device, /sdcard workaround (non-root)
27
+ * node scripts/smoke-system-data.js \
28
+ * --serial 24115RA8ECabc123 --extract-mode sdcard
29
+ *
30
+ * # Live device with adb root
31
+ * node scripts/smoke-system-data.js --serial 24115RA8ECabc123
32
+ *
33
+ * # Include SMS (default off — explicit opt-in for legality)
34
+ * node scripts/smoke-system-data.js --serial XYZ --extract-mode sdcard --include sms
35
+ *
36
+ * # Disable contacts but include sms
37
+ * node scripts/smoke-system-data.js --sms-db ... --include sms --exclude contacts
38
+ *
39
+ * Exit codes:
40
+ * 0 success
41
+ * 1 sidecar / hub error
42
+ * 2 invalid entities (schema validation failed)
43
+ */
44
+
45
+ "use strict";
46
+
47
+ const path = require("node:path");
48
+ const fs = require("node:fs");
49
+ const os = require("node:os");
50
+
51
+ const { SidecarSupervisor } = require("../lib/sidecar");
52
+ const {
53
+ SystemDataAdapter,
54
+ sanitizeInclude,
55
+ DEFAULT_INCLUDE,
56
+ } = require("../lib/adapters/system-data");
57
+ const { validate } = require("../lib/schemas");
58
+
59
// Directory handed to the sidecar as both its working directory and PYTHONPATH:
// the `personal-data-hub-bridge` folder two levels above this script.
const SIDECAR_ROOT = path.resolve(__dirname, "../../personal-data-hub-bridge");

// Interpreter used to launch the sidecar; FORENSICS_BRIDGE_PYTHON overrides it.
const PYTHON = process.env.FORENSICS_BRIDGE_PYTHON || "python";
61
+
62
+ // ---------------------------------------------------------------------------
63
+
64
/**
 * Parse the command-line arguments of the full system-data smoke runner.
 *
 * Value flags: `--serial`, `--extract-mode`, `--contacts-db`, `--calllog-db`,
 * `--sms-db`, `--wifi-dir`, `--include`, `--exclude`, `--out`. The four
 * `*-db` / `*-dir` flags are resolved to absolute paths; `--include` and
 * `--exclude` may repeat. Boolean switches: `--list`, `-h` / `--help`.
 * Tokens that are neither a known flag nor start with `--` are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{serial: ?string, extractMode: string, dataPaths: object,
 *            include: string[], exclude: string[], list: boolean,
 *            outDir: ?string, help: boolean}} Parsed options.
 * @throws {Error} On an unknown `--` flag, or when a value flag is the last
 *   token or is directly followed by another `--` flag.
 */
function parseArgs(argv) {
  const out = {
    serial: null,
    extractMode: "normal",
    dataPaths: {},
    include: [],
    exclude: [],
    list: false,
    outDir: null,
    help: false,
  };

  // Return the token after the flag at `index`. A missing value used to reach
  // path.resolve(undefined) and surface as an unrelated TypeError.
  const valueAfter = (index) => {
    const value = argv[index + 1];
    if (value === undefined || value.startsWith("--")) {
      throw new Error(`flag ${argv[index]} requires a value`);
    }
    return value;
  };

  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];
    switch (a) {
      case "--serial":
        out.serial = valueAfter(i);
        i += 1;
        break;
      case "--extract-mode":
        out.extractMode = valueAfter(i);
        i += 1;
        break;
      case "--contacts-db":
        out.dataPaths.contacts = path.resolve(valueAfter(i));
        i += 1;
        break;
      case "--calllog-db":
        out.dataPaths.calllog = path.resolve(valueAfter(i));
        i += 1;
        break;
      case "--sms-db":
        out.dataPaths.sms = path.resolve(valueAfter(i));
        i += 1;
        break;
      case "--wifi-dir":
        out.dataPaths.wifi = path.resolve(valueAfter(i));
        i += 1;
        break;
      case "--include":
        out.include.push(valueAfter(i));
        i += 1;
        break;
      case "--exclude":
        out.exclude.push(valueAfter(i));
        i += 1;
        break;
      case "--out":
        out.outDir = valueAfter(i);
        i += 1;
        break;
      case "--list":
        out.list = true;
        break;
      case "-h":
      case "--help":
        out.help = true;
        break;
      default:
        if (a.startsWith("--")) throw new Error(`unknown flag: ${a}`);
    }
  }
  return out;
}
95
+
96
/**
 * Write the usage text for the full system-data smoke runner to stdout.
 *
 * @returns {void}
 */
function printHelp() {
  const usageLines = [
    "",
    "smoke-system-data — drive SystemDataAdapter end-to-end for all 4 sources.",
    "",
    "Modes (mutually exclusive):",
    "Pre-extracted (offline):",
    "--contacts-db <path> contacts2.db on disk",
    "--calllog-db <path> calls db on disk (may be same as --contacts-db)",
    "--sms-db <path> mmssms.db on disk",
    "--wifi-dir <path> directory with WifiConfigStore.xml / wpa_supplicant.conf",
    "",
    "Live device (ADB):",
    "--serial <serial> target device",
    "--extract-mode normal pull from /data/data (requires adb root)",
    "--extract-mode sdcard pull from /sdcard/Download/ (Termux+tsu workaround)",
    "",
    "Source gating:",
    "--include <key> force-enable a source (key: contacts/calllog/sms/wifi)",
    "--exclude <key> force-disable a source",
    "(defaults per adapter.dataDisclosure: contacts=on / calllog=on / sms=OFF / wifi=on)",
    "",
    "Misc:",
    "--list list ADB devices and exit",
    "--out <dir> output directory (default: ./out/<timestamp>)",
    "-h, --help show this help",
    "",
    "Env:",
    "FORENSICS_BRIDGE_PYTHON python interpreter (default: python)",
    "",
  ];
  process.stdout.write(usageLines.join("\n"));
}
126
+
127
/**
 * Build a local-time slug of the form `YYYYMMDD-HHMMSS`.
 *
 * Every component except the year is left-padded to two digits.
 *
 * @returns {string} The slug for the current moment.
 */
function timestampSlug() {
  const now = new Date();
  const two = (n) => String(n).padStart(2, "0");
  const day = `${now.getFullYear()}${two(now.getMonth() + 1)}${two(now.getDate())}`;
  const clock = `${two(now.getHours())}${two(now.getMinutes())}${two(now.getSeconds())}`;
  return `${day}-${clock}`;
}
132
+
133
/**
 * Emit one newline-terminated JSON log record.
 *
 * The record holds `ts` (ISO timestamp), `level` and `msg`, followed by the
 * properties of `extra`; an `extra` key with one of those names replaces it.
 * Records whose `level` argument is "error" go to stderr, all others to stdout.
 *
 * @param {string} level - Severity label; only "error" changes the stream.
 * @param {string} msg - Message text.
 * @param {object} [extra={}] - Additional fields merged into the record.
 * @returns {void}
 */
function log(level, msg, extra = {}) {
  const ts = new Date().toISOString();
  const serialized = JSON.stringify({ ts, level, msg, ...extra });
  if (level === "error") {
    process.stderr.write(serialized + "\n");
    return;
  }
  process.stdout.write(serialized + "\n");
}
138
+
139
/**
 * Combine the default source selection with the user's `--include` and
 * `--exclude` flags.
 *
 * Starts from a copy of DEFAULT_INCLUDE, switches on every `--include` key,
 * then switches off every `--exclude` key (so an exclude wins over an include
 * of the same key), and passes the result through `sanitizeInclude`.
 *
 * @param {{include: string[], exclude: string[]}} args - Parsed CLI options.
 * @returns {*} Whatever `sanitizeInclude` returns for the merged selection.
 * @throws {Error} When a key is not an own property of DEFAULT_INCLUDE.
 */
function resolveInclude(args) {
  const selection = { ...DEFAULT_INCLUDE };

  // Validate each key for one flag, then set it to `enabled`.
  const applyFlag = (flagName, keys, enabled) => {
    for (const key of keys) {
      const known = Object.prototype.hasOwnProperty.call(selection, key);
      if (!known) {
        throw new Error(`unknown source for --${flagName}: ${key}`);
      }
      selection[key] = enabled;
    }
  };

  applyFlag("include", args.include, true);
  applyFlag("exclude", args.exclude, false);
  return sanitizeInclude(selection);
}
155
+
156
+ // ---------------------------------------------------------------------------
157
+
158
/**
 * Run the full system-data smoke test end-to-end.
 *
 * Steps: parse CLI flags, create the output directory, start the sidecar,
 * then either list ADB devices (`--list`) or authenticate the
 * SystemDataAdapter, drain `adapter.sync(...)`, validate every payload with
 * `validate`, and write one combined JSON dump into the output directory.
 *
 * Exit behavior: invalid CLI usage prints help and exits with code 2; a failed
 * authentication sets `process.exitCode = 1` and returns; schema validation
 * failures set `process.exitCode = 2` but the dump is still written.
 *
 * @param {string[]} rawArgs - Command-line arguments after the script name.
 * @returns {Promise<void>} Resolves when the run has finished and the sidecar
 *   has been asked to stop.
 * @throws {Error} When the sidecar, the adapter, or file output fails.
 */
async function main(rawArgs) {
  let args;
  try {
    args = parseArgs(rawArgs);
  } catch (err) {
    console.error(err.message);
    printHelp();
    process.exit(2);
  }
  if (args.help) {
    printHelp();
    return;
  }

  const outDir = path.resolve(args.outDir || path.join(process.cwd(), "out", timestampSlug()));
  fs.mkdirSync(outDir, { recursive: true });
  log("info", "output directory ready", { outDir });

  const supervisor = new SidecarSupervisor({
    command: PYTHON,
    args: ["-u", "-m", "forensics_bridge.ipc_server"],
    cwd: SIDECAR_ROOT,
    healthCheckIntervalMs: 0,
    env: { PYTHONPATH: SIDECAR_ROOT },
  });
  supervisor.on("log", (line) => process.stderr.write(`[sidecar] ${line}\n`));

  let failed = false;
  try {
    // Started inside the try so that a failed or timed-out start still
    // reaches the stop() call below.
    await supervisor.start({ readyTimeoutMs: 10_000 });
    log("info", "sidecar ready");

    const adapter = new SystemDataAdapter({ supervisor });

    if (args.list) {
      const devices = await supervisor.invoke("android.list_devices");
      console.log(JSON.stringify(devices, null, 2));
      return;
    }

    const include = resolveInclude(args);
    log("info", "include resolved", include);

    // An empty dataPaths object is passed as undefined (no offline files given).
    const dataPaths = Object.keys(args.dataPaths).length ? args.dataPaths : undefined;
    const serial = args.serial || undefined;

    // 1. authenticate
    const auth = await adapter.authenticate({ dataPaths, serial });
    log("info", "authenticated", auth);
    if (!auth.ok) {
      log("error", "authentication failed", auth);
      // process.exit() here would skip the finally block and leave the
      // sidecar running; set the exit code and return instead.
      process.exitCode = 1;
      return;
    }

    // 2. drain sync stream + validate every entity
    const entitiesByType = { person: [], event: [], place: [], item: [], topic: [] };
    const invalid = [];
    let total = 0;
    const t0 = Date.now();

    const scratchDir = path.join(outDir, "scratch");
    fs.mkdirSync(scratchDir, { recursive: true });

    const progressEvents = [];
    for await (const raw of adapter.sync({
      include,
      serial,
      extractMode: args.extractMode,
      dataPaths,
      scratchDir,
      onProgress: (msg) => {
        progressEvents.push(msg);
        if (msg.phase === "progress") return; // too chatty
        log("info", `adapter:${msg.source}`, msg);
      },
    })) {
      total += 1;
      const bucket = entitiesByType[raw.entityType];
      if (bucket) bucket.push(raw.payload);

      // Cross-source schema validation
      const v = validate(raw.payload);
      if (!v.valid) {
        invalid.push({
          id: raw.payload?.id,
          entityType: raw.entityType,
          errors: v.errors,
        });
      }
    }
    const wallMs = Date.now() - t0;

    const counts = {
      persons: entitiesByType.person.length,
      events: entitiesByType.event.length,
      places: entitiesByType.place.length,
    };

    log("info", "sync drained", {
      wallMs,
      total,
      ...counts,
      invalidCount: invalid.length,
    });

    if (invalid.length) {
      log("error", "validation failed", { count: invalid.length });
      fs.writeFileSync(
        path.join(outDir, "validation-errors.json"),
        JSON.stringify(invalid, null, 2),
      );
      // Keep going so the dump is still written for inspection.
      process.exitCode = 2;
    } else {
      log("info", "all entities passed UnifiedSchema validation");
    }

    // 3. write one combined JSON dump (persons / events / places) for inspection
    const dump = {
      schemaVersion: "0.1.0",
      generatedAt: new Date().toISOString(),
      adapter: "system-data",
      adapterVersion: adapter.version,
      include,
      input: {
        serial: args.serial || null,
        extractMode: args.extractMode,
        dataPaths: args.dataPaths,
      },
      wallMs,
      totals: { ...counts, invalid: invalid.length },
      progressEvents,
      persons: entitiesByType.person,
      events: entitiesByType.event,
      places: entitiesByType.place,
    };
    const dumpPath = path.join(outDir, "system-data-batch.json");
    fs.writeFileSync(dumpPath, JSON.stringify(dump, null, 2));
    log("info", "wrote dump", { dumpPath, bytes: fs.statSync(dumpPath).size });

    log("info", "summary", {
      total,
      ...counts,
      invalid: invalid.length,
      wallMs,
      outDir,
    });
  } catch (err) {
    failed = true;
    throw err;
  } finally {
    try {
      await supervisor.stop({ graceMs: 2000 });
    } catch (stopErr) {
      // Never let a shutdown problem hide the error that aborted the run.
      if (!failed) throw stopErr;
      log("error", "sidecar stop failed", { message: stopErr.message });
    }
  }
}
307
+
308
// Script entry point: run main() with the user's arguments. Any rejection is
// logged as a "fatal" record, followed by the stack trace when one exists,
// and the process exits with code 1.
const cliArgs = process.argv.slice(2);
main(cliArgs).catch((err) => {
  log("error", "fatal", {
    name: err.name,
    message: err.message,
    code: err.code,
  });
  if (err.stack) {
    process.stderr.write(`${err.stack}\n`);
  }
  process.exit(1);
});