@chainlesschain/personal-data-hub 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
- package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
- package/__tests__/adapters/ai-chat-history.test.js +396 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
- package/__tests__/adapters/system-data-android.test.js +387 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
- package/__tests__/adapters/wechat-env-probe.test.js +162 -0
- package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
- package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
- package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
- package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
- package/__tests__/analysis-skills.test.js +556 -0
- package/__tests__/analysis.test.js +329 -1
- package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
- package/__tests__/e2e/full-user-journey.test.js +188 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
- package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
- package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
- package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/registry.test.js +4 -2
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
- package/lib/adapters/ai-chat-history/health-checker.js +210 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +258 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-kuaishou/index.js +237 -0
- package/lib/adapters/social-toutiao/index.js +236 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/system-data-android/adapter.js +348 -0
- package/lib/adapters/system-data-android/index.js +76 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/bootstrap.js +146 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/env-probe.js +218 -0
- package/lib/adapters/wechat/frida-agent/loader.js +67 -0
- package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
- package/lib/adapters/wechat/index.js +37 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
- package/lib/adapters/wechat/key-providers/index.js +22 -0
- package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
- package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +219 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/analysis.js +191 -2
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +131 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/prompt-builder.js +11 -1
- package/lib/query-parser.js +7 -1
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +343 -0
- package/package.json +36 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Smoke / real-device runner for Phase 4.5.2 — Contacts extraction.
|
|
4
|
+
*
|
|
5
|
+
* Drives the full vertical:
|
|
6
|
+
*
|
|
7
|
+
* ┌──────────────────────────────────────────────────────────────┐
|
|
8
|
+
* │ 1. (optional) android.list_devices │
|
|
9
|
+
* │ 2. (optional) android.pull_file /data/.../contacts2.db │
|
|
10
|
+
* │ 3. system.parse_contacts → Persons │
|
|
11
|
+
* │ 4. hub-side UnifiedSchema validatePerson() on every row │
|
|
12
|
+
* │ 5. write NormalizedBatch JSON to ./out/<timestamp>/ │
|
|
13
|
+
* └──────────────────────────────────────────────────────────────┘
|
|
14
|
+
*
|
|
15
|
+
* Three modes:
|
|
16
|
+
*
|
|
17
|
+
* --db <path> Skip ADB entirely; parse a contacts2.db already
|
|
18
|
+
* on disk. Best for first-run sanity on the dev box.
|
|
19
|
+
*
|
|
20
|
+
* --serial <serial> Run `adb pull` first. Requires `adb root` (most
|
|
21
|
+
* retail builds reject this) OR a userdebug build.
|
|
22
|
+
* On a stock Redmi 24115RA8EC use --workaround.
|
|
23
|
+
*
|
|
24
|
+
* --workaround sdcard Look for a contacts2.db copy at
|
|
25
|
+
* /sdcard/Download/contacts2.db (you copied it
|
|
26
|
+
* out via Termux + tsu, or via Mi cloud export,
|
|
27
|
+
* per docs/design/Adapter_System_Data.md §2.1).
|
|
28
|
+
*
|
|
29
|
+
* Usage examples:
|
|
30
|
+
*
|
|
31
|
+
* # Local fixture
|
|
32
|
+
* node scripts/smoke-system-data-contacts.js --db ./fixtures/contacts2.db
|
|
33
|
+
*
|
|
34
|
+
 *   # List connected ADB devices and exit
|
|
35
|
+
* node scripts/smoke-system-data-contacts.js --list
|
|
36
|
+
*
|
|
37
|
+
* # Real device with /sdcard workaround
|
|
38
|
+
* node scripts/smoke-system-data-contacts.js \
|
|
39
|
+
* --serial 24115RA8ECabc123 --workaround sdcard
|
|
40
|
+
*
|
|
41
|
+
* Exits non-zero on any sidecar error or schema validation failure.
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
"use strict";
|
|
45
|
+
|
|
46
|
+
const path = require("node:path");
|
|
47
|
+
const fs = require("node:fs");
|
|
48
|
+
const os = require("node:os");
|
|
49
|
+
|
|
50
|
+
const { SidecarSupervisor } = require("../lib/sidecar");
|
|
51
|
+
const { validatePerson } = require("../lib/schemas");
|
|
52
|
+
|
|
53
|
+
const SIDECAR_ROOT = path.resolve(__dirname, "..", "..", "personal-data-hub-bridge");
|
|
54
|
+
const PYTHON = process.env.FORENSICS_BRIDGE_PYTHON || "python";
|
|
55
|
+
|
|
56
|
+
const SDCARD_WORKAROUND_PATH = "/sdcard/Download/contacts2.db";
|
|
57
|
+
const SYSTEM_PROVIDER_PATH =
|
|
58
|
+
"/data/data/com.android.providers.contacts/databases/contacts2.db";
|
|
59
|
+
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// CLI parsing — kept dependency-free
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
/**
 * Parse the command-line arguments of the contacts smoke runner.
 *
 * Kept dependency-free on purpose. Positional (non `--`) arguments are
 * ignored, as before.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{db: ?string, serial: ?string, workaround: ?string,
 *            list: boolean, outDir: ?string, help: boolean}}
 * @throws {Error} On an unknown `--flag`, or when a value-taking flag is
 *   the last argument or is directly followed by another `--flag`.
 */
function parseArgs(argv) {
  const out = {
    db: null,
    serial: null,
    workaround: null,
    list: false,
    outDir: null,
    help: false,
  };
  // Flags that consume the following argument, mapped to the option they fill.
  const valueFlags = new Map([
    ["--db", "db"],
    ["--serial", "serial"],
    ["--workaround", "workaround"],
    ["--out", "outDir"],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];

    if (valueFlags.has(a)) {
      const value = argv[i + 1];
      // Without this check `--db` at the end of the line silently became
      // `undefined`, and `--serial --list` swallowed `--list` as the serial.
      if (value === undefined || value.startsWith("--")) {
        throw new Error(`missing value for ${a}`);
      }
      out[valueFlags.get(a)] = value;
      i += 1; // skip the value we just consumed
      continue;
    }

    switch (a) {
      case "--list":
        out.list = true;
        break;
      case "-h":
      case "--help":
        out.help = true;
        break;
      default:
        if (a.startsWith("--")) {
          throw new Error(`unknown flag: ${a}`);
        }
    }
  }
  return out;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Write the CLI usage text to stdout.
 *
 * The exit-code table mentions that code 2 is also used for invalid
 * command-line flags, because `main` exits with 2 when argument parsing
 * throws.
 */
function printHelp() {
  process.stdout.write(`
smoke-system-data-contacts — drive sidecar end-to-end for contacts.

  --db <path>          Parse a contacts2.db already on disk (skip ADB).
  --serial <serial>    Target this ADB device for the pull step.
  --workaround sdcard  Pull from ${SDCARD_WORKAROUND_PATH} instead of /data/data.
                       Required on stock Android (no adb root).
  --list               Just list ADB devices and exit.
  --out <dir>          Write NormalizedBatch JSON here. Default: ./out/<ts>.
  -h, --help           Show this help.

Env:
  FORENSICS_BRIDGE_PYTHON   override Python interpreter (default: python).

Exit codes:
  0   success
  1   sidecar / hub error
  2   invalid Persons (schema validation failed), or invalid command-line flags
`);
}
|
|
125
|
+
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Helpers
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
/**
 * Build a `YYYYMMDD-HHMMSS` slug from local-time date fields; used to name
 * the default output directory.
 *
 * @param {Date} [date=new Date()] - Instant to format. Defaults to "now";
 *   accepting it as a parameter keeps the helper deterministic under test.
 * @returns {string} e.g. `20240105-030409`.
 */
function timestampSlug(date = new Date()) {
  const pad2 = (n) => String(n).padStart(2, "0");
  // getMonth() is zero-based, hence the + 1.
  const day = `${date.getFullYear()}${pad2(date.getMonth() + 1)}${pad2(date.getDate())}`;
  const time = `${pad2(date.getHours())}${pad2(date.getMinutes())}${pad2(date.getSeconds())}`;
  return `${day}-${time}`;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Construct (but do not start) the SidecarSupervisor for the Python bridge.
 *
 * The sidecar is launched as `<PYTHON> -u -m forensics_bridge.ipc_server`
 * with SIDECAR_ROOT as both the working directory and PYTHONPATH.
 *
 * @returns {SidecarSupervisor}
 */
function makeSupervisor() {
  const supervisorOptions = {
    command: PYTHON,
    // -u: unbuffered Python stdio; -m: run the IPC server as a module.
    args: ["-u", "-m", "forensics_bridge.ipc_server"],
    cwd: SIDECAR_ROOT,
    // NOTE(review): 0 presumably disables periodic health checks for this
    // short-lived run — confirm against SidecarSupervisor.
    healthCheckIntervalMs: 0,
    env: { PYTHONPATH: SIDECAR_ROOT },
  };
  return new SidecarSupervisor(supervisorOptions);
}
|
|
148
|
+
|
|
149
|
+
/**
 * Emit one NDJSON log record: `error` records go to stderr, everything
 * else to stdout.
 *
 * Callers pass raw sidecar payloads as `extra`, so two guards apply:
 *  - the reserved fields `ts`, `level` and `msg` always win over same-named
 *    keys in `extra`;
 *  - a non-object `extra` (array, string, number, ...) is nested under
 *    `data` instead of being spread into index keys.
 *
 * @param {string} level - Severity; only `"error"` changes the stream.
 * @param {string} msg - Short message.
 * @param {*} [extra={}] - Additional structured fields.
 * @returns {void}
 */
function log(level, msg, extra = {}) {
  let payload;
  if (extra == null) {
    payload = {};
  } else if (typeof extra === "object" && !Array.isArray(extra)) {
    payload = extra;
  } else {
    payload = { data: extra };
  }

  const reserved = { ts: new Date().toISOString(), level, msg };
  // Spreading `reserved` first fixes the key order (ts, level, msg, ...);
  // spreading it again last makes its values authoritative.
  const line = JSON.stringify({ ...reserved, ...payload, ...reserved });

  const stream = level === "error" ? process.stderr : process.stdout;
  stream.write(line + "\n");
}
|
|
159
|
+
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// Main
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
/**
 * Run the contacts smoke flow end-to-end.
 *
 * Steps: parse CLI flags, validate the requested mode, create the output
 * directory, start the sidecar, then either list ADB devices (`--list`) or
 * obtain a contacts database (`--db`, or `android.pull_file` for
 * `--serial`), parse it via `system.parse_contacts`, validate every Person
 * with `validatePerson`, and write JSON dumps into the output directory.
 * The sidecar is stopped in a `finally` once it has been started.
 *
 * Exit behaviour: calls `process.exit(2)` on a CLI parse error; sets
 * `process.exitCode = 2` when any Person fails validation; other failures
 * are thrown to the caller.
 *
 * @param {string[]} rawArgs - Arguments after the script name.
 * @returns {Promise<void>}
 * @throws {Error} On unusable flags, a failed pull, or a missing database.
 */
async function main(rawArgs) {
  let args;
  try {
    args = parseArgs(rawArgs);
  } catch (err) {
    console.error(err.message);
    printHelp();
    process.exit(2);
  }
  if (args.help) {
    printHelp();
    return;
  }

  // Fail fast on unusable input BEFORE creating the output directory or
  // spawning the sidecar; previously these problems only surfaced after the
  // Python process was already running.
  const needsPull = !args.list && !args.db;
  if (needsPull) {
    if (!args.serial) {
      throw new Error(
        "neither --db nor --serial provided; nothing to extract",
      );
    }
    // A typo such as `--workaround sdcrad` used to fall back silently to the
    // root-only /data/data path.
    if (args.workaround != null && args.workaround !== "sdcard") {
      throw new Error(
        `unsupported --workaround value: ${args.workaround} (expected "sdcard")`,
      );
    }
  }

  const outDir = path.resolve(
    args.outDir || path.join(process.cwd(), "out", timestampSlug()),
  );
  fs.mkdirSync(outDir, { recursive: true });
  log("info", "output directory ready", { outDir });

  const sup = makeSupervisor();
  // Stream sidecar pino-style logs out as ndjson so the user sees timing.
  sup.on("log", (line) => process.stderr.write(`[sidecar] ${line}\n`));

  await sup.start({ readyTimeoutMs: 10_000 });
  log("info", "sidecar ready");

  try {
    // ---------- list-only path ----------
    if (args.list) {
      const devices = await sup.invoke("android.list_devices");
      log("info", "adb devices", devices);
      console.log(JSON.stringify(devices, null, 2));
      return;
    }

    // ---------- choose source for contacts2.db ----------
    let dbPath = args.db ? path.resolve(args.db) : null;

    if (!dbPath) {
      const remotePath =
        args.workaround === "sdcard" ? SDCARD_WORKAROUND_PATH : SYSTEM_PROVIDER_PATH;
      log("info", "pulling from device", { serial: args.serial, remotePath });
      const pulled = await sup.invoke(
        "android.pull_file",
        {
          serial: args.serial,
          remote_path: remotePath,
          local_dir: outDir,
        },
        { timeoutMs: 60_000 },
      );
      log("info", "pull completed", pulled);
      // Guard so a malformed reply produces a clear error instead of
      // "contacts db not found at undefined".
      if (!pulled || typeof pulled.local !== "string") {
        throw new Error("android.pull_file did not return a local path");
      }
      dbPath = pulled.local;
    }

    if (!fs.existsSync(dbPath)) {
      throw new Error(`contacts db not found at ${dbPath}`);
    }

    // ---------- parse + validate ----------
    const persons = [];
    let chunks = 0;
    const t0 = Date.now();
    const parseResult = await sup.invoke(
      "system.parse_contacts",
      {
        data_path: dbPath,
        device_serial: args.serial || null,
      },
      {
        timeoutMs: 120_000,
        onProgress: (p) => log("info", "progress", p),
        // Persons arrive in batches; accumulate them for validation below.
        onChunk: (batch) => {
          chunks += 1;
          for (const person of batch.persons || []) persons.push(person);
        },
      },
    );
    const wallMs = Date.now() - t0;
    log("info", "parse completed", {
      ...parseResult,
      chunks,
      wallMs,
      personsCollected: persons.length,
    });

    // ---------- hub-side schema check ----------
    const invalid = [];
    for (const p of persons) {
      const v = validatePerson(p);
      if (!v.valid) invalid.push({ id: p.id, errors: v.errors });
    }
    if (invalid.length) {
      log("error", "validation failed", { count: invalid.length });
      fs.writeFileSync(
        path.join(outDir, "validation-errors.json"),
        JSON.stringify(invalid, null, 2),
      );
      // exitCode (not exit) so the dump below is still written and the
      // finally block still stops the sidecar.
      process.exitCode = 2;
    } else {
      log("info", "all persons passed UnifiedSchema validation");
    }

    // ---------- persist for inspection ----------
    const dump = {
      schemaVersion: "0.1.0",
      generatedAt: new Date().toISOString(),
      sidecar: { pythonRoot: SIDECAR_ROOT },
      input: {
        dbPath,
        serial: args.serial || null,
        workaround: args.workaround || null,
      },
      parseResult,
      wallMs,
      persons,
    };
    const dumpPath = path.join(outDir, "contacts-normalized-batch.json");
    fs.writeFileSync(dumpPath, JSON.stringify(dump, null, 2));
    log("info", "wrote dump", { dumpPath, bytes: fs.statSync(dumpPath).size });

    // ---------- compact summary ----------
    log("info", "summary", {
      totalPersons: parseResult.totalPersons,
      withPhone: parseResult.stats?.with_phone,
      withEmail: parseResult.stats?.with_email,
      starred: parseResult.stats?.starred,
      invalidPersons: invalid.length,
      outDir,
    });
  } finally {
    await sup.stop({ graceMs: 2000 });
  }
}
|
|
304
|
+
|
|
305
|
+
/**
 * Last-resort handler for a rejected `main`: log a structured "fatal"
 * record, print the stack when there is one, and exit with status 1.
 */
function reportFatalAndExit(err) {
  const { name, message, code } = err;
  log("error", "fatal", { name, message, code });
  if (err.stack) {
    process.stderr.write(err.stack + "\n");
  }
  process.exit(1);
}

// Script entry point: run with the arguments that follow the script path.
main(process.argv.slice(2)).catch(reportFatalAndExit);
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Full system-data smoke / real-device runner — Phase 4.5.7.
|
|
4
|
+
*
|
|
5
|
+
* Drives the full SystemDataAdapter end-to-end across all 4 sources
|
|
6
|
+
* (contacts / calllog / sms / wifi), exercising:
|
|
7
|
+
*
|
|
8
|
+
* - PythonSidecarAdapter base class
|
|
9
|
+
* - SidecarSupervisor lifecycle
|
|
10
|
+
* - SystemDataAdapter.authenticate
|
|
11
|
+
* - 4 sidecar parse_* methods
|
|
12
|
+
* - per-entity hub-side UnifiedSchema validation
|
|
13
|
+
* - dataDisclosure metadata sanitization
|
|
14
|
+
*
|
|
15
|
+
* Replaces the contacts-only `smoke-system-data-contacts.js`. The older
|
|
16
|
+
* script remains for users who only want to exercise the Phase 4.5.2 slice.
|
|
17
|
+
*
|
|
18
|
+
* Usage:
|
|
19
|
+
*
|
|
20
|
+
* # Offline mode — pre-extracted host files (no ADB)
|
|
21
|
+
* node scripts/smoke-system-data.js \
|
|
22
|
+
* --contacts-db ./fixtures/contacts2.db \
|
|
23
|
+
* --calllog-db ./fixtures/contacts2.db \
|
|
24
|
+
* --wifi-dir ./fixtures/wifi/
|
|
25
|
+
*
|
|
26
|
+
* # Live device, /sdcard workaround (non-root)
|
|
27
|
+
* node scripts/smoke-system-data.js \
|
|
28
|
+
* --serial 24115RA8ECabc123 --extract-mode sdcard
|
|
29
|
+
*
|
|
30
|
+
* # Live device with adb root
|
|
31
|
+
* node scripts/smoke-system-data.js --serial 24115RA8ECabc123
|
|
32
|
+
*
|
|
33
|
+
* # Include SMS (default off — explicit opt-in for legality)
|
|
34
|
+
* node scripts/smoke-system-data.js --serial XYZ --extract-mode sdcard --include sms
|
|
35
|
+
*
|
|
36
|
+
* # Disable contacts but include sms
|
|
37
|
+
 *   node scripts/smoke-system-data.js --sms-db ./fixtures/mmssms.db --include sms --exclude contacts
|
|
38
|
+
*
|
|
39
|
+
* Exit codes:
|
|
40
|
+
* 0 success
|
|
41
|
+
* 1 sidecar / hub error
|
|
42
|
+
* 2 invalid entities (schema validation failed)
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
"use strict";
|
|
46
|
+
|
|
47
|
+
const path = require("node:path");
|
|
48
|
+
const fs = require("node:fs");
|
|
49
|
+
const os = require("node:os");
|
|
50
|
+
|
|
51
|
+
const { SidecarSupervisor } = require("../lib/sidecar");
|
|
52
|
+
const {
|
|
53
|
+
SystemDataAdapter,
|
|
54
|
+
sanitizeInclude,
|
|
55
|
+
DEFAULT_INCLUDE,
|
|
56
|
+
} = require("../lib/adapters/system-data");
|
|
57
|
+
const { validate } = require("../lib/schemas");
|
|
58
|
+
|
|
59
|
+
const SIDECAR_ROOT = path.resolve(__dirname, "..", "..", "personal-data-hub-bridge");
|
|
60
|
+
const PYTHON = process.env.FORENSICS_BRIDGE_PYTHON || "python";
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
/**
 * Parse the command-line arguments of the full system-data smoke runner.
 *
 * Path-valued flags (`--contacts-db`, `--calllog-db`, `--sms-db`,
 * `--wifi-dir`) are resolved to absolute paths. `--include` / `--exclude`
 * may be repeated. Positional (non `--`) arguments are ignored, as before.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{serial: ?string, extractMode: string, dataPaths: Object,
 *            include: string[], exclude: string[], list: boolean,
 *            outDir: ?string, help: boolean}}
 * @throws {Error} On an unknown `--flag`, or when a value-taking flag is
 *   the last argument or is directly followed by another `--flag`.
 */
function parseArgs(argv) {
  const out = {
    serial: null,
    extractMode: "normal",
    dataPaths: {},
    include: [],
    exclude: [],
    list: false,
    outDir: null,
    help: false,
  };
  // Flags that consume the following argument, mapped to how it is stored.
  const valueFlags = new Map([
    ["--serial", (v) => { out.serial = v; }],
    ["--extract-mode", (v) => { out.extractMode = v; }],
    ["--contacts-db", (v) => { out.dataPaths.contacts = path.resolve(v); }],
    ["--calllog-db", (v) => { out.dataPaths.calllog = path.resolve(v); }],
    ["--sms-db", (v) => { out.dataPaths.sms = path.resolve(v); }],
    ["--wifi-dir", (v) => { out.dataPaths.wifi = path.resolve(v); }],
    ["--include", (v) => { out.include.push(v); }],
    ["--exclude", (v) => { out.exclude.push(v); }],
    ["--out", (v) => { out.outDir = v; }],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];

    const store = valueFlags.get(a);
    if (store) {
      const value = argv[i + 1];
      // Without this check a trailing `--contacts-db` reached
      // path.resolve(undefined) and failed with an opaque TypeError.
      if (value === undefined || value.startsWith("--")) {
        throw new Error(`missing value for ${a}`);
      }
      store(value);
      i += 1; // skip the value we just consumed
      continue;
    }

    switch (a) {
      case "--list":
        out.list = true;
        break;
      case "-h":
      case "--help":
        out.help = true;
        break;
      default:
        if (a.startsWith("--")) throw new Error(`unknown flag: ${a}`);
    }
  }
  return out;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Write the CLI usage text to stdout.
 *
 * Includes the exit-code table from the file header; code 2 is listed for
 * invalid command-line flags too, because `main` exits with 2 when argument
 * parsing throws.
 */
function printHelp() {
  process.stdout.write(`
smoke-system-data — drive SystemDataAdapter end-to-end for all 4 sources.

Modes (mutually exclusive):
  Pre-extracted (offline):
    --contacts-db <path>   contacts2.db on disk
    --calllog-db <path>    calls db on disk (may be same as --contacts-db)
    --sms-db <path>        mmssms.db on disk
    --wifi-dir <path>      directory with WifiConfigStore.xml / wpa_supplicant.conf

  Live device (ADB):
    --serial <serial>      target device
    --extract-mode normal  pull from /data/data (requires adb root)
    --extract-mode sdcard  pull from /sdcard/Download/ (Termux+tsu workaround)

Source gating:
  --include <key>          force-enable a source (key: contacts/calllog/sms/wifi)
  --exclude <key>          force-disable a source
  (defaults per adapter.dataDisclosure: contacts=on / calllog=on / sms=OFF / wifi=on)

Misc:
  --list                   list ADB devices and exit
  --out <dir>              output directory (default: ./out/<timestamp>)
  -h, --help               show this help

Env:
  FORENSICS_BRIDGE_PYTHON  python interpreter (default: python)

Exit codes:
  0  success
  1  sidecar / hub error
  2  invalid entities (schema validation failed), or invalid command-line flags
`);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Build a `YYYYMMDD-HHMMSS` slug from local-time date fields; used to name
 * the default output directory.
 *
 * @param {Date} [date=new Date()] - Instant to format. Defaults to "now";
 *   accepting it as a parameter keeps the helper deterministic under test.
 * @returns {string} e.g. `20240105-030409`.
 */
function timestampSlug(date = new Date()) {
  const pad2 = (n) => String(n).padStart(2, "0");
  // getMonth() is zero-based, hence the + 1.
  const day = `${date.getFullYear()}${pad2(date.getMonth() + 1)}${pad2(date.getDate())}`;
  const time = `${pad2(date.getHours())}${pad2(date.getMinutes())}${pad2(date.getSeconds())}`;
  return `${day}-${time}`;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Emit one NDJSON log record: `error` records go to stderr, everything
 * else to stdout.
 *
 * Callers pass raw adapter/sidecar payloads as `extra`, so two guards apply:
 *  - the reserved fields `ts`, `level` and `msg` always win over same-named
 *    keys in `extra`;
 *  - a non-object `extra` (array, string, number, ...) is nested under
 *    `data` instead of being spread into index keys.
 *
 * @param {string} level - Severity; only `"error"` changes the stream.
 * @param {string} msg - Short message.
 * @param {*} [extra={}] - Additional structured fields.
 * @returns {void}
 */
function log(level, msg, extra = {}) {
  let payload;
  if (extra == null) {
    payload = {};
  } else if (typeof extra === "object" && !Array.isArray(extra)) {
    payload = extra;
  } else {
    payload = { data: extra };
  }

  const reserved = { ts: new Date().toISOString(), level, msg };
  // Spreading `reserved` first fixes the key order (ts, level, msg, ...);
  // spreading it again last makes its values authoritative.
  const line = JSON.stringify({ ...reserved, ...payload, ...reserved });

  const stream = level === "error" ? process.stderr : process.stdout;
  stream.write(line + "\n");
}
|
|
138
|
+
|
|
139
|
+
/**
 * Compute the effective per-source include map.
 *
 * Starts from a copy of DEFAULT_INCLUDE, switches on every `--include`
 * key, then switches off every `--exclude` key (so an exclude overrides an
 * include of the same source), and finally passes the map through
 * `sanitizeInclude`.
 *
 * @param {{include: string[], exclude: string[]}} args - Parsed CLI flags.
 * @returns {*} Whatever `sanitizeInclude` returns for the resolved map.
 * @throws {Error} When a key is not an own property of DEFAULT_INCLUDE.
 */
function resolveInclude(args) {
  const resolved = { ...DEFAULT_INCLUDE };
  const isKnownSource = (key) =>
    Object.prototype.hasOwnProperty.call(resolved, key);

  // Order matters: includes are applied before excludes.
  const overrides = [
    ["--include", args.include, true],
    ["--exclude", args.exclude, false],
  ];
  for (const [flag, keys, enabled] of overrides) {
    for (const key of keys) {
      if (!isKnownSource(key)) {
        throw new Error(`unknown source for ${flag}: ${key}`);
      }
      resolved[key] = enabled;
    }
  }
  return sanitizeInclude(resolved);
}
|
|
155
|
+
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
/**
 * Run the full system-data smoke flow end-to-end.
 *
 * Steps: parse CLI flags, create the output directory, start the sidecar,
 * then either list ADB devices (`--list`) or authenticate the
 * SystemDataAdapter, drain `adapter.sync(...)`, validate every payload with
 * `validate`, and write JSON dumps into the output directory. The sidecar
 * is stopped in a `finally` once it has been started.
 *
 * Exit behaviour: calls `process.exit(2)` on a CLI parse error; sets
 * `process.exitCode = 1` when authentication fails and `2` when any entity
 * fails validation; other failures are thrown to the caller.
 *
 * @param {string[]} rawArgs - Arguments after the script name.
 * @returns {Promise<void>}
 */
async function main(rawArgs) {
  let args;
  try {
    args = parseArgs(rawArgs);
  } catch (err) {
    console.error(err.message);
    printHelp();
    process.exit(2);
  }
  if (args.help) { printHelp(); return; }

  const outDir = path.resolve(args.outDir || path.join(process.cwd(), "out", timestampSlug()));
  fs.mkdirSync(outDir, { recursive: true });
  log("info", "output directory ready", { outDir });

  const supervisor = new SidecarSupervisor({
    command: PYTHON,
    args: ["-u", "-m", "forensics_bridge.ipc_server"],
    cwd: SIDECAR_ROOT,
    healthCheckIntervalMs: 0,
    env: { PYTHONPATH: SIDECAR_ROOT },
  });
  supervisor.on("log", (line) => process.stderr.write(`[sidecar] ${line}\n`));

  await supervisor.start({ readyTimeoutMs: 10_000 });
  log("info", "sidecar ready");

  const adapter = new SystemDataAdapter({ supervisor });

  try {
    if (args.list) {
      const devices = await supervisor.invoke("android.list_devices");
      console.log(JSON.stringify(devices, null, 2));
      return;
    }

    const include = resolveInclude(args);
    log("info", "include resolved", include);

    // 1. authenticate
    const auth = await adapter.authenticate({
      dataPaths: Object.keys(args.dataPaths).length ? args.dataPaths : undefined,
      serial: args.serial || undefined,
    });
    log("info", "authenticated", auth);
    if (!auth.ok) {
      log("error", "authentication failed", auth);
      // Do NOT call process.exit() here: it terminates immediately, skipping
      // the finally block below and leaving the sidecar process running.
      process.exitCode = 1;
      return;
    }

    // 2. drain sync stream + validate every entity
    const entitiesByType = { person: [], event: [], place: [], item: [], topic: [] };
    const invalid = [];
    let total = 0;
    const t0 = Date.now();

    const scratchDir = path.join(outDir, "scratch");
    fs.mkdirSync(scratchDir, { recursive: true });

    const progressEvents = [];
    for await (const raw of adapter.sync({
      include,
      serial: args.serial || undefined,
      extractMode: args.extractMode,
      dataPaths: Object.keys(args.dataPaths).length ? args.dataPaths : undefined,
      scratchDir,
      onProgress: (msg) => {
        // Every event is kept for the dump; only non-progress ones are logged.
        progressEvents.push(msg);
        if (msg.phase === "progress") return; // too chatty
        log("info", `adapter:${msg.source}`, msg);
      },
    })) {
      total += 1;
      // Entities of a type without a bucket are still counted and validated.
      const bucket = entitiesByType[raw.entityType];
      if (bucket) bucket.push(raw.payload);

      // Cross-source schema validation
      const v = validate(raw.payload);
      if (!v.valid) {
        invalid.push({
          id: raw.payload && raw.payload.id,
          entityType: raw.entityType,
          errors: v.errors,
        });
      }
    }
    const wallMs = Date.now() - t0;

    log("info", "sync drained", {
      wallMs,
      total,
      persons: entitiesByType.person.length,
      events: entitiesByType.event.length,
      places: entitiesByType.place.length,
      invalidCount: invalid.length,
    });

    if (invalid.length) {
      log("error", "validation failed", { count: invalid.length });
      fs.writeFileSync(
        path.join(outDir, "validation-errors.json"),
        JSON.stringify(invalid, null, 2),
      );
      // exitCode (not exit) so the dump below is still written and the
      // finally block still stops the sidecar.
      process.exitCode = 2;
    } else {
      log("info", "all entities passed UnifiedSchema validation");
    }

    // 3. write per-source NormalizedBatch JSON dumps for inspection
    const dump = {
      schemaVersion: "0.1.0",
      generatedAt: new Date().toISOString(),
      adapter: "system-data",
      adapterVersion: adapter.version,
      include,
      input: {
        serial: args.serial || null,
        extractMode: args.extractMode,
        dataPaths: args.dataPaths,
      },
      wallMs,
      totals: {
        persons: entitiesByType.person.length,
        events: entitiesByType.event.length,
        places: entitiesByType.place.length,
        invalid: invalid.length,
      },
      progressEvents,
      persons: entitiesByType.person,
      events: entitiesByType.event,
      places: entitiesByType.place,
    };
    const dumpPath = path.join(outDir, "system-data-batch.json");
    fs.writeFileSync(dumpPath, JSON.stringify(dump, null, 2));
    log("info", "wrote dump", { dumpPath, bytes: fs.statSync(dumpPath).size });

    log("info", "summary", {
      total,
      persons: entitiesByType.person.length,
      events: entitiesByType.event.length,
      places: entitiesByType.place.length,
      invalid: invalid.length,
      wallMs,
      outDir,
    });
  } finally {
    await supervisor.stop({ graceMs: 2000 });
  }
}
|
|
307
|
+
|
|
308
|
+
/**
 * Last-resort handler for a rejected `main`: log a structured "fatal"
 * record, print the stack when there is one, and exit with status 1.
 */
function reportFatalAndExit(err) {
  const { name, message, code } = err;
  log("error", "fatal", { name, message, code });
  if (err.stack) {
    process.stderr.write(err.stack + "\n");
  }
  process.exit(1);
}

// Script entry point: run with the arguments that follow the script path.
main(process.argv.slice(2)).catch(reportFatalAndExit);
|