@chainlesschain/personal-data-hub 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-history.test.js +395 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/analysis-skills.test.js +409 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +221 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/index.js +28 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +216 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +115 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +266 -0
- package/package.json +29 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Phase 5.7 smoke — exercises retry-with-backoff + onProgress through a
|
|
4
|
+
* flaky session. No real IMAP needed.
|
|
5
|
+
*
|
|
6
|
+
* 1. First 2 connect attempts throw ECONNRESET (transient)
|
|
7
|
+
* 2. 3rd attempt succeeds, yields 5 envelopes
|
|
8
|
+
* 3. Verify progress events fire in order: connecting → error → connecting
|
|
9
|
+
* → error → connecting → connected → mailbox-opened → fetching × 5 → done
|
|
10
|
+
* 4. Verify retry was capped (3 attempts total)
|
|
11
|
+
* 5. Verify AUTH_FAILED does NOT retry (separate run)
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
"use strict";
|
|
15
|
+
|
|
16
|
+
const { EmailAdapter } = require("../lib/adapters/email-imap/email-adapter");
|
|
17
|
+
const { ImapAuthFailedError } = require("../lib/adapters/email-imap/imap-session");
|
|
18
|
+
|
|
19
|
+
/**
 * Build a fake IMAP session factory whose sessions fail to connect a fixed
 * number of times before succeeding.
 *
 * The failure queue and the attempt counter are shared by every session the
 * factory creates, so failures are consumed across sessions rather than per
 * session.
 *
 * @param {Error[]} failuresFirst - Errors thrown by successive `connect()`
 *   calls, in order. The array is copied; the caller's array is not mutated.
 * @param {object[]} envelopes - Envelopes replayed by `fetchFullSince()`;
 *   each is compared by its `uid`.
 * @returns {{ factory: Function, recorder: { attempts: number } }} The
 *   session factory plus a recorder counting every `connect()` call.
 */
function makeFlakyFactory(failuresFirst, envelopes) {
  const pendingFailures = [...failuresFirst];
  const recorder = { attempts: 0 };

  const factory = () => ({
    async connect() {
      recorder.attempts += 1;
      if (pendingFailures.length > 0) {
        throw pendingFailures.shift();
      }
    },
    // The mailbox name is accepted for interface parity but not used.
    async openMailbox(_name) {
      return { uidValidity: 1, uidNext: 9999, exists: envelopes.length };
    },
    async *fetchFullSince(sinceUid = 0) {
      for (const env of envelopes) {
        if (env.uid > sinceUid) {
          // Envelopes without a raw source get an empty buffer.
          yield { ...env, source: env.source || Buffer.alloc(0) };
        }
      }
    },
    async close() {},
  });

  return { factory, recorder };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Build a synthetic envelope for the given UID.
 *
 * The timestamp is 10:00:00 UTC on day `uid` of May 2026. `Date.UTC`
 * normalizes out-of-range days, so a uid above 31 rolls into later months
 * instead of producing an Invalid Date.
 *
 * @param {number} uid - Message UID; also used as the day-of-month offset.
 * @returns {object} Envelope with uid, dates, flags, addresses and size.
 */
function makeEnv(uid) {
  const sentAtMs = Date.UTC(2026, 4, uid, 10, 0, 0);
  return {
    uid,
    // Separate Date instances so mutating one field cannot affect the other.
    internalDate: new Date(sentAtMs),
    flags: ["\\Seen"],
    messageId: `<m-${uid}@x>`,
    subject: `Subject ${uid}`,
    from: [{ address: `s${uid}@example.com` }],
    to: [{ address: "me@example.com" }],
    cc: [],
    date: new Date(sentAtMs),
    size: 1024,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Scenario A: the first two connect attempts throw a transient ECONNRESET,
 * the third succeeds and five envelopes are synced.
 *
 * Checks the number of emitted records, the number of connect attempts, the
 * two retriable error events, the `done` event and the five `fetching`
 * events. Any failed check sets `process.exitCode = 1`; the function itself
 * does not throw on a failed check.
 *
 * @returns {Promise<void>}
 */
async function scenarioRetrySucceeds() {
  console.log("\n=== Scenario A — 2 fails then success, 5 envelopes ===");
  const transient = Object.assign(new Error("ECONNRESET"), { code: "ECONNRESET" });
  const envs = [1, 2, 3, 4, 5].map(makeEnv);
  const { factory, recorder } = makeFlakyFactory([transient, transient], envs);

  const events = [];
  const a = new EmailAdapter({
    account: { provider: "qq", email: "me@qq.com", authCode: "x", folders: ["INBOX"] },
    sessionFactory: factory,
    parser: async () => ({ textBody: "", attachments: [] }),
    maxConnectRetries: 3,
    retryBaseDelayMs: 5,
    onProgress: (e) => events.push(e),
  });

  console.log("adapter.version =", a.version);
  console.log("adapter.capabilities (Phase 5.7) =", a.capabilities.filter((c) => c.startsWith("sync:")).join(", "));

  const raws = [];
  for await (const r of a.sync()) raws.push(r);
  console.log(`emitted ${raws.length} raws (expected 5)`);
  console.log(`connect attempts: ${recorder.attempts} (expected 3)`);

  // The expectations printed above are enforced here so a regression in
  // the retry cap or in record emission actually fails the smoke run.
  if (raws.length !== 5) {
    console.error(`FAIL: expected 5 raws, got ${raws.length}`);
    process.exitCode = 1;
  }
  if (recorder.attempts !== 3) {
    console.error(`FAIL: expected 3 connect attempts, got ${recorder.attempts}`);
    process.exitCode = 1;
  }

  const phaseSeq = events.map((e) => `${e.phase}${e.attempt ? "(" + e.attempt + ")" : ""}`);
  console.log("phase sequence:");
  for (const p of phaseSeq) console.log("  -", p);

  // Both failed attempts must surface as retriable error events.
  const errs = events.filter((e) => e.phase === "error");
  if (errs.length !== 2) {
    console.error(`FAIL: expected 2 error events, got ${errs.length}`);
    process.exitCode = 1;
  } else if (!errs.every((e) => e.retriable === true)) {
    console.error("FAIL: error events should be retriable=true during first 2 attempts");
    process.exitCode = 1;
  } else {
    console.log("error events: ✓ both marked retriable");
  }

  const done = events.find((e) => e.phase === "done");
  if (!done || done.emitted !== 5) {
    console.error(`FAIL: expected done event with emitted=5, got ${JSON.stringify(done)}`);
    process.exitCode = 1;
  } else {
    console.log(`done event: ✓ emitted=${done.emitted} durationMs=${done.durationMs}`);
  }

  const fetches = events.filter((e) => e.phase === "fetching");
  if (fetches.length !== 5) {
    console.error(`FAIL: expected 5 fetching events, got ${fetches.length}`);
    process.exitCode = 1;
  } else if (fetches[0].total !== 5 || fetches[4].current !== 5) {
    console.error(`FAIL: fetching events should run 1..5 of 5`);
    process.exitCode = 1;
  } else {
    console.log("fetching events: ✓ 5 events with current/total");
  }
}
|
|
120
|
+
|
|
121
|
+
/**
 * Scenario B: an authentication failure must stop after a single connect
 * attempt, propagate to the caller with code "AUTH_FAILED", and be reported
 * through an error progress event whose `retriable` flag is false.
 *
 * A failed check is reported on stderr and sets `process.exitCode = 1`.
 *
 * @returns {Promise<void>}
 */
async function scenarioAuthFailedNoRetry() {
  const fail = (message) => {
    console.error(message);
    process.exitCode = 1;
  };

  console.log("\n=== Scenario B — AUTH_FAILED never retries ===");
  const authErr = new ImapAuthFailedError("bad creds");
  // Three queued failures; only the first should ever be consumed.
  const queuedFailures = Array(3).fill(authErr);
  const { factory, recorder } = makeFlakyFactory(queuedFailures, []);

  const events = [];
  const adapter = new EmailAdapter({
    account: { provider: "qq", email: "me@qq.com", authCode: "x", folders: ["INBOX"] },
    sessionFactory: factory,
    parser: async () => ({}),
    maxConnectRetries: 3,
    retryBaseDelayMs: 1,
    onProgress: (e) => events.push(e),
  });

  let caught = null;
  try {
    // Drain the iterator; the records themselves are irrelevant here.
    for await (const _r of adapter.sync()) {
      /* drain */
    }
  } catch (err) {
    caught = err;
  }

  console.log(`connect attempts: ${recorder.attempts} (expected 1)`);
  console.log(`caught.code: ${caught && caught.code}`);

  if (recorder.attempts !== 1) {
    fail("FAIL: AUTH_FAILED should not retry");
  } else if (caught?.code !== "AUTH_FAILED") {
    fail("FAIL: error should propagate as AUTH_FAILED");
  } else {
    console.log("AUTH short-circuit: ✓");
  }

  const errEvent = events.find((e) => e.phase === "error");
  if (!errEvent) {
    fail("FAIL: error progress event missing");
  } else if (errEvent.retriable !== false) {
    fail("FAIL: AUTH_FAILED error event should have retriable=false");
  } else {
    console.log("AUTH error event: ✓ retriable=false");
  }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Run both smoke scenarios in order and print the PASSED banner only when
 * neither of them set a non-zero `process.exitCode`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("== Phase 5.7 smoke (retry + progress) ==");

  for (const scenario of [scenarioRetrySucceeds, scenarioAuthFailedNoRetry]) {
    await scenario();
  }

  if (process.exitCode) return;
  console.log("\n== Phase 5.7 smoke PASSED ==");
}
|
|
177
|
+
|
|
178
|
+
// Entry point: an unexpected rejection marks the run as failed without
// forcing an immediate exit.
main().catch((crash) => {
  process.exitCode = 1;
  console.error("smoke crashed:", crash);
});
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Smoke / real-device runner for Phase 4.5.2 — Contacts extraction.
|
|
4
|
+
*
|
|
5
|
+
* Drives the full vertical:
|
|
6
|
+
*
|
|
7
|
+
* ┌──────────────────────────────────────────────────────────────┐
|
|
8
|
+
* │ 1. (optional) android.list_devices │
|
|
9
|
+
* │ 2. (optional) android.pull_file /data/.../contacts2.db │
|
|
10
|
+
* │ 3. system.parse_contacts → Persons │
|
|
11
|
+
* │ 4. hub-side UnifiedSchema validatePerson() on every row │
|
|
12
|
+
* │ 5. write NormalizedBatch JSON to ./out/<timestamp>/ │
|
|
13
|
+
* └──────────────────────────────────────────────────────────────┘
|
|
14
|
+
*
|
|
15
|
+
* Three modes:
|
|
16
|
+
*
|
|
17
|
+
* --db <path> Skip ADB entirely; parse a contacts2.db already
|
|
18
|
+
* on disk. Best for first-run sanity on the dev box.
|
|
19
|
+
*
|
|
20
|
+
* --serial <serial> Run `adb pull` first. Requires `adb root` (most
|
|
21
|
+
* retail builds reject this) OR a userdebug build.
|
|
22
|
+
* On a stock Redmi 24115RA8EC use --workaround.
|
|
23
|
+
*
|
|
24
|
+
* --workaround sdcard Look for a contacts2.db copy at
|
|
25
|
+
* /sdcard/Download/contacts2.db (you copied it
|
|
26
|
+
* out via Termux + tsu, or via Mi cloud export,
|
|
27
|
+
* per docs/design/Adapter_System_Data.md §2.1).
|
|
28
|
+
*
|
|
29
|
+
* Usage examples:
|
|
30
|
+
*
|
|
31
|
+
* # Local fixture
|
|
32
|
+
* node scripts/smoke-system-data-contacts.js --db ./fixtures/contacts2.db
|
|
33
|
+
*
|
|
34
|
+
* # List attached ADB devices and exit
|
|
35
|
+
* node scripts/smoke-system-data-contacts.js --list
|
|
36
|
+
*
|
|
37
|
+
* # Real device with /sdcard workaround
|
|
38
|
+
* node scripts/smoke-system-data-contacts.js \
|
|
39
|
+
* --serial 24115RA8ECabc123 --workaround sdcard
|
|
40
|
+
*
|
|
41
|
+
* Exits non-zero on any sidecar error or schema validation failure.
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
"use strict";
|
|
45
|
+
|
|
46
|
+
const path = require("node:path");
|
|
47
|
+
const fs = require("node:fs");
|
|
48
|
+
const os = require("node:os");
|
|
49
|
+
|
|
50
|
+
const { SidecarSupervisor } = require("../lib/sidecar");
|
|
51
|
+
const { validatePerson } = require("../lib/schemas");
|
|
52
|
+
|
|
53
|
+
const SIDECAR_ROOT = path.resolve(__dirname, "..", "..", "personal-data-hub-bridge");
|
|
54
|
+
const PYTHON = process.env.FORENSICS_BRIDGE_PYTHON || "python";
|
|
55
|
+
|
|
56
|
+
const SDCARD_WORKAROUND_PATH = "/sdcard/Download/contacts2.db";
|
|
57
|
+
const SYSTEM_PROVIDER_PATH =
|
|
58
|
+
"/data/data/com.android.providers.contacts/databases/contacts2.db";
|
|
59
|
+
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// CLI parsing — kept dependency-free
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
/**
 * Parse the script's command-line flags without any third-party dependency.
 *
 * Value flags (`--db`, `--serial`, `--workaround`, `--out`) require a
 * following value. A missing value, or a following token that is itself a
 * flag, is rejected instead of being silently stored. Tokens that do not
 * start with `--` and are not a known flag are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{ db: ?string, serial: ?string, workaround: ?string,
 *   list: boolean, outDir: ?string, help: boolean }} Parsed options.
 * @throws {Error} On an unknown `--flag` or a value flag without a value.
 */
function parseArgs(argv) {
  const out = {
    db: null,
    serial: null,
    workaround: null,
    list: false,
    outDir: null,
    help: false,
  };
  // Flags that consume the next token, mapped to the option they populate.
  const valueFlags = new Map([
    ["--db", "db"],
    ["--serial", "serial"],
    ["--workaround", "workaround"],
    ["--out", "outDir"],
  ]);

  for (let i = 0; i < argv.length; i += 1) {
    const a = argv[i];
    if (valueFlags.has(a)) {
      const value = argv[i + 1];
      // Without this guard `--serial --workaround sdcard` would store
      // "--workaround" as the serial and drop the workaround entirely.
      if (value === undefined || value === "-h" || String(value).startsWith("--")) {
        throw new Error(`missing value for ${a}`);
      }
      out[valueFlags.get(a)] = value;
      i += 1;
    } else if (a === "--list") {
      out.list = true;
    } else if (a === "-h" || a === "--help") {
      out.help = true;
    } else if (a.startsWith("--")) {
      throw new Error(`unknown flag: ${a}`);
    }
  }
  return out;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Write the usage text (flags, environment override, exit codes) to stdout.
 */
function printHelp() {
  const usage = `
smoke-system-data-contacts — drive sidecar end-to-end for contacts.

--db <path> Parse a contacts2.db already on disk (skip ADB).
--serial <serial> Target this ADB device for the pull step.
--workaround sdcard Pull from ${SDCARD_WORKAROUND_PATH} instead of /data/data.
Required on stock Android (no adb root).
--list Just list ADB devices and exit.
--out <dir> Write NormalizedBatch JSON here. Default: ./out/<ts>.
-h, --help Show this help.

Env:
FORENSICS_BRIDGE_PYTHON override Python interpreter (default: python).

Exit codes:
0 success
1 sidecar / hub error
2 invalid Persons (schema validation failed)
`;
  process.stdout.write(usage);
}
|
|
125
|
+
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Helpers
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
/**
 * Format a local-time timestamp as `YYYYMMDD-HHMMSS`.
 *
 * @param {Date} [now=new Date()] - Instant to format; defaults to the
 *   current time so existing zero-argument callers are unaffected.
 * @returns {string} The slug, built from the local-time date and time
 *   components of `now`.
 */
function timestampSlug(now = new Date()) {
  const two = (n) => String(n).padStart(2, "0");
  // getMonth() is zero-based, hence the + 1.
  const datePart = [now.getFullYear(), two(now.getMonth() + 1), two(now.getDate())].join("");
  const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()].map(two).join("");
  return `${datePart}-${timePart}`;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Create the supervisor for the Python bridge sidecar.
 *
 * The sidecar is launched as `<PYTHON> -u -m forensics_bridge.ipc_server`
 * with SIDECAR_ROOT as both the working directory and PYTHONPATH.
 *
 * @returns {SidecarSupervisor} A supervisor that has not been started yet.
 */
function makeSupervisor() {
  const sidecarOptions = {
    command: PYTHON,
    args: ["-u", "-m", "forensics_bridge.ipc_server"],
    cwd: SIDECAR_ROOT,
    // NOTE(review): 0 presumably disables periodic health checks — confirm
    // against SidecarSupervisor.
    healthCheckIntervalMs: 0,
    env: { PYTHONPATH: SIDECAR_ROOT },
  };
  return new SidecarSupervisor(sidecarOptions);
}
|
|
148
|
+
|
|
149
|
+
/**
 * Emit one JSON log line: errors go to stderr, everything else to stdout.
 *
 * The record always carries `ts`, `level` and `msg` first. Keys from `extra`
 * are merged in after them, but cannot overwrite those three fields, so a
 * payload that happens to contain e.g. `msg` does not corrupt the record.
 *
 * @param {string} level - Severity; only "error" is routed to stderr.
 * @param {string} msg - Human-readable message.
 * @param {object} [extra={}] - Additional fields to include in the record.
 */
function log(level, msg, extra = {}) {
  const core = { ts: new Date().toISOString(), level, msg };
  // First `core` fixes the key order, last `core` restores any field that a
  // colliding key in `extra` overwrote.
  const line = JSON.stringify(Object.assign({}, core, extra, core));
  const stream = level === "error" ? process.stderr : process.stdout;
  stream.write(`${line}\n`);
}
|
|
159
|
+
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// Main
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
/**
 * Drive the contacts extraction end to end: optionally list devices or pull
 * contacts2.db from a device, parse it through the sidecar, validate every
 * person, and write the results to the output directory.
 *
 * Exits the process with code 2 on a CLI parsing error; sets
 * `process.exitCode = 2` when any person fails validation. Other failures
 * reject the returned promise. The sidecar is stopped in all cases once it
 * has been started.
 *
 * @param {string[]} rawArgs - Command-line arguments after the script name.
 * @returns {Promise<void>}
 */
async function main(rawArgs) {
  let args;
  try {
    args = parseArgs(rawArgs);
  } catch (err) {
    console.error(err.message);
    printHelp();
    process.exit(2);
  }
  if (args.help) {
    printHelp();
    return;
  }

  const outDir = path.resolve(
    args.outDir || path.join(process.cwd(), "out", timestampSlug()),
  );
  // The list-only path writes nothing, so it must not leave an empty
  // timestamped directory behind on every invocation.
  if (!args.list) {
    fs.mkdirSync(outDir, { recursive: true });
    log("info", "output directory ready", { outDir });
  }

  const sup = makeSupervisor();
  // Forward sidecar log lines to stderr with a "[sidecar]" prefix.
  sup.on("log", (line) => process.stderr.write(`[sidecar] ${line}\n`));

  await sup.start({ readyTimeoutMs: 10_000 });
  log("info", "sidecar ready");

  try {
    // ---------- list-only path ----------
    if (args.list) {
      const devices = await sup.invoke("android.list_devices");
      log("info", "adb devices", devices);
      console.log(JSON.stringify(devices, null, 2));
      return;
    }

    // ---------- choose source for contacts2.db ----------
    let dbPath = args.db ? path.resolve(args.db) : null;

    if (!dbPath) {
      if (!args.serial) {
        throw new Error(
          "neither --db nor --serial provided; nothing to extract",
        );
      }
      const remotePath =
        args.workaround === "sdcard" ? SDCARD_WORKAROUND_PATH : SYSTEM_PROVIDER_PATH;
      log("info", "pulling from device", { serial: args.serial, remotePath });
      const pulled = await sup.invoke(
        "android.pull_file",
        {
          serial: args.serial,
          remote_path: remotePath,
          local_dir: outDir,
        },
        { timeoutMs: 60_000 },
      );
      log("info", "pull completed", pulled);
      dbPath = pulled.local;
    }

    if (!fs.existsSync(dbPath)) {
      throw new Error(`contacts db not found at ${dbPath}`);
    }

    // ---------- parse + validate ----------
    const persons = [];
    let chunks = 0;
    const t0 = Date.now();
    const parseResult = await sup.invoke(
      "system.parse_contacts",
      {
        data_path: dbPath,
        device_serial: args.serial || null,
      },
      {
        timeoutMs: 120_000,
        onProgress: (p) => log("info", "progress", p),
        // Persons arrive incrementally; accumulate them across chunks.
        onChunk: (batch) => {
          chunks += 1;
          for (const person of batch.persons || []) persons.push(person);
        },
      },
    );
    const wallMs = Date.now() - t0;
    log("info", "parse completed", {
      ...parseResult,
      chunks,
      wallMs,
      personsCollected: persons.length,
    });

    // ---------- hub-side schema check ----------
    const invalid = [];
    for (const p of persons) {
      const v = validatePerson(p);
      if (!v.valid) invalid.push({ id: p.id, errors: v.errors });
    }
    if (invalid.length) {
      log("error", "validation failed", { count: invalid.length });
      fs.writeFileSync(
        path.join(outDir, "validation-errors.json"),
        JSON.stringify(invalid, null, 2),
      );
      process.exitCode = 2;
    } else {
      log("info", "all persons passed UnifiedSchema validation");
    }

    // ---------- persist for inspection ----------
    const dump = {
      schemaVersion: "0.1.0",
      generatedAt: new Date().toISOString(),
      sidecar: { pythonRoot: SIDECAR_ROOT },
      input: {
        dbPath,
        serial: args.serial || null,
        workaround: args.workaround || null,
      },
      parseResult,
      wallMs,
      persons,
    };
    const dumpPath = path.join(outDir, "contacts-normalized-batch.json");
    fs.writeFileSync(dumpPath, JSON.stringify(dump, null, 2));
    log("info", "wrote dump", { dumpPath, bytes: fs.statSync(dumpPath).size });

    // ---------- compact summary ----------
    // Guarded access: the final invoke result may be empty even though
    // chunks were delivered, and the summary must not crash after the dump
    // has already been written.
    log("info", "summary", {
      totalPersons: parseResult?.totalPersons,
      withPhone: parseResult?.stats?.with_phone,
      withEmail: parseResult?.stats?.with_email,
      starred: parseResult?.stats?.starred,
      invalidPersons: invalid.length,
      outDir,
    });
  } finally {
    await sup.stop({ graceMs: 2000 });
  }
}
|
|
304
|
+
|
|
305
|
+
// Entry point: report a fatal error as a structured log line, print the
// stack if there is one, and terminate with exit code 1.
main(process.argv.slice(2)).catch((err) => {
  const fatalDetails = { name: err.name, message: err.message, code: err.code };
  log("error", "fatal", fatalDetails);
  if (err.stack) {
    process.stderr.write(err.stack + "\n");
  }
  process.exit(1);
});
|