@chainlesschain/personal-data-hub 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-history.test.js +395 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/analysis-skills.test.js +409 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +221 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/index.js +28 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +216 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +115 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +266 -0
- package/package.json +29 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SystemDataAdapter — Android system data (contacts / call log / SMS / WiFi).
|
|
3
|
+
*
|
|
4
|
+
* Phase 4.5.5. Sits on top of the forensics-bridge sidecar.
|
|
5
|
+
*
|
|
6
|
+
* Per-source pipeline (each one independent — disabling SMS doesn't break the
|
|
7
|
+
* others):
|
|
8
|
+
*
|
|
9
|
+
* contacts: android.pull_file → system.parse_contacts → Person stream
|
|
10
|
+
* calllog: android.pull_file → system.parse_calllog → Event(call) + Person stream
|
|
11
|
+
* sms: android.pull_file → system.parse_sms → Event(message) + Person stream
|
|
12
|
+
* wifi: android.pull_file → system.parse_wifi → Place stream
|
|
13
|
+
*
|
|
14
|
+
* Or, when `opts.dataPaths` is provided (e.g. user already adb-pulled files
|
|
15
|
+
* manually, or testing with a local fixture), skip the pull step.
|
|
16
|
+
*
|
|
17
|
+
* Privacy gating: `opts.include` decides which sub-sources run. Default per
|
|
18
|
+
* Adapter_System_Data.md §5.1 + OQ-SD1: contacts ON / calllog ON / sms OFF /
|
|
19
|
+
* wifi ON. The UI dialog re-confirms this on each sync.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
"use strict";
|
|
23
|
+
|
|
24
|
+
const path = require("node:path");
|
|
25
|
+
const os = require("node:os");
|
|
26
|
+
const fs = require("node:fs");
|
|
27
|
+
|
|
28
|
+
const { PythonSidecarAdapter } = require("../_python-sidecar-base");
|
|
29
|
+
|
|
30
|
+
const NAME = "system-data";
|
|
31
|
+
const VERSION = "0.1.0";
|
|
32
|
+
|
|
33
|
+
const DEFAULT_INCLUDE = Object.freeze({
|
|
34
|
+
contacts: true,
|
|
35
|
+
calllog: true,
|
|
36
|
+
sms: false, // off by default (user must opt in) — see Adapter_System_Data.md §5.1
|
|
37
|
+
wifi: true,
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Default Android system provider paths. Override via opts.remotePaths when
|
|
42
|
+
* a device uses a non-stock layout.
|
|
43
|
+
*/
|
|
44
|
+
const DEFAULT_REMOTE_PATHS = Object.freeze({
|
|
45
|
+
contacts:
|
|
46
|
+
"/data/data/com.android.providers.contacts/databases/contacts2.db",
|
|
47
|
+
calllog: "/data/data/com.android.providers.contacts/databases/calllog.db",
|
|
48
|
+
sms: "/data/data/com.android.providers.telephony/databases/mmssms.db",
|
|
49
|
+
wifi: "/data/misc/wifi/", // directory — pull_file works for one file, so wifi mode-A is dataPaths
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Per-source workaround paths under /sdcard/Download/ for stock Android
|
|
54
|
+
* (no `adb root`) — user copies files via Termux + tsu or MT Manager.
|
|
55
|
+
*/
|
|
56
|
+
const SDCARD_WORKAROUND_PATHS = Object.freeze({
|
|
57
|
+
contacts: "/sdcard/Download/contacts2.db",
|
|
58
|
+
calllog: "/sdcard/Download/calllog.db",
|
|
59
|
+
sms: "/sdcard/Download/mmssms.db",
|
|
60
|
+
wifi_xml: "/sdcard/Download/WifiConfigStore.xml",
|
|
61
|
+
wifi_conf: "/sdcard/Download/wpa_supplicant.conf",
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
/**
 * Adapter for Android system data (contacts / call log / SMS / WiFi), driven
 * through the sidecar exposed as `this.supervisor`.
 */
class SystemDataAdapter extends PythonSidecarAdapter {
  constructor(opts) {
    super(opts);
    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:android-adb",
      "sync:android-sdcard-workaround",
      "sync:host-dataPaths",
    ];
    this.rateLimits = { perDay: 12 }; // system data day-to-day churn is small
    this.dataDisclosure = {
      fields: [
        "contacts:name,phone,email,organization,notes,starred,photoUri",
        "calllog:number,duration,timestamp,type,name",
        "sms:address,body,timestamp,type,threadId,isRead",
        "wifi:ssid,securityType,hidden",
        // Explicitly NOT collected:
        //   - wifi:password (never written to vault, even when present in source)
      ],
      sensitivity: "high", // SMS may include third-party content
      legalGate: true, // requires explicit user agreement on third-party content
      retentionDays: undefined, // user-controlled (no default cap)
      notice:
        "短信和通话记录可能包含他人电话号码或对话内容;所有数据在本机加密存储,不向任何服务器上传(含 AI 分析)。",
      defaultInclude: { ...DEFAULT_INCLUDE },
    };
  }

  // -----------------------------------------------------------------------
  // PersonalDataAdapter — authenticate / healthCheck override
  // -----------------------------------------------------------------------

  /**
   * Verify the sidecar answers a ping and, unless the caller supplies
   * `dataPaths` (offline import), that at least one ADB device in state
   * "device" is attached.
   *
   * @param {object} ctx
   * @param {object} [ctx.dataPaths] If non-empty, ADB presence is not required.
   * @param {string} [ctx.serial]    If set, only that device is considered.
   * @returns {Promise<object>} `{ ok: true, mode: "offline", sidecarVersion }`,
   *   `{ ok: true, mode: "device", devices }`, or `{ ok: false, reason }`.
   *   A failing `sidecar.ping` is not caught here and rejects the promise.
   */
  async authenticate(ctx = {}) {
    const pong = await this.supervisor.invoke("sidecar.ping", {}, { timeoutMs: 3000 });
    if (ctx.dataPaths && Object.keys(ctx.dataPaths).length > 0) {
      return { ok: true, mode: "offline", sidecarVersion: pong.version };
    }

    let devices;
    try {
      const out = await this.supervisor.invoke("android.list_devices", {}, { timeoutMs: 5000 });
      devices = out.devices || [];
    } catch (err) {
      return {
        ok: false,
        reason: `android.list_devices failed: ${err.code || err.message}`,
      };
    }

    // A device only counts when it is in state "device". This also applies
    // when a serial is requested; otherwise a matching but unauthorized
    // device would pass, contradicting the "not authorized" reason below.
    const wanted = devices.filter(
      (d) => d.state === "device" && (!ctx.serial || d.serial === ctx.serial),
    );
    if (wanted.length === 0) {
      return {
        ok: false,
        reason: ctx.serial
          ? `device "${ctx.serial}" not found or not authorized`
          : "no authorized ADB devices attached",
      };
    }
    return { ok: true, mode: "device", devices: wanted };
  }

  // -----------------------------------------------------------------------
  // Orchestration (subclass hook)
  // -----------------------------------------------------------------------

  /**
   * Run the enabled sub-sources one after another: contacts, call log, SMS,
   * WiFi. For each source the local file comes from `opts.dataPaths` when
   * given, otherwise it is pulled from the device via `android.pull_file`.
   *
   * @param {object} opts
   * @param {object} [opts.include]      Per-source enable flags, merged over DEFAULT_INCLUDE.
   * @param {string} [opts.serial]       Required for any enabled source without a dataPaths entry.
   * @param {object} [opts.dataPaths]    Pre-extracted host paths, keys: {contacts, calllog, sms, wifi}.
   * @param {object} [opts.remotePaths]  Overrides merged over DEFAULT_REMOTE_PATHS
   *                                     (only consulted when extractMode is not "sdcard").
   * @param {"normal"|"sdcard"} [opts.extractMode]
   *                                     "sdcard" pulls from the SDCARD_WORKAROUND_PATHS locations.
   * @param {string} [opts.scratchDir]   Directory for pulled files; a fresh temp dir when omitted.
   * @param {(msg: object) => void} [opts.onProgress] Receives `{ source, phase, ... }` messages.
   * @param {Function} emit              Passed through to `_emitChunkAsRaws` for every chunk.
   * @returns {Promise<{sources: object[], scratchDir: string}>}
   * @throws {Error} When an enabled source has neither a dataPaths entry nor `opts.serial`,
   *   or when a sidecar call fails (a failed WiFi pull is reported as "skipped" instead).
   */
  async _runSidecar(opts, emit) {
    const include = { ...DEFAULT_INCLUDE, ...(opts.include || {}) };
    const dataPaths = opts.dataPaths || {};
    const extractMode = opts.extractMode || "normal";
    const remotePaths =
      extractMode === "sdcard"
        ? {
            contacts: SDCARD_WORKAROUND_PATHS.contacts,
            calllog: SDCARD_WORKAROUND_PATHS.calllog,
            sms: SDCARD_WORKAROUND_PATHS.sms,
            wifi: SDCARD_WORKAROUND_PATHS.wifi_xml,
          }
        : { ...DEFAULT_REMOTE_PATHS, ...(opts.remotePaths || {}) };

    const scratchDir =
      opts.scratchDir ||
      fs.mkdtempSync(path.join(os.tmpdir(), "system-data-sync-"));
    fs.mkdirSync(scratchDir, { recursive: true });

    const onProgress = typeof opts.onProgress === "function" ? opts.onProgress : null;
    const tellProgress = (source, phase, extra = {}) => {
      if (onProgress) onProgress({ source, phase, ...extra });
    };

    const sourcesRun = [];
    const deviceSerial = opts.serial || null;

    // Guard + progress notification shared by every device pull.
    const beginPull = (source, missingMessage) => {
      if (!opts.serial) {
        throw new Error(missingMessage);
      }
      tellProgress(source, "pulling");
    };

    // Pull remotePaths[source] into scratchDir; resolves to the local path
    // reported by the sidecar.
    const pullFromDevice = async (source, timeoutMs) => {
      const pulled = await this.supervisor.invoke(
        "android.pull_file",
        {
          serial: opts.serial,
          remote_path: remotePaths[source],
          local_dir: scratchDir,
        },
        { timeoutMs },
      );
      return pulled.local;
    };

    // Run one parse RPC, streaming chunks to `emit`, and record its summary.
    const parseSource = async (source, method, params, timeoutMs) => {
      tellProgress(source, "parsing", { dbPath: params.data_path });
      const summary = await this.supervisor.invoke(method, params, {
        timeoutMs,
        onChunk: (batch) => this._emitChunkAsRaws(batch, emit),
        onProgress: (p) => tellProgress(source, "progress", p),
      });
      sourcesRun.push({ source, ...summary });
    };

    // ─── Contacts ────────────────────────────────────────────────────────
    // Declared outside the `if`: the call-log and SMS parsers also receive
    // this path (as contacts_db_path) when it is known.
    let contactsLocal = dataPaths.contacts || null;
    if (include.contacts) {
      if (!contactsLocal) {
        beginPull(
          "contacts",
          "system-data: contacts enabled but no serial/dataPaths.contacts provided",
        );
        contactsLocal = await pullFromDevice("contacts", 60_000);
      }
      await parseSource(
        "contacts",
        "system.parse_contacts",
        { data_path: contactsLocal, device_serial: deviceSerial },
        120_000,
      );
    }

    // ─── Call log ────────────────────────────────────────────────────────
    if (include.calllog) {
      let calllogLocal = dataPaths.calllog || null;
      if (!calllogLocal) {
        beginPull("calllog", "system-data: calllog enabled but no serial/dataPaths.calllog");
        try {
          calllogLocal = await pullFromDevice("calllog", 60_000);
        } catch (err) {
          // Calls table may live in contacts2.db on pre-Android-11 builds.
          if (err.code === "EXTRACT_PERMISSION_DENIED" && contactsLocal) {
            calllogLocal = contactsLocal;
          } else {
            throw err;
          }
        }
      }
      await parseSource(
        "calllog",
        "system.parse_calllog",
        {
          data_path: calllogLocal,
          contacts_db_path: contactsLocal,
          device_serial: deviceSerial,
        },
        180_000,
      );
    }

    // ─── SMS ────────────────────────────────────────────────────────────
    if (include.sms) {
      let smsLocal = dataPaths.sms || null;
      if (!smsLocal) {
        beginPull("sms", "system-data: sms enabled but no serial/dataPaths.sms");
        smsLocal = await pullFromDevice("sms", 60_000);
      }
      await parseSource(
        "sms",
        "system.parse_sms",
        {
          data_path: smsLocal,
          contacts_db_path: contactsLocal,
          device_serial: deviceSerial,
        },
        300_000, // SMS can be 10K+ rows on long-term devices
      );
    }

    // ─── WiFi ───────────────────────────────────────────────────────────
    if (include.wifi) {
      let wifiLocal = dataPaths.wifi || null;
      let wifiSkipped = false;
      if (!wifiLocal) {
        beginPull("wifi", "system-data: wifi enabled but no serial/dataPaths.wifi");
        try {
          // The parser is handed the directory containing the pulled file.
          wifiLocal = path.dirname(await pullFromDevice("wifi", 30_000));
        } catch (err) {
          // Non-fatal — wifi often inaccessible without root. Skip this source.
          tellProgress("wifi", "skipped", { reason: err.code || err.message });
          wifiSkipped = true;
        }
      }
      if (!wifiSkipped) {
        await parseSource(
          "wifi",
          "system.parse_wifi",
          { data_path: wifiLocal, device_serial: deviceSerial },
          30_000,
        );
      }
    }

    return { sources: sourcesRun, scratchDir };
  }
}
|
|
336
|
+
|
|
337
|
+
module.exports = {
|
|
338
|
+
SystemDataAdapter,
|
|
339
|
+
SYSTEM_DATA_ADAPTER_NAME: NAME,
|
|
340
|
+
SYSTEM_DATA_ADAPTER_VERSION: VERSION,
|
|
341
|
+
DEFAULT_INCLUDE,
|
|
342
|
+
DEFAULT_REMOTE_PATHS,
|
|
343
|
+
SDCARD_WORKAROUND_PATHS,
|
|
344
|
+
};
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 9.2 — 12306 (China Railway) ticket adapter.
|
|
3
|
+
*
|
|
4
|
+
* Source format: 12306 doesn't have an official user export. We accept
|
|
5
|
+
* two file formats:
|
|
6
|
+
* 1. order-confirmation emails (already adapter-parsed by Phase 5 +
|
|
7
|
+
* Phase 5.4 travel template). Phase 9.2 reads those events back
|
|
8
|
+
* out of the vault and **re-normalizes** them into the
|
|
9
|
+
* adapter-neutral travel schema. This is the "rich vault →
|
|
10
|
+
* enrich" pattern.
|
|
11
|
+
* 2. user-uploaded JSON dump (e.g. exported from a 3rd-party 12306
|
|
12
|
+
* scraper, or hand-curated). Optional.
|
|
13
|
+
*
|
|
14
|
+
* For v0.5 we focus on (2) since (1) is purely vault-side derivation
|
|
15
|
+
* the AnalysisEngine can do at query time.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
"use strict";
|
|
19
|
+
|
|
20
|
+
const fs = require("node:fs");
|
|
21
|
+
const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
|
|
22
|
+
|
|
23
|
+
const NAME = "travel-12306";
|
|
24
|
+
const VERSION = "0.5.0";
|
|
25
|
+
|
|
26
|
+
/**
 * File-import adapter for 12306 (China Railway) order dumps.
 */
class Train12306Adapter {
  /**
   * @param {object} opts
   * @param {{username: string}} opts.account  12306 user id; required.
   * @param {string} [opts.dataPath]           Default dump file used by sync().
   * @throws {Error} When `opts.account.username` is missing.
   */
  constructor(opts = {}) {
    if (!opts.account || !opts.account.username) {
      throw new Error("Train12306Adapter: opts.account.username required (12306 user id)");
    }
    this.account = opts.account;
    this._dataPath = opts.dataPath || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = ["import:json", "parse:12306-orders"];
    this.extractMode = "file-import";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "12306:orderId / passengerName / trainNumber / fromStation / toStation / departureTime / arrivalTime / seat / price",
      ],
      sensitivity: "medium",
      legalGate: false,
    };
  }

  /** No remote login happens; simply reports the configured account. */
  async authenticate() {
    return { ok: true, account: this.account.username };
  }

  /** Always healthy; returns the current time as `lastChecked`. */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Read the dump file and yield one raw envelope per parsed order.
   * Yields nothing when no path is configured or the file does not exist.
   *
   * @param {object} [opts]
   * @param {string} [opts.dataPath] Overrides the constructor's dataPath.
   * @throws {Error} "parse failed" when the file content cannot be parsed;
   *   the underlying error is attached as `cause`.
   */
  async *sync(opts = {}) {
    const dataPath = opts.dataPath || this._dataPath;
    if (!dataPath || !fs.existsSync(dataPath)) return;

    const text = fs.readFileSync(dataPath, "utf-8");
    let records;
    try {
      records = parseRecords(text);
    } catch (err) {
      // Keep the original error reachable for debugging instead of only
      // copying its message.
      throw new Error(`Train12306Adapter: parse failed: ${err.message}`, { cause: err });
    }

    for (const record of records) {
      yield {
        adapter: NAME,
        originalId: String(record.recordId || record.orderId || record.ticketNumber),
        capturedAt: record.bookedAt || record.departureMs || Date.now(),
        payload: { record },
      };
    }
  }

  /**
   * Convert a raw envelope produced by sync() via normalizeTravelRecord.
   *
   * @throws {Error} When `raw.payload.record` is absent.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.record) {
      throw new Error("Train12306Adapter.normalize: raw.payload.record missing");
    }
    return normalizeTravelRecord(raw.payload.record, {
      adapterName: NAME,
      adapterVersion: VERSION,
    });
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Parse the text of a 12306 dump file. Accepts any of:
 *   - JSON array of order objects
 *   - JSON object { orders: [...] }
 *   - a single JSON order object (also what a one-line JSONL file looks like)
 *   - JSONL (one order object per line; lines not starting with "{" are skipped)
 *
 * A leading UTF-8 byte-order mark is ignored. Orders that orderToRecord
 * rejects (returns a falsy value for) are dropped.
 *
 * @param {string} text
 * @returns {object[]} Records produced by orderToRecord.
 * @throws {SyntaxError} When a JSONL line is not valid JSON.
 * @throws {TypeError} When `orders` is present but is not an array.
 */
function parseRecords(text) {
  // JSON.parse rejects a BOM, which would otherwise break every format.
  const source = text.replace(/^\uFEFF/, "");

  let parsed;
  try {
    parsed = JSON.parse(source);
  } catch (_e) {
    // Not a single JSON document — try JSONL.
    parsed = source
      .split(/\r?\n/)
      .filter((line) => line.trim().startsWith("{"))
      .map((line) => JSON.parse(line));
  }

  let orders;
  if (Array.isArray(parsed)) {
    orders = parsed;
  } else if (parsed === null || typeof parsed !== "object") {
    // JSON scalars / null carry no orders.
    orders = [];
  } else if (parsed.orders == null) {
    // No wrapper key: treat the object itself as one order. orderToRecord
    // discards it if it has no recognizable id.
    orders = [parsed];
  } else if (Array.isArray(parsed.orders)) {
    orders = parsed.orders;
  } else {
    throw new TypeError("12306 dump: `orders` must be an array");
  }

  return orders.map(orderToRecord).filter(Boolean);
}
|
|
107
|
+
|
|
108
|
+
/**
 * Map one order object from a 12306 dump onto the travel record shape used
 * by this adapter. Field names are looked up under several spellings
 * (camelCase and snake_case variants).
 *
 * @param {object} o Order object from the dump.
 * @returns {object|null} The record, or null when `o` is not an object or
 *   carries none of orderId / ticketNumber / id / order_no.
 */
function orderToRecord(o) {
  if (!o || typeof o !== "object") return null;
  const recordId = o.orderId || o.ticketNumber || o.id || o.order_no;
  if (!recordId) return null;

  // Tolerate "¥553.5" / "1,234.50" style strings; an unparseable price
  // yields no totalCost rather than { value: NaN }.
  let totalCost = null;
  if (o.price != null) {
    const rawPrice =
      typeof o.price === "string" ? o.price.replace(/[¥￥,\s]/g, "") : o.price;
    const value = parseFloat(rawPrice);
    if (Number.isFinite(value)) {
      totalCost = { value, currency: "CNY" };
    }
  }

  return {
    vendorId: "12306",
    recordId: String(recordId),
    vehicleType: "train",
    from: {
      station: o.fromStation || o.from_station || o.from,
      city: o.fromCity || o.from_city,
    },
    to: {
      station: o.toStation || o.to_station || o.to,
      city: o.toCity || o.to_city,
    },
    departureMs: numberOrParse(o.departureTime || o.departure_time || o.start_time),
    arrivalMs: numberOrParse(o.arrivalTime || o.arrival_time || o.end_time),
    carrier: "12306",
    vehicleNumber: o.trainNumber || o.train_no || o.trainNo,
    totalCost,
    traveler: o.passengerName || o.passenger || o.name,
    confirmationCode: o.ticketNumber || o.ticket_no || recordId,
    bookedAt: numberOrParse(o.bookedAt || o.order_time),
    extras: {
      seat: o.seat || o.seatType,
      seatNumber: o.seatNumber || o.seat_number,
      idCardLast6: o.idLast6 || undefined, // for cross-source EntityResolver linking
    },
  };
}
|
|
141
|
+
|
|
142
|
+
/**
 * Coerce a timestamp-like value: finite numbers pass through, all-digit
 * strings become integers, any other string is handed to
 * parseChineseDateTime, and everything else becomes null.
 */
function numberOrParse(v) {
  if (typeof v === "string") {
    const allDigits = /^\d+$/.test(v);
    return allDigits ? parseInt(v, 10) : parseChineseDateTime(v);
  }
  return Number.isFinite(v) ? v : null;
}
|
|
150
|
+
|
|
151
|
+
module.exports = { Train12306Adapter, parseRecords, NAME, VERSION };
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 9.4 — Amap (高德地图) location history adapter.
|
|
3
|
+
*
|
|
4
|
+
* Source: Amap stores recent navigation / search history in app-local
|
|
5
|
+
* SQLite DBs. Per sjqz/parsers/amap.py, the relevant tables are:
|
|
6
|
+
* - history_search (search queries)
|
|
7
|
+
* - history_route (planned routes)
|
|
8
|
+
* - favourites (saved locations like 公司 / 家)
|
|
9
|
+
*
|
|
10
|
+
* Adapter extractMode is "device-pull" — relies on Phase 7.5
|
|
11
|
+
* AndroidExtractor to pull the .db files from Amap's app-private
|
|
12
|
+
* directory. For v0.5 we accept a pre-pulled local path (file-import
|
|
13
|
+
* fallback) so users without root can hand-extract via adb backup.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
"use strict";
|
|
17
|
+
|
|
18
|
+
const fs = require("node:fs");
|
|
19
|
+
const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
|
|
20
|
+
|
|
21
|
+
const NAME = "travel-amap";
|
|
22
|
+
const VERSION = "0.5.0";
|
|
23
|
+
|
|
24
|
+
/**
 * Adapter that reads Amap route and search history out of a local SQLite
 * database file.
 */
class AmapAdapter {
  /**
   * @param {object} opts
   * @param {{deviceId: string}} opts.account  Required.
   * @param {string} [opts.dbPath]             Default database path used by sync().
   * @param {Function|object} [opts.dbDriverFactory]
   *   When a function, it is called with no arguments and must return the
   *   database constructor; any other value is used as the constructor
   *   directly. Defaults to loading "better-sqlite3-multiple-ciphers".
   * @throws {Error} When `opts.account.deviceId` is missing.
   */
  constructor(opts = {}) {
    if (!opts.account || !opts.account.deviceId) {
      throw new Error("AmapAdapter: opts.account.deviceId required");
    }
    this.account = opts.account;
    this._dbPath = opts.dbPath || null;
    this._dbDriverFactory = opts.dbDriverFactory || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = ["sync:sqlite", "parse:amap-history"];
    this.extractMode = "device-pull";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "amap:search_history (query / time / location)",
        "amap:route_history (from / to / mode / time)",
        "amap:favourites (name / address / coords)",
      ],
      sensitivity: "medium",
      legalGate: false,
    };
  }

  /** No remote login happens; simply reports the configured device id. */
  async authenticate() {
    return { ok: true, account: this.account.deviceId };
  }

  /** Always healthy; returns the current time as `lastChecked`. */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Open the database read-only and yield raw envelopes: first up to 5000
   * route rows (table `history_route`, falling back to `ROUTE_HISTORY`),
   * then up to 5000 `history_search` rows. Rows the row mappers reject are
   * skipped. Yields nothing when no path is configured or the file does not
   * exist. The database is closed when iteration finishes or is abandoned.
   *
   * @param {object} [opts]
   * @param {string} [opts.dbPath] Overrides the constructor's dbPath.
   */
  async *sync(opts = {}) {
    const dbPath = opts.dbPath || this._dbPath;
    if (!dbPath || !fs.existsSync(dbPath)) return;

    const Database = this._dbDriverFactory || (() => require("better-sqlite3-multiple-ciphers"));
    const Driver = typeof Database === "function" ? Database() : Database;
    const db = new Driver(dbPath, { readonly: true });

    // The row mappers populate departureMs (never bookedAt), so fall back to
    // it before resorting to "now" — otherwise every record would be stamped
    // with the sync time.
    const toRaw = (rec, kind) => ({
      adapter: NAME,
      originalId: rec.recordId,
      capturedAt: rec.bookedAt || rec.departureMs || Date.now(),
      payload: { record: rec, kind },
    });

    try {
      // History routes (most analytically valuable)
      const routes = trySelect(db, "SELECT * FROM history_route LIMIT 5000")
        || trySelect(db, "SELECT * FROM ROUTE_HISTORY LIMIT 5000")
        || [];
      for (const row of routes) {
        const rec = routeRowToRecord(row);
        if (rec) {
          yield toRaw(rec, "route");
        }
      }

      // History search (queries — produce trip events of type "visit")
      const searches = trySelect(db, "SELECT * FROM history_search LIMIT 5000") || [];
      for (const row of searches) {
        const rec = searchRowToRecord(row);
        if (rec) {
          yield toRaw(rec, "search");
        }
      }
    } finally {
      // Best-effort close; a failure here must not mask the real outcome.
      try { db.close(); } catch (_e) {}
    }
  }

  /**
   * Convert a raw envelope produced by sync() via normalizeTravelRecord.
   *
   * @throws {Error} When `raw.payload.record` is absent.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.record) {
      throw new Error("AmapAdapter.normalize: raw.payload.record missing");
    }
    return normalizeTravelRecord(raw.payload.record, {
      adapterName: NAME,
      adapterVersion: VERSION,
    });
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Run a SELECT and return all rows, or null if preparing or executing the
 * statement throws for any reason (callers use this to probe for tables).
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_err) {
    rows = null;
  }
  return rows;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Map one route-history row onto the travel record shape.
 *
 * @param {object} row Row object as returned by the database driver.
 * @returns {object|null} The record, or null when the row is falsy or has
 *   none of id / _id / uid / guid.
 */
function routeRowToRecord(row) {
  if (!row) return null;
  const id = row.id || row._id || row.uid || row.guid;
  if (!id) return null;

  // Missing / empty coordinates become null, but a real 0 is kept
  // (plain `|| null` would discard it).
  const coord = (v) => (v === 0 ? 0 : v || null);

  return {
    vendorId: "amap",
    recordId: `route-${id}`,
    vehicleType: row.mode === "drive" ? "car" : (row.mode || "trip"),
    from: {
      name: row.from_name || row.fromName || row.start,
      lat: coord(row.from_lat),
      lng: coord(row.from_lng),
    },
    to: {
      name: row.to_name || row.toName || row.dest,
      lat: coord(row.to_lat),
      lng: coord(row.to_lng),
    },
    departureMs: numberOrParse(row.time || row.create_time || row.start_time),
    carrier: "高德地图",
    extras: { mode: row.mode },
  };
}
|
|
132
|
+
|
|
133
|
+
/**
 * Map one search-history row onto the travel record shape. A search is
 * modelled as a "visit" with only a destination.
 *
 * @param {object} row Row object as returned by the database driver.
 * @returns {object|null} The record, or null when the row is falsy or has
 *   none of id / _id / guid.
 */
function searchRowToRecord(row) {
  if (!row) return null;
  const id = row.id || row._id || row.guid;
  if (!id) return null;

  // Missing / empty coordinates become null, but a real 0 is kept
  // (plain `|| null` would discard it).
  const coord = (v) => (v === 0 ? 0 : v || null);

  // Search = a "visit" intent
  return {
    vendorId: "amap",
    recordId: `search-${id}`,
    vehicleType: "visit",
    to: {
      name: row.keyword || row.query || row.poiname,
      lat: coord(row.lat),
      lng: coord(row.lng),
      city: row.city,
    },
    departureMs: numberOrParse(row.time || row.create_time),
    carrier: "高德地图",
    extras: { query: row.keyword || row.query },
  };
}
|
|
148
|
+
|
|
149
|
+
/**
 * Coerce a timestamp-like value to milliseconds. Finite numbers and
 * all-digit strings no greater than 1e10 are taken to be seconds and
 * multiplied by 1000; larger ones are returned unchanged. Any other string
 * goes to parseChineseDateTime; every other input yields null.
 */
function numberOrParse(v) {
  // Amap timestamps are sometimes seconds — heuristic upgrade to ms
  const upgradeToMs = (n) => (n > 1e10 ? n : n * 1000);

  if (typeof v === "string") {
    return /^\d+$/.test(v) ? upgradeToMs(parseInt(v, 10)) : parseChineseDateTime(v);
  }
  return Number.isFinite(v) ? upgradeToMs(v) : null;
}
|
|
163
|
+
|
|
164
|
+
module.exports = { AmapAdapter, NAME, VERSION };
|