@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,344 @@
1
+ /**
2
+ * SystemDataAdapter — Android system data (contacts / call log / SMS / WiFi).
3
+ *
4
+ * Phase 4.5.5. Sits on top of the forensics-bridge sidecar.
5
+ *
6
+ * Per-source pipeline (each one independent — disabling SMS doesn't break the
7
+ * others):
8
+ *
9
+ * contacts: android.pull_file → system.parse_contacts → Person stream
10
+ * calllog: android.pull_file → system.parse_calllog → Event(call) + Person stream
11
+ * sms: android.pull_file → system.parse_sms → Event(message) + Person stream
12
+ * wifi: android.pull_file → system.parse_wifi → Place stream
13
+ *
14
+ * Or, when `opts.dataPaths` is provided (e.g. user already adb-pulled files
15
+ * manually, or testing with a local fixture), skip the pull step.
16
+ *
17
+ * Privacy gating: `opts.include` decides which sub-sources run. Default per
18
+ * Adapter_System_Data.md §5.1 + OQ-SD1: contacts ON / calllog ON / sms OFF /
19
+ * wifi ON. The UI dialog re-confirms this on each sync.
20
+ */
21
+
22
+ "use strict";
23
+
24
+ const path = require("node:path");
25
+ const os = require("node:os");
26
+ const fs = require("node:fs");
27
+
28
+ const { PythonSidecarAdapter } = require("../_python-sidecar-base");
29
+
30
/** Adapter identifier, assigned to `SystemDataAdapter#name`. */
const NAME = "system-data";

/** Adapter version, assigned to `SystemDataAdapter#version`. */
const VERSION = "0.1.0";

/**
 * Sub-sources that run when the caller does not pass `opts.include`.
 * SMS is the only source that is off unless explicitly enabled (opt-in) —
 * see Adapter_System_Data.md §5.1.
 */
const DEFAULT_INCLUDE = Object.freeze({
  contacts: true,
  calllog: true,
  sms: false,
  wifi: true,
});

/**
 * Stock Android provider locations used in "normal" extract mode. Callers
 * can override individual entries through `opts.remotePaths` when a device
 * uses a different layout.
 */
const DEFAULT_REMOTE_PATHS = Object.freeze({
  contacts:
    "/data/data/com.android.providers.contacts/databases/contacts2.db",
  calllog: "/data/data/com.android.providers.contacts/databases/calllog.db",
  sms: "/data/data/com.android.providers.telephony/databases/mmssms.db",
  // This entry is a directory, while pull_file handles a single file, so the
  // practical way to import WiFi data in this mode is via opts.dataPaths.
  wifi: "/data/misc/wifi/",
});

/**
 * Locations under /sdcard/Download/ used by the "sdcard" extract mode, for
 * stock Android without `adb root`: the user first copies the files there
 * (e.g. with Termux + tsu, or MT Manager).
 */
const SDCARD_WORKAROUND_PATHS = Object.freeze({
  contacts: "/sdcard/Download/contacts2.db",
  calllog: "/sdcard/Download/calllog.db",
  sms: "/sdcard/Download/mmssms.db",
  wifi_xml: "/sdcard/Download/WifiConfigStore.xml",
  wifi_conf: "/sdcard/Download/wpa_supplicant.conf",
});
63
+
64
/**
 * Adapter for Android system data (contacts / call log / SMS / WiFi), driven
 * through sidecar RPC calls made via `this.supervisor.invoke`.
 *
 * `this.supervisor` and `this._emitChunkAsRaws` are not defined in this
 * class — presumably they come from PythonSidecarAdapter (confirm there).
 */
class SystemDataAdapter extends PythonSidecarAdapter {
  /**
   * @param {object} opts Passed through unchanged to PythonSidecarAdapter.
   */
  constructor(opts) {
    super(opts);
    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:android-adb",
      "sync:android-sdcard-workaround",
      "sync:host-dataPaths",
    ];
    this.rateLimits = { perDay: 12 }; // system data day-to-day churn is small
    this.dataDisclosure = {
      fields: [
        "contacts:name,phone,email,organization,notes,starred,photoUri",
        "calllog:number,duration,timestamp,type,name",
        "sms:address,body,timestamp,type,threadId,isRead",
        "wifi:ssid,securityType,hidden",
        // Explicitly NOT collected:
        //   - wifi:password (never written to vault, even when present in source)
      ],
      sensitivity: "high", // SMS may include third-party content
      legalGate: true, // requires explicit user agreement on third-party content
      retentionDays: undefined, // user-controlled (no default cap)
      notice:
        "短信和通话记录可能包含他人电话号码或对话内容;所有数据在本机加密存储,不向任何服务器上传(含 AI 分析)。",
      // Copied so consumers mutating the disclosure cannot touch the frozen default.
      defaultInclude: { ...DEFAULT_INCLUDE },
    };
  }

  // -----------------------------------------------------------------------
  // PersonalDataAdapter — authenticate / healthCheck override
  // -----------------------------------------------------------------------

  /**
   * Verify the sidecar is reachable AND there is at least one usable ADB
   * device (unless caller signals offline-import mode by passing dataPaths).
   *
   * A device counts as usable only when its reported state is "device"
   * (attached and authorized). This also applies when a specific serial is
   * requested, so an "unauthorized" or "offline" device with a matching
   * serial is rejected instead of being reported as ready.
   *
   * A failing `sidecar.ping` is not caught here: the rejection propagates to
   * the caller. A failing `android.list_devices` is reported as `ok: false`.
   *
   * @param {object} ctx
   * @param {object} [ctx.dataPaths]  If set (non-empty), ADB presence is not required.
   * @param {string} [ctx.serial]     Optional serial; auth checks just that device.
   * @returns {Promise<object>} One of
   *   `{ ok: true, mode: "offline", sidecarVersion }`,
   *   `{ ok: true, mode: "device", devices }`, or
   *   `{ ok: false, reason }`.
   */
  async authenticate(ctx = {}) {
    const pong = await this.supervisor.invoke("sidecar.ping", {}, { timeoutMs: 3000 });
    if (ctx.dataPaths && Object.keys(ctx.dataPaths).length > 0) {
      return { ok: true, mode: "offline", sidecarVersion: pong.version };
    }

    let devices;
    try {
      const out = await this.supervisor.invoke("android.list_devices", {}, { timeoutMs: 5000 });
      devices = out.devices || [];
    } catch (err) {
      return {
        ok: false,
        reason: `android.list_devices failed: ${err.code || err.message}`,
      };
    }

    // Require the authorized state in both branches; matching on serial alone
    // would accept a device that ADB cannot actually talk to.
    const wanted = devices.filter(
      (d) => d.state === "device" && (!ctx.serial || d.serial === ctx.serial),
    );
    if (wanted.length === 0) {
      return {
        ok: false,
        reason: ctx.serial
          ? `device "${ctx.serial}" not found or not authorized`
          : "no authorized ADB devices attached",
      };
    }
    return { ok: true, mode: "device", devices: wanted };
  }

  // -----------------------------------------------------------------------
  // Orchestration (subclass hook)
  // -----------------------------------------------------------------------

  /**
   * Orchestrate the 4 sub-sources sequentially: contacts, call log, SMS, WiFi.
   *
   * For each enabled source, a host path from `opts.dataPaths` is used when
   * present; otherwise the file is pulled from the device (which requires
   * `opts.serial`) into the scratch directory, then handed to the matching
   * `system.parse_*` sidecar method. Parsed chunks are forwarded through
   * `this._emitChunkAsRaws(batch, emit)`.
   *
   * Failure handling differs per source:
   *   - contacts / sms: any pull or parse error propagates.
   *   - calllog: a pull failing with code EXTRACT_PERMISSION_DENIED falls back
   *     to the contacts DB when one is available; other errors propagate.
   *   - wifi: a failed pull is non-fatal — the source is reported as
   *     "skipped" via onProgress and left out of the result.
   *
   * @param {object} opts
   * @param {object} [opts.include]      Per-source enable flags (defaults: DEFAULT_INCLUDE).
   * @param {string} [opts.serial]       Required when pulling from a live device.
   * @param {object} [opts.dataPaths]    Pre-extracted host paths, keys:
   *                                     {contacts, calllog, sms, wifi}.
   * @param {object} [opts.remotePaths]  Override default device paths. Only applied
   *                                     in "normal" mode; "sdcard" mode always uses
   *                                     SDCARD_WORKAROUND_PATHS.
   * @param {"normal"|"sdcard"} [opts.extractMode]
   *                                     "normal" = pull from /data/data (root only),
   *                                     "sdcard" = pull from /sdcard/Download (workaround).
   * @param {string} [opts.scratchDir]   Directory for pulled DBs. Default: a fresh
   *                                     temp directory under os.tmpdir().
   * @param {(msg: object) => void} [opts.onProgress]  Receives
   *                                     `{ source, phase, ...extra }` messages.
   * @param {Function} emit              Sink handed to `_emitChunkAsRaws` for each chunk.
   * @returns {Promise<{sources: object[], scratchDir: string}>} One entry per
   *   source that was parsed (`{ source, ...parserResult }`), plus the scratch
   *   directory that was used. The directory is not removed here.
   * @throws {Error} When a source is enabled but neither a data path nor a
   *   serial is available for it.
   */
  async _runSidecar(opts, emit) {
    const include = { ...DEFAULT_INCLUDE, ...(opts.include || {}) };
    const dataPaths = opts.dataPaths || {};
    const extractMode = opts.extractMode || "normal";
    const remotePaths =
      extractMode === "sdcard"
        ? {
            contacts: SDCARD_WORKAROUND_PATHS.contacts,
            calllog: SDCARD_WORKAROUND_PATHS.calllog,
            sms: SDCARD_WORKAROUND_PATHS.sms,
            wifi: SDCARD_WORKAROUND_PATHS.wifi_xml,
          }
        : { ...DEFAULT_REMOTE_PATHS, ...(opts.remotePaths || {}) };

    const scratchDir =
      opts.scratchDir ||
      fs.mkdtempSync(path.join(os.tmpdir(), "system-data-sync-"));
    // A caller-supplied scratchDir may not exist yet.
    fs.mkdirSync(scratchDir, { recursive: true });

    const onProgress = typeof opts.onProgress === "function" ? opts.onProgress : null;
    const tellProgress = (source, phase, extra = {}) => {
      if (onProgress) onProgress({ source, phase, ...extra });
    };

    const deviceSerial = opts.serial || null;
    const sourcesRun = [];

    // Guard + announcement that precede every device pull.
    const beginPull = (source, missingSerialMessage) => {
      if (!opts.serial) {
        throw new Error(missingSerialMessage);
      }
      tellProgress(source, "pulling");
    };

    // Pull one remote file into scratchDir and return its local path.
    const pullFile = async (remotePath, timeoutMs) => {
      const pulled = await this.supervisor.invoke(
        "android.pull_file",
        {
          serial: opts.serial,
          remote_path: remotePath,
          local_dir: scratchDir,
        },
        { timeoutMs },
      );
      return pulled.local;
    };

    // Run one system.parse_* method, streaming chunks out and recording its result.
    const parseSource = async (source, method, params, timeoutMs) => {
      tellProgress(source, "parsing", { dbPath: params.data_path });
      const result = await this.supervisor.invoke(method, params, {
        timeoutMs,
        onChunk: (batch) => this._emitChunkAsRaws(batch, emit),
        onProgress: (p) => tellProgress(source, "progress", p),
      });
      sourcesRun.push({ source, ...result });
    };

    // ─── Contacts ────────────────────────────────────────────────────────
    // Declared outside the `if` because calllog and sms pass it along as
    // contacts_db_path even when the contacts source itself is disabled.
    let contactsLocal = dataPaths.contacts || null;
    if (include.contacts) {
      if (!contactsLocal) {
        beginPull(
          "contacts",
          "system-data: contacts enabled but no serial/dataPaths.contacts provided",
        );
        contactsLocal = await pullFile(remotePaths.contacts, 60_000);
      }
      await parseSource(
        "contacts",
        "system.parse_contacts",
        { data_path: contactsLocal, device_serial: deviceSerial },
        120_000,
      );
    }

    // ─── Call log ────────────────────────────────────────────────────────
    if (include.calllog) {
      let calllogLocal = dataPaths.calllog || null;
      if (!calllogLocal) {
        beginPull("calllog", "system-data: calllog enabled but no serial/dataPaths.calllog");
        try {
          calllogLocal = await pullFile(remotePaths.calllog, 60_000);
        } catch (err) {
          // Calls table may live in contacts2.db on pre-Android-11 builds.
          if (err.code === "EXTRACT_PERMISSION_DENIED" && contactsLocal) {
            calllogLocal = contactsLocal;
          } else {
            throw err;
          }
        }
      }
      await parseSource(
        "calllog",
        "system.parse_calllog",
        {
          data_path: calllogLocal,
          contacts_db_path: contactsLocal,
          device_serial: deviceSerial,
        },
        180_000,
      );
    }

    // ─── SMS ────────────────────────────────────────────────────────────
    if (include.sms) {
      let smsLocal = dataPaths.sms || null;
      if (!smsLocal) {
        beginPull("sms", "system-data: sms enabled but no serial/dataPaths.sms");
        smsLocal = await pullFile(remotePaths.sms, 60_000);
      }
      await parseSource(
        "sms",
        "system.parse_sms",
        {
          data_path: smsLocal,
          contacts_db_path: contactsLocal,
          device_serial: deviceSerial,
        },
        300_000, // SMS can be 10K+ rows on long-term devices
      );
    }

    // ─── WiFi ───────────────────────────────────────────────────────────
    if (include.wifi) {
      let wifiLocal = dataPaths.wifi || null;
      let wifiSkipped = false;
      if (!wifiLocal) {
        // WiFi config is a single file, but two possible names. Prefer XML.
        beginPull("wifi", "system-data: wifi enabled but no serial/dataPaths.wifi");
        try {
          // The parser is handed the directory containing the pulled file.
          wifiLocal = path.dirname(await pullFile(remotePaths.wifi, 30_000));
        } catch (err) {
          // Non-fatal — wifi often inaccessible without root. Skip this source
          // (without returning, so the skip stays local to wifi).
          tellProgress("wifi", "skipped", { reason: err.code || err.message });
          wifiSkipped = true;
        }
      }
      if (!wifiSkipped) {
        await parseSource(
          "wifi",
          "system.parse_wifi",
          { data_path: wifiLocal, device_serial: deviceSerial },
          30_000,
        );
      }
    }

    return { sources: sourcesRun, scratchDir };
  }
}
336
+
337
+ module.exports = {
338
+ SystemDataAdapter,
339
+ SYSTEM_DATA_ADAPTER_NAME: NAME,
340
+ SYSTEM_DATA_ADAPTER_VERSION: VERSION,
341
+ DEFAULT_INCLUDE,
342
+ DEFAULT_REMOTE_PATHS,
343
+ SDCARD_WORKAROUND_PATHS,
344
+ };
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Phase 9.2 — 12306 (China Railway) ticket adapter.
3
+ *
4
+ * Source format: 12306 doesn't have an official user export. We accept
5
+ * two file formats:
6
+ * 1. order-confirmation emails (already adapter-parsed by Phase 5 +
7
+ * Phase 5.4 travel template). Phase 9.2 reads those events back
8
+ * out of the vault and **re-normalizes** them into the
9
+ * adapter-neutral travel schema. This is the "rich vault →
10
+ * enrich" pattern.
11
+ * 2. user-uploaded JSON dump (e.g. exported from a 3rd-party 12306
12
+ * scraper, or hand-curated). Optional.
13
+ *
14
+ * For v0.5 we focus on (2) since (1) is purely vault-side derivation
15
+ * the AnalysisEngine can do at query time.
16
+ */
17
+
18
+ "use strict";
19
+
20
+ const fs = require("node:fs");
21
+ const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
22
+
23
+ const NAME = "travel-12306";
24
+ const VERSION = "0.5.0";
25
+
26
/**
 * File-import adapter for 12306 (China Railway) order dumps.
 *
 * Reads a user-supplied dump file, converts it with `parseRecords`, and
 * yields one raw item per record; `normalize` delegates to
 * `normalizeTravelRecord`.
 */
class Train12306Adapter {
  /**
   * @param {object} opts
   * @param {object} opts.account            Must carry a truthy `username`.
   * @param {string} [opts.dataPath]         Default dump file used by `sync`.
   * @throws {Error} When `opts.account.username` is missing.
   */
  constructor(opts = {}) {
    const { account } = opts;
    if (!(account && account.username)) {
      throw new Error("Train12306Adapter: opts.account.username required (12306 user id)");
    }
    this.account = account;
    this._dataPath = opts.dataPath || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = ["import:json", "parse:12306-orders"];
    this.extractMode = "file-import";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "12306:orderId / passengerName / trainNumber / fromStation / toStation / departureTime / arrivalTime / seat / price",
      ],
      sensitivity: "medium",
      legalGate: false,
    };
  }

  /** No credentials are checked; always reports success for the configured account. */
  async authenticate() {
    const { username } = this.account;
    return { ok: true, account: username };
  }

  /** Always healthy; `lastChecked` is the current time. */
  async healthCheck() {
    const lastChecked = Date.now();
    return { ok: true, lastChecked };
  }

  /**
   * Yield one raw item per record found in the dump file.
   *
   * Yields nothing when no path is configured or the file does not exist.
   *
   * @param {object} [opts]
   * @param {string} [opts.dataPath]  Overrides the path given to the constructor.
   * @throws {Error} "parse failed" wrapper when `parseRecords` throws.
   */
  async *sync(opts = {}) {
    const sourcePath = opts.dataPath || this._dataPath;
    if (!sourcePath) return;
    if (!fs.existsSync(sourcePath)) return;

    const text = fs.readFileSync(sourcePath, "utf-8");
    let parsed;
    try {
      parsed = parseRecords(text);
    } catch (err) {
      throw new Error(`Train12306Adapter: parse failed: ${err.message}`);
    }

    for (const record of parsed) {
      const originalId = String(record.recordId || record.orderId || record.ticketNumber);
      // Prefer the booking time, then the departure time, then "now".
      const capturedAt = record.bookedAt || record.departureMs || Date.now();
      yield {
        adapter: NAME,
        originalId,
        capturedAt,
        payload: { record },
      };
    }
  }

  /**
   * Convert a raw item produced by `sync` via `normalizeTravelRecord`.
   *
   * @param {object} raw  Must contain `payload.record`.
   * @throws {Error} When `raw.payload.record` is absent.
   */
  normalize(raw) {
    const record = raw && raw.payload && raw.payload.record;
    if (!record) {
      throw new Error("Train12306Adapter.normalize: raw.payload.record missing");
    }
    const meta = { adapterName: NAME, adapterVersion: VERSION };
    return normalizeTravelRecord(record, meta);
  }
}
86
+
87
/**
 * Parse a 12306 dump file into travel records. Accepts any of:
 *   - JSON array of order objects
 *   - JSON object { orders: [...] }
 *   - a single JSON order object (this is also what a one-line JSONL file
 *     looks like, since it parses as plain JSON)
 *   - JSONL (one order per line; lines not starting with "{" are ignored)
 *
 * Any other JSON value (null, number, string, or an `orders` field that is
 * not an array) yields an empty list instead of crashing.
 *
 * @param {string} text  Raw file contents.
 * @returns {object[]} Records produced by `orderToRecord`; orders it rejects
 *   (returns a falsy value for) are dropped.
 * @throws {SyntaxError} When the text is not valid JSON and a JSONL line
 *   starting with "{" fails to parse.
 */
function parseRecords(text) {
  let raw;
  try {
    raw = JSON.parse(text);
  } catch (_e) {
    // Not a single JSON document — try JSONL.
    raw = text
      .split(/\r?\n/)
      .filter((line) => line.trim().startsWith("{"))
      .map((line) => JSON.parse(line));
  }

  let orders = [];
  if (Array.isArray(raw)) {
    orders = raw;
  } else if (raw !== null && typeof raw === "object") {
    if (Array.isArray(raw.orders)) {
      orders = raw.orders;
    } else if (raw.orders == null) {
      // No wrapper: the object itself is the order. Without this, a JSONL
      // file holding exactly one order would silently produce no records.
      orders = [raw];
    }
  }

  return orders.map((order) => orderToRecord(order)).filter(Boolean);
}
107
+
108
/**
 * Map one 12306 order object onto the adapter-neutral travel record shape.
 *
 * Field names are looked up under several aliases (camelCase and snake_case
 * variants). Timestamps go through `numberOrParse`.
 *
 * @param {object} o  Order object from the dump.
 * @returns {object|null} The travel record, or null when `o` is not an
 *   object or carries no usable id (orderId / ticketNumber / id / order_no).
 */
function orderToRecord(o) {
  if (!o || typeof o !== "object") return null;
  const rawId = o.orderId || o.ticketNumber || o.id || o.order_no;
  if (!rawId) return null;
  // Stringify once so recordId and the confirmationCode fallback agree in type.
  const recordId = String(rawId);

  // Price may arrive as a number or as text such as "¥1,553.5" / "553.5元".
  // Strip currency signs, thousands separators and spaces before parsing, and
  // report no cost at all rather than a NaN amount when nothing parses.
  let totalCost = null;
  if (o.price != null) {
    const amount =
      typeof o.price === "number"
        ? o.price
        : parseFloat(String(o.price).replace(/[,\s¥￥]/g, ""));
    if (Number.isFinite(amount)) {
      totalCost = { value: amount, currency: "CNY" };
    }
  }

  return {
    vendorId: "12306",
    recordId,
    vehicleType: "train",
    from: {
      station: o.fromStation || o.from_station || o.from,
      city: o.fromCity || o.from_city,
    },
    to: {
      station: o.toStation || o.to_station || o.to,
      city: o.toCity || o.to_city,
    },
    departureMs: numberOrParse(o.departureTime || o.departure_time || o.start_time),
    arrivalMs: numberOrParse(o.arrivalTime || o.arrival_time || o.end_time),
    carrier: "12306",
    vehicleNumber: o.trainNumber || o.train_no || o.trainNo,
    totalCost,
    traveler: o.passengerName || o.passenger || o.name,
    confirmationCode: o.ticketNumber || o.ticket_no || recordId,
    bookedAt: numberOrParse(o.bookedAt || o.order_time),
    extras: {
      seat: o.seat || o.seatType,
      seatNumber: o.seatNumber || o.seat_number,
      idCardLast6: o.idLast6 || undefined, // for cross-source EntityResolver linking
    },
  };
}
141
+
142
/**
 * Coerce a timestamp-like value.
 *
 * Finite numbers are returned as-is, all-digit strings are parsed as base-10
 * integers, any other string is handed to `parseChineseDateTime`, and every
 * other input (including NaN / Infinity) yields null.
 */
function numberOrParse(v) {
  switch (typeof v) {
    case "number":
      return Number.isFinite(v) ? v : null;
    case "string":
      return /^\d+$/.test(v) ? parseInt(v, 10) : parseChineseDateTime(v);
    default:
      return null;
  }
}
150
+
151
+ module.exports = { Train12306Adapter, parseRecords, NAME, VERSION };
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Phase 9.4 — Amap (高德地图) location history adapter.
3
+ *
4
+ * Source: Amap stores recent navigation / search history in app-local
5
+ * SQLite DBs. Per sjqz/parsers/amap.py, the relevant tables are:
6
+ * - history_search (search queries)
7
+ * - history_route (planned routes)
8
+ * - favourites (saved locations like 公司 / 家)
9
+ *
10
+ * Adapter extractMode is "device-pull" — relies on Phase 7.5
11
+ * AndroidExtractor to pull the .db files from Amap's app-private
12
+ * directory. For v0.5 we accept a pre-pulled local path (file-import
13
+ * fallback) so users without root can hand-extract via adb backup.
14
+ */
15
+
16
+ "use strict";
17
+
18
+ const fs = require("node:fs");
19
+ const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
20
+
21
+ const NAME = "travel-amap";
22
+ const VERSION = "0.5.0";
23
+
24
/**
 * Adapter for Amap (高德地图) route and search history stored in a SQLite DB.
 *
 * `sync` opens the DB read-only, reads route rows then search rows, and
 * yields one raw item per row that the row mappers accept. `normalize`
 * delegates to `normalizeTravelRecord`.
 */
class AmapAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account              Must carry a truthy `deviceId`.
   * @param {string} [opts.dbPath]             Default DB file used by `sync`.
   * @param {Function} [opts.dbDriverFactory]  Called with no arguments; must
   *   return the database constructor. When omitted, `sync` loads
   *   "better-sqlite3-multiple-ciphers" lazily.
   * @throws {Error} When `opts.account.deviceId` is missing.
   */
  constructor(opts = {}) {
    if (!opts.account || !opts.account.deviceId) {
      throw new Error("AmapAdapter: opts.account.deviceId required");
    }
    this.account = opts.account;
    this._dbPath = opts.dbPath || null;
    this._dbDriverFactory = opts.dbDriverFactory || null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = ["sync:sqlite", "parse:amap-history"];
    this.extractMode = "device-pull";
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "amap:search_history (query / time / location)",
        "amap:route_history (from / to / mode / time)",
        "amap:favourites (name / address / coords)",
      ],
      sensitivity: "medium",
      legalGate: false,
    };
  }

  /** No credentials are checked; always reports success for the configured device. */
  async authenticate() {
    return { ok: true, account: this.account.deviceId };
  }

  /** Always healthy; `lastChecked` is the current time. */
  async healthCheck() {
    return { ok: true, lastChecked: Date.now() };
  }

  /**
   * Yield raw items for route history, then search history.
   *
   * Yields nothing when no DB path is configured or the file does not exist.
   * Each SELECT is capped at 5000 rows. The DB handle is closed when the
   * generator finishes, throws, or is abandoned by the consumer.
   *
   * `capturedAt` is the record's `bookedAt`, else its `departureMs`, else the
   * current time. The row mappers in this file populate `departureMs` (not
   * `bookedAt`), so without the middle fallback every item would be stamped
   * with the sync time instead of when the route/search happened.
   *
   * @param {object} [opts]
   * @param {string} [opts.dbPath]  Overrides the path given to the constructor.
   */
  async *sync(opts = {}) {
    const dbPath = opts.dbPath || this._dbPath;
    if (!dbPath || !fs.existsSync(dbPath)) return;
    const Database = this._dbDriverFactory || (() => require("better-sqlite3-multiple-ciphers"));
    const Driver = typeof Database === "function" ? Database() : Database;
    const db = new Driver(dbPath, { readonly: true });

    const toRaw = (record, kind) => ({
      adapter: NAME,
      originalId: record.recordId,
      capturedAt: record.bookedAt || record.departureMs || Date.now(),
      payload: { record, kind },
    });

    try {
      // History routes (most analytically valuable). The table name differs
      // between schemas, so try both spellings before giving up.
      const routes = trySelect(db, "SELECT * FROM history_route LIMIT 5000")
        || trySelect(db, "SELECT * FROM ROUTE_HISTORY LIMIT 5000")
        || [];
      for (const row of routes) {
        const rec = routeRowToRecord(row);
        if (rec) {
          yield toRaw(rec, "route");
        }
      }

      // History search (queries — produce trip events of type "visit")
      const searches = trySelect(db, "SELECT * FROM history_search LIMIT 5000") || [];
      for (const row of searches) {
        const rec = searchRowToRecord(row);
        if (rec) {
          yield toRaw(rec, "search");
        }
      }
    } finally {
      try {
        db.close();
      } catch (_e) {
        // Best-effort close: a failure here must not mask the real outcome.
      }
    }
  }

  /**
   * Convert a raw item produced by `sync` via `normalizeTravelRecord`.
   *
   * @param {object} raw  Must contain `payload.record`.
   * @throws {Error} When `raw.payload.record` is absent.
   */
  normalize(raw) {
    if (!raw || !raw.payload || !raw.payload.record) {
      throw new Error("AmapAdapter.normalize: raw.payload.record missing");
    }
    return normalizeTravelRecord(raw.payload.record, {
      adapterName: NAME,
      adapterVersion: VERSION,
    });
  }
}
108
+
109
/**
 * Run a SELECT and return all rows, or null when preparing/executing it
 * throws (e.g. the table is absent). Callers use the null to fall through
 * to an alternative query.
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_ignored) {
    rows = null;
  }
  return rows;
}
116
+
117
/**
 * Map a route-history row onto the travel record shape.
 *
 * Column names are looked up under several aliases. Returns null for a
 * missing row or a row with no truthy id (id / _id / uid / guid). Falsy
 * coordinates (including 0) are reported as null.
 */
function routeRowToRecord(row) {
  if (!row) return null;

  const rowId = row.id || row._id || row.uid || row.guid;
  if (!rowId) return null;

  const origin = {
    name: row.from_name || row.fromName || row.start,
    lat: row.from_lat || null,
    lng: row.from_lng || null,
  };
  const destination = {
    name: row.to_name || row.toName || row.dest,
    lat: row.to_lat || null,
    lng: row.to_lng || null,
  };

  // "drive" is renamed to "car"; any other mode passes through, and a row
  // without a mode is a generic "trip".
  let vehicleType = row.mode || "trip";
  if (row.mode === "drive") {
    vehicleType = "car";
  }

  return {
    vendorId: "amap",
    recordId: `route-${rowId}`,
    vehicleType,
    from: origin,
    to: destination,
    departureMs: numberOrParse(row.time || row.create_time || row.start_time),
    carrier: "高德地图",
    extras: { mode: row.mode },
  };
}
132
+
133
/**
 * Map a search-history row onto the travel record shape. A search is
 * modelled as a "visit" intent towards the searched place.
 *
 * Returns null for a missing row or a row with no truthy id
 * (id / _id / guid). Falsy coordinates (including 0) are reported as null.
 */
function searchRowToRecord(row) {
  if (!row) return null;

  const rowId = row.id || row._id || row.guid;
  if (!rowId) return null;

  const queryText = row.keyword || row.query;
  const target = {
    name: queryText || row.poiname,
    lat: row.lat || null,
    lng: row.lng || null,
    city: row.city,
  };

  return {
    vendorId: "amap",
    recordId: `search-${rowId}`,
    vehicleType: "visit",
    to: target,
    departureMs: numberOrParse(row.time || row.create_time),
    carrier: "高德地图",
    extras: { query: queryText },
  };
}
148
+
149
/**
 * Coerce a timestamp-like value to epoch milliseconds.
 *
 * Amap timestamps are sometimes in seconds, so numeric values (and all-digit
 * strings) that are not above 1e10 are multiplied by 1000; larger ones are
 * returned unchanged. Any other string goes to `parseChineseDateTime`, and
 * every other input yields null.
 */
function numberOrParse(v) {
  // Heuristic seconds → milliseconds upgrade.
  const toMillis = (n) => (n > 1e10 ? n : n * 1000);

  if (Number.isFinite(v)) return toMillis(v);
  if (typeof v !== "string") return null;
  return /^\d+$/.test(v) ? toMillis(parseInt(v, 10)) : parseChineseDateTime(v);
}
163
+
164
+ module.exports = { AmapAdapter, NAME, VERSION };