@chainlesschain/personal-data-hub 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
  2. package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
  3. package/__tests__/adapters/ai-chat-history.test.js +396 -0
  4. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  5. package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
  6. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  7. package/__tests__/adapters/email-adapter.test.js +138 -1
  8. package/__tests__/adapters/email-classifier.test.js +347 -0
  9. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  10. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  11. package/__tests__/adapters/email-templates.test.js +699 -0
  12. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
  13. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  14. package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
  15. package/__tests__/adapters/system-data-android.test.js +387 -0
  16. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  17. package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
  18. package/__tests__/adapters/wechat-env-probe.test.js +162 -0
  19. package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
  20. package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
  21. package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
  22. package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
  23. package/__tests__/analysis-skills.test.js +556 -0
  24. package/__tests__/analysis.test.js +329 -1
  25. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
  26. package/__tests__/e2e/full-user-journey.test.js +188 -0
  27. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  28. package/__tests__/entity-resolver-stages.test.js +411 -0
  29. package/__tests__/entity-resolver-vault.test.js +246 -0
  30. package/__tests__/entity-resolver.test.js +526 -0
  31. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  32. package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
  33. package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
  34. package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
  35. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
  36. package/__tests__/longtail-adapters.test.js +217 -0
  37. package/__tests__/mobile-extractor.test.js +288 -0
  38. package/__tests__/registry.test.js +4 -2
  39. package/__tests__/shopping-adapters.test.js +296 -0
  40. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  41. package/__tests__/sidecar-supervisor.test.js +120 -0
  42. package/__tests__/social-adapters.test.js +206 -0
  43. package/__tests__/travel-adapters.test.js +325 -0
  44. package/__tests__/vault.test.js +3 -3
  45. package/__tests__/wechat-adapter.test.js +476 -0
  46. package/__tests__/whatsapp-adapter.test.js +135 -0
  47. package/lib/adapter-spec.js +12 -0
  48. package/lib/adapters/_python-sidecar-base.js +207 -0
  49. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
  50. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  51. package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
  52. package/lib/adapters/ai-chat-history/health-checker.js +210 -0
  53. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  54. package/lib/adapters/ai-chat-history/index.js +28 -0
  55. package/lib/adapters/ai-chat-history/schema-map.js +258 -0
  56. package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
  57. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  58. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  59. package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
  60. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  61. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  62. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  63. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  64. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  65. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  66. package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
  67. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
  68. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  69. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  70. package/lib/adapters/alipay-bill/index.js +41 -0
  71. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  72. package/lib/adapters/email-imap/classifier.js +495 -0
  73. package/lib/adapters/email-imap/email-adapter.js +419 -8
  74. package/lib/adapters/email-imap/index.js +42 -0
  75. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  76. package/lib/adapters/email-imap/templates/bill.js +232 -0
  77. package/lib/adapters/email-imap/templates/government.js +120 -0
  78. package/lib/adapters/email-imap/templates/index.js +78 -0
  79. package/lib/adapters/email-imap/templates/order.js +186 -0
  80. package/lib/adapters/email-imap/templates/other.js +114 -0
  81. package/lib/adapters/email-imap/templates/register.js +113 -0
  82. package/lib/adapters/email-imap/templates/travel.js +157 -0
  83. package/lib/adapters/email-imap/templates/utils.js +275 -0
  84. package/lib/adapters/email-imap/transactions.js +234 -0
  85. package/lib/adapters/messaging-qq/index.js +158 -0
  86. package/lib/adapters/messaging-telegram/index.js +142 -0
  87. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  88. package/lib/adapters/shopping-base/index.js +208 -0
  89. package/lib/adapters/shopping-jd/index.js +150 -0
  90. package/lib/adapters/shopping-meituan/index.js +154 -0
  91. package/lib/adapters/shopping-taobao/index.js +176 -0
  92. package/lib/adapters/social-bilibili/index.js +171 -0
  93. package/lib/adapters/social-douyin/index.js +116 -0
  94. package/lib/adapters/social-kuaishou/index.js +237 -0
  95. package/lib/adapters/social-toutiao/index.js +236 -0
  96. package/lib/adapters/social-weibo/index.js +164 -0
  97. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  98. package/lib/adapters/system-data/disclosure.js +166 -0
  99. package/lib/adapters/system-data/index.js +34 -0
  100. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  101. package/lib/adapters/system-data-android/adapter.js +348 -0
  102. package/lib/adapters/system-data-android/index.js +76 -0
  103. package/lib/adapters/travel-12306/index.js +151 -0
  104. package/lib/adapters/travel-amap/index.js +164 -0
  105. package/lib/adapters/travel-baidu-map/index.js +162 -0
  106. package/lib/adapters/travel-base/index.js +240 -0
  107. package/lib/adapters/travel-ctrip/index.js +151 -0
  108. package/lib/adapters/wechat/bootstrap.js +146 -0
  109. package/lib/adapters/wechat/content-parser.js +326 -0
  110. package/lib/adapters/wechat/db-reader.js +209 -0
  111. package/lib/adapters/wechat/env-probe.js +218 -0
  112. package/lib/adapters/wechat/frida-agent/loader.js +67 -0
  113. package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
  114. package/lib/adapters/wechat/index.js +37 -0
  115. package/lib/adapters/wechat/key-extractor.js +158 -0
  116. package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
  117. package/lib/adapters/wechat/key-providers/index.js +22 -0
  118. package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
  119. package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
  120. package/lib/adapters/wechat/normalize.js +220 -0
  121. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  122. package/lib/analysis-skills/base.js +113 -0
  123. package/lib/analysis-skills/footprint.js +167 -0
  124. package/lib/analysis-skills/index.js +58 -0
  125. package/lib/analysis-skills/interests.js +161 -0
  126. package/lib/analysis-skills/relations.js +226 -0
  127. package/lib/analysis-skills/spending.js +219 -0
  128. package/lib/analysis-skills/timeline.js +167 -0
  129. package/lib/analysis.js +191 -2
  130. package/lib/entity-resolver/embedding-stage.js +198 -0
  131. package/lib/entity-resolver/entity-resolver.js +384 -0
  132. package/lib/entity-resolver/index.js +42 -0
  133. package/lib/entity-resolver/llm-stage.js +191 -0
  134. package/lib/entity-resolver/rule-stage.js +208 -0
  135. package/lib/entity-resolver/worker.js +149 -0
  136. package/lib/index.js +131 -0
  137. package/lib/migrations.js +73 -0
  138. package/lib/mobile-extractor/android.js +193 -0
  139. package/lib/mobile-extractor/index.js +9 -0
  140. package/lib/mobile-extractor/ios.js +223 -0
  141. package/lib/prompt-builder.js +11 -1
  142. package/lib/query-parser.js +7 -1
  143. package/lib/registry.js +42 -0
  144. package/lib/sidecar/index.js +15 -0
  145. package/lib/sidecar/supervisor.js +359 -0
  146. package/lib/vault.js +343 -0
  147. package/package.json +36 -3
  148. package/scripts/_make-fixture-all.js +126 -0
  149. package/scripts/_make-fixture-contacts.js +84 -0
  150. package/scripts/evaluate-entity-resolver.js +213 -0
  151. package/scripts/smoke-phase-5-5.js +196 -0
  152. package/scripts/smoke-phase-5-7.js +181 -0
  153. package/scripts/smoke-system-data-contacts.js +309 -0
  154. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Phase 9.3 — Ctrip (携程) order adapter.
3
+ *
4
+ * Ctrip has no official user export. Two input paths:
5
+ * 1. JSON dump from a 3rd-party scraper or user-curated file
6
+ * 2. Email order-confirmation events from Phase 5 (vault-side derive)
7
+ *
8
+ * Ctrip orders cover 4 sub-types: flight / hotel / train / cruise.
9
+ * We map each to the appropriate `vehicleType` in TravelRecord:
10
+ * flight → "flight", hotel → "hotel", train → "train", cruise → "cruise"
11
+ */
12
+
13
+ "use strict";
14
+
15
+ const fs = require("node:fs");
16
+ const { normalizeTravelRecord, parseChineseDateTime } = require("../travel-base");
17
+
18
+ const NAME = "travel-ctrip";
19
+ const VERSION = "0.5.0";
20
+
21
/**
 * File-import adapter for Ctrip (携程) order dumps.
 *
 * The adapter does no network I/O: `sync()` reads a local dump from disk,
 * hands the text to `parseRecords`, and yields one raw event per record.
 * `normalize()` forwards a raw event's record to `normalizeTravelRecord`.
 */
class CtripAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account      must carry a truthy `email`
   * @param {string} [opts.dataPath]   default dump path used by `sync()`
   * @throws {Error} when `opts.account.email` is missing
   */
  constructor(opts = {}) {
    const account = opts.account;
    if (!(account && account.email)) {
      throw new Error("CtripAdapter: opts.account.email required");
    }

    // Assigned in one go; property order matches the declaration order below.
    Object.assign(this, {
      account,
      _dataPath: opts.dataPath || null,
      name: NAME,
      version: VERSION,
      capabilities: ["import:json", "parse:ctrip-orders"],
      extractMode: "file-import",
      rateLimits: {},
      dataDisclosure: {
        fields: [
          "ctrip:orderId / type / fromCity / toCity / dates / passengerName / price / carrier",
        ],
        sensitivity: "medium",
        legalGate: false,
      },
    });
  }

  /** Always succeeds; reports the configured account email. */
  async authenticate() {
    const { email } = this.account;
    return { ok: true, account: email };
  }

  /** Always healthy; stamps the check with the current time. */
  async healthCheck() {
    const lastChecked = Date.now();
    return { ok: true, lastChecked };
  }

  /**
   * Yield one raw event per order found in the dump file.
   *
   * Yields nothing when no path is configured or the file does not exist.
   *
   * @param {object} [opts]
   * @param {string} [opts.dataPath] overrides the constructor's `dataPath`
   * @throws {Error} "CtripAdapter: parse failed: …" when the dump cannot be parsed
   */
  async *sync(opts = {}) {
    const sourcePath = opts.dataPath || this._dataPath;
    const readable = Boolean(sourcePath) && fs.existsSync(sourcePath);
    if (!readable) {
      return;
    }

    const contents = fs.readFileSync(sourcePath, "utf-8");
    let parsed;
    try {
      parsed = parseRecords(contents);
    } catch (err) {
      throw new Error(`CtripAdapter: parse failed: ${err.message}`);
    }

    for (const record of parsed) {
      // Prefer the booking time, then the departure time, then "now".
      const capturedAt = record.bookedAt || record.departureMs || Date.now();
      yield {
        adapter: NAME,
        originalId: record.recordId,
        capturedAt,
        payload: { record },
      };
    }
  }

  /**
   * Convert a raw event produced by `sync()` via `normalizeTravelRecord`.
   *
   * @param {object} raw raw event carrying `payload.record`
   * @throws {Error} when `raw.payload.record` is missing
   */
  normalize(raw) {
    const record = raw && raw.payload && raw.payload.record;
    if (!record) {
      throw new Error("CtripAdapter.normalize: raw.payload.record missing");
    }
    const meta = { adapterName: NAME, adapterVersion: VERSION };
    return normalizeTravelRecord(record, meta);
  }
}
81
+
82
// Lower-cased Ctrip order type → TravelRecord `vehicleType`.
// "airline" is accepted as an alias of "flight".
const TYPE_MAP = Object.fromEntries([
  ["flight", "flight"],
  ["airline", "flight"],
  ["hotel", "hotel"],
  ["train", "train"],
  ["cruise", "cruise"],
  ["bus", "bus"],
  ["car", "car"],
]);
91
+
92
/**
 * Parse the text of a Ctrip order dump into travel records.
 *
 * Accepted layouts:
 *   - a JSON array of orders
 *   - a JSON object whose `orders` property is an array
 *   - JSONL: one JSON object per line (used when the text is not valid JSON);
 *     lines that do not start with "{" are ignored
 *
 * Any other JSON document (`null`, a scalar, an object without an `orders`
 * array) yields an empty list instead of crashing on property access.
 * Orders that `orderToRecord` rejects (returns a falsy value for) are dropped.
 *
 * @param {string} text raw file contents
 * @returns {object[]} records produced by `orderToRecord`
 * @throws {SyntaxError} when the JSONL fallback hits a malformed line
 */
function parseRecords(text) {
  let raw;
  try {
    raw = JSON.parse(text);
  } catch (_e) {
    // Not a single JSON document — try JSONL.
    raw = text
      .split(/\r?\n/)
      .filter((line) => line.trim().startsWith("{"))
      .map((line) => JSON.parse(line));
  }

  let orders = [];
  if (Array.isArray(raw)) {
    orders = raw;
  } else if (raw !== null && typeof raw === "object" && Array.isArray(raw.orders)) {
    orders = raw.orders;
  }

  return orders.map((order) => orderToRecord(order)).filter(Boolean);
}
106
+
107
/**
 * Map one Ctrip order object onto the record shape consumed by the adapter.
 *
 * Field names are read in several spellings (camelCase / snake_case / short
 * forms). Returns null when the input is not an object or carries no order id.
 *
 * Compared with a naive lookup this guards three input hazards:
 *   - `type` is coerced with String() so a numeric/odd type cannot crash
 *     `.toLowerCase()`;
 *   - TYPE_MAP is consulted with an own-property check, so a type such as
 *     "constructor" falls back to "trip" instead of resolving to an inherited
 *     Object.prototype member;
 *   - a price that does not parse to a finite number yields `totalCost: null`
 *     rather than `{ value: NaN }`.
 *
 * @param {object} o one order from the dump
 * @returns {object|null} record, or null when the order is unusable
 */
function orderToRecord(o) {
  if (!o || typeof o !== "object") return null;

  const recordId = o.orderId || o.id || o.order_no;
  if (!recordId) return null;

  const type = String(o.type || o.orderType || "").toLowerCase();
  const vehicleType = Object.prototype.hasOwnProperty.call(TYPE_MAP, type)
    ? TYPE_MAP[type]
    : "trip";

  const fromCity = o.fromCity || o.from_city || o.depCity;
  // Hotel orders have no arrival city; the hotel's city stands in for it.
  const toCity = o.toCity || o.to_city || o.arrCity || o.hotelCity;

  const priceValue = o.price != null ? parseFloat(o.price) : NaN;

  return {
    vendorId: "ctrip",
    recordId: String(recordId),
    vehicleType,
    from: fromCity ? { city: fromCity } : null,
    to: toCity ? { city: toCity } : null,
    // Hotel check-in / check-out double as departure / arrival.
    departureMs: numberOrParse(o.departureTime || o.dep_time || o.checkIn || o.check_in),
    arrivalMs: numberOrParse(o.arrivalTime || o.arr_time || o.checkOut || o.check_out),
    carrier: o.carrier || o.airline || o.hotelName || o.hotel_name || "携程",
    vehicleNumber: o.flightNumber || o.flight_no || o.trainNumber || o.train_no,
    totalCost: Number.isFinite(priceValue)
      ? { value: priceValue, currency: o.currency || "CNY" }
      : null,
    traveler: o.passengerName || o.passenger || o.guestName || o.guest_name,
    confirmationCode: o.confirmationCode || o.pnr || o.confirmation_no,
    bookedAt: numberOrParse(o.bookedAt || o.order_time),
    extras: {
      type,
      ...(o.hotel ? { hotel: o.hotel } : {}),
      ...(o.nights != null ? { nights: o.nights } : {}),
    },
  };
}
141
+
142
/**
 * Coerce a timestamp-ish value.
 *
 *   - finite number            → returned unchanged
 *   - all-digit string, ≥ 10   → parsed as a base-10 integer
 *   - any other string         → delegated to `parseChineseDateTime`
 *   - everything else          → null
 *
 * NOTE(review): a 10-digit digit string is returned as-is, i.e. it is not
 * scaled from seconds to milliseconds — confirm callers only pass ms epochs.
 *
 * @param {*} v
 * @returns {*} number, the result of `parseChineseDateTime`, or null
 */
function numberOrParse(v) {
  if (Number.isFinite(v)) {
    return v;
  }
  if (typeof v !== "string") {
    return null;
  }
  const looksLikeEpoch = v.length >= 10 && /^\d+$/.test(v);
  return looksLikeEpoch ? parseInt(v, 10) : parseChineseDateTime(v);
}
150
+
151
+ module.exports = { CtripAdapter, parseRecords, TYPE_MAP, NAME, VERSION };
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Phase 12.6.7 — WeChat adapter bootstrap helper.
3
+ *
4
+ * Glues env-probe (12.6.4) → KeyProvider choice (12.6.1) → WechatAdapter
5
+ * instantiation (12.6.5) into one entry point so the IPC / WS / CLI
6
+ * layers don't each have to recreate the wiring.
7
+ *
8
+ * Decision matrix (mirrors `env-probe.decide`):
9
+ * - probe.suggestedKeyProvider === "md5" → MD5KeyProvider
10
+ * - probe.suggestedKeyProvider === "frida" → FridaKeyProvider
11
+ * - probe.suggestedKeyProvider === "unsupported" → no adapter created;
12
+ * caller gets `{ ok: false, probe, reason }` and is expected to surface
13
+ * `probe.reasons[]` to the user.
14
+ *
15
+ * Caller may force a specific provider via `opts.keyProviderOverride`
16
+ * (e.g. `"md5"` on a real device that env-probe misclassified, useful for
17
+ * the rare 8.0+ install where the user has the MD5 path working). The
18
+ * override skips the suggestion but the probe still runs and is returned
19
+ * for transparency.
20
+ *
21
+ * Returns shape (also see __tests__/adapters/wechat-bootstrap.test.js):
22
+ *
23
+ * { ok: true, adapter, keyProvider, probe }
24
+ * { ok: false, reason: "ENV_UNSUPPORTED" | "MD5_NEEDS_WECHAT_DATA_PATH"
25
+ * | "FRIDA_NEEDS_WXID" | "ADAPTER_CTOR_FAILED" | "UNKNOWN_KEY_PROVIDER",
26
+ * probe, message? }
27
+ *
28
+ * Test seams:
29
+ * - opts._probe inject pre-computed probe (skip exec)
30
+ * - opts._md5Provider inject pre-built MD5KeyProvider instance
31
+ * - opts._fridaProvider inject pre-built FridaKeyProvider instance
32
+ * - opts._WechatAdapter swap the adapter constructor (default: real)
33
+ */
34
+ "use strict";
35
+
36
+ const { WechatAdapter } = require("./wechat-adapter");
37
+ const { MD5KeyProvider } = require("./key-providers/md5-key-provider");
38
+ const { FridaKeyProvider } = require("./key-providers/frida-key-provider");
39
+ const { probe: realProbe } = require("./env-probe");
40
+
41
/**
 * Probe the environment, pick a KeyProvider and construct a WechatAdapter.
 *
 * The provider is `opts.keyProviderOverride` when given, otherwise
 * `probe.suggestedKeyProvider`. The probe always runs (or is taken from
 * `opts._probe`) and is returned in every result.
 *
 * @param {object} opts
 * @param {object} opts.account                  `{ uin, wxid? }` — adapter sees uin
 * @param {string} [opts.dbPath]                 local path to pulled EnMicroMsg.db
 * @param {string} [opts.wechatDataPath]         local pulled /data/data/com.tencent.mm
 *                                               (required when MD5KeyProvider is chosen)
 * @param {object} [opts.fridaOpts]              forwarded to FridaKeyProvider ctor
 *                                               (deviceId / packageName / timeoutMs)
 * @param {string} [opts.keyProviderOverride]    "md5" | "frida" — force selection
 * @param {Function} [opts.exec]                 exec seam forwarded to env-probe
 * @param {object} [opts._probe]                 pre-computed probe (test seam)
 * @param {object} [opts._md5Provider]           (test seam)
 * @param {object} [opts._fridaProvider]         (test seam)
 * @param {Function} [opts._WechatAdapter]       (test seam)
 * @returns {Promise<object>} `{ ok: true, adapter, keyProvider, probe }` on
 *   success, otherwise `{ ok: false, reason, message, probe }` with `reason`
 *   one of "ENV_UNSUPPORTED", "MD5_NEEDS_WECHAT_DATA_PATH",
 *   "UNKNOWN_KEY_PROVIDER" or "ADAPTER_CTOR_FAILED".
 * @throws {Error} when `opts` is not an object or `opts.account.uin` is missing
 */
async function bootstrapWechatAdapter(opts = {}) {
  if (!opts || typeof opts !== "object") {
    throw new Error("bootstrapWechatAdapter: opts required");
  }
  if (!opts.account || !opts.account.uin) {
    throw new Error("bootstrapWechatAdapter: opts.account.uin required");
  }

  const probe = opts._probe || (await realProbe({ exec: opts.exec }));
  const chosen = opts.keyProviderOverride || probe.suggestedKeyProvider;

  // Every failure result has the same shape; only reason/message vary.
  const fail = (reason, message) => ({ ok: false, reason, message, probe });

  if (chosen === "unsupported") {
    return fail(
      "ENV_UNSUPPORTED",
      (probe.reasons || []).join("; ") || "env-probe could not pick a viable KeyProvider",
    );
  }

  // Pick / build KeyProvider
  let keyProvider;
  if (chosen === "md5") {
    if (opts._md5Provider) {
      keyProvider = opts._md5Provider;
    } else if (!opts.wechatDataPath) {
      return fail(
        "MD5_NEEDS_WECHAT_DATA_PATH",
        "MD5KeyProvider requires opts.wechatDataPath (pulled /data/data/com.tencent.mm/)",
      );
    } else {
      keyProvider = new MD5KeyProvider({
        wechatDataPath: opts.wechatDataPath,
        uin: opts.account.uin,
      });
    }
  } else if (chosen === "frida") {
    if (opts._fridaProvider) {
      keyProvider = opts._fridaProvider;
    } else {
      // No account re-check here: opts.account.uin was already validated at
      // the top of the function, so a second `!uin` test could never fire.
      const fridaOpts = opts.fridaOpts || {};
      // An injected or overridden probe may carry no `device`; fall back to
      // null rather than throwing on `probe.device.serial`.
      const probedSerial = probe.device && probe.device.serial;
      keyProvider = new FridaKeyProvider({
        deviceId: fridaOpts.deviceId || probedSerial || null,
        packageName: fridaOpts.packageName || "com.tencent.mm",
        timeoutMs: fridaOpts.timeoutMs || 30_000,
      });
    }
  } else {
    return fail("UNKNOWN_KEY_PROVIDER", `Unknown keyProvider "${chosen}"`);
  }

  // Instantiate adapter; constructor errors are reported, not thrown.
  const AdapterCtor = opts._WechatAdapter || WechatAdapter;
  let adapter;
  try {
    adapter = new AdapterCtor({
      account: opts.account,
      dbPath: opts.dbPath || null,
      keyProvider,
    });
  } catch (err) {
    return fail("ADAPTER_CTOR_FAILED", err && err.message ? err.message : String(err));
  }

  return { ok: true, adapter, keyProvider, probe };
}
145
+
146
+ module.exports = { bootstrapWechatAdapter };
@@ -0,0 +1,326 @@
1
+ /**
2
+ * Phase 12 v0.5 — WeChat message.content parser.
3
+ *
4
+ * Frida-INDEPENDENT — operates on decrypted message rows AFTER db-reader
5
+ * has done its job. Pure string/XML parsing.
6
+ *
7
+ * Handles the 6 common message types per `Adapter_WeChat_SQLCipher.md` §4.4:
8
+ * type=1 text
9
+ * type=3 image (XML w/ cdnUrl/md5/imgPath)
10
+ * type=34 voice .amr (XML w/ voiceLength/fileName)
11
+ * type=43 video (XML w/ cdnUrl)
12
+ * type=47 GIF/emoji (XML w/ md5/filename)
13
+ * type=49 composite — nested <appmsg type="N">, sub-types:
14
+ * 2 image, 3 music, 4 video, 5 link, 6 file, 8 GIF,
15
+ * 17 location, 19 forwarded, 21 redpacket, 33/36 mini-program,
16
+ * 51 channel video
17
+ * type=10000 system message
18
+ *
19
+ * Output is always `{ kind, text, structured }`:
20
+ * - kind: short string ("text" / "image" / "voice" / "link" / etc.)
21
+ * - text: human-readable summary (for vault content.text)
22
+ * - structured: parsed fields (for vault content.extra)
23
+ *
24
+ * Group-message prefix `<wxid_xxx>:\n` is stripped + returned in
25
+ * `structured.senderWxid` so the message text stays clean.
26
+ */
27
+
28
+ "use strict";
29
+
30
// WeChat message `type` code → short kind name.
const TYPE_NAMES = Object.fromEntries([
  [1, "text"],
  [3, "image"],
  [34, "voice"],
  [42, "card"],
  [43, "video"],
  [47, "emoji"],
  [48, "location"],
  [49, "appmsg"],
  [50, "voipcall"],
  [10000, "system"],
]);
42
+
43
// `<appmsg>` type code (inside a type=49 message) → sub-kind name.
// Codes 33 and 36 both denote a mini-program.
const APPMSG_SUBTYPES = Object.fromEntries([
  [1, "text-link"],
  [2, "image-share"],
  [3, "music"],
  [4, "video"],
  [5, "link"],
  [6, "file"],
  [8, "gif"],
  [17, "location-share"],
  [19, "forwarded"],
  [21, "redpacket"],
  [33, "miniprogram"],
  [36, "miniprogram"],
  [51, "channel-video"],
]);
58
+
59
/**
 * Entry point: turn one decrypted WeChat message row into
 * `{ kind, text, structured }`.
 *
 * For rows whose talker is a group, a leading `<sender>:\n` prefix is
 * removed from the content before parsing and the sender id is reported as
 * `structured.senderWxid`. Message types without a dedicated parser fall
 * back to a truncated copy of the body.
 *
 * @param {object} row  { content, type, isSend, talker, ... }
 * @returns {{ kind, text, structured }}
 */
function parseContent(row) {
  if (!row || typeof row !== "object") {
    return { kind: "unknown", text: "", structured: {} };
  }

  const type = Number(row.type);
  const rawContent = typeof row.content === "string" ? row.content : "";

  // Peel off the "<wxid>:\n" sender prefix that group messages carry.
  let senderWxid = null;
  let body = rawContent;
  if (isGroupTalker(row.talker)) {
    const prefix = rawContent.match(/^([a-zA-Z0-9_-]+):\n/);
    if (prefix) {
      senderWxid = prefix[1];
      body = rawContent.slice(prefix[0].length);
    }
  }

  // Thunks keep dispatch lazy: only the matching parser is ever touched.
  const parsers = new Map([
    [1, () => parseText(body)],
    [3, () => parseImage(body)],
    [34, () => parseVoice(body)],
    [42, () => parseCard(body)],
    [43, () => parseVideo(body)],
    [47, () => parseEmoji(body)],
    [48, () => parseLocation(body)],
    [49, () => parseAppMsg(body)],
    [50, () => parseVoipCall(body)],
    [10000, () => parseSystem(body)],
  ]);

  const parse = parsers.get(type);
  const result = parse
    ? parse()
    : {
        kind: TYPE_NAMES[type] || `type-${type}`,
        text: body.slice(0, 200),
        structured: { type, body: body.slice(0, 1000) },
      };

  if (senderWxid) {
    result.structured = { ...result.structured, senderWxid };
  }
  return result;
}
129
+
130
+ // ─── per-type parsers ────────────────────────────────────────────────────
131
+
132
/** type=1 — plain text: the body is the message, nothing structured. */
function parseText(body) {
  const structured = {};
  return { kind: "text", text: body, structured };
}
135
+
136
/**
 * type=3 — image. Reads the attributes of the `<img>` element; the big
 * CDN URL is preferred over the mid-size one.
 */
function parseImage(body) {
  const attrs = parseXmlAttrs(body, "img");
  const byteLength = attrs.length ? parseInt(attrs.length, 10) : null;
  const structured = {
    cdnUrl: attrs.cdnbigimgurl || attrs.cdnmidimgurl || null,
    md5: attrs.md5 || null,
    length: byteLength,
  };
  return { kind: "image", text: "[图片]", structured };
}
148
+
149
/**
 * type=34 — voice. Reads the attributes of the `<voicemsg>` element.
 *
 * NOTE(review): `fileName` is filled from `clientmsgid` and `fileType` from
 * `fromusername` — confirm this attribute mapping is intended.
 */
function parseVoice(body) {
  const attrs = parseXmlAttrs(body, "voicemsg");
  const voiceLength = attrs.voicelength ? parseInt(attrs.voicelength, 10) : null;
  const structured = {
    fileName: attrs.clientmsgid || null,
    voiceLength,
    fileType: attrs.fromusername || null,
  };
  return { kind: "voice", text: "[语音]", structured };
}
161
+
162
/**
 * type=42 — contact card. Reads the attributes of the `<msg>` element; the
 * summary shows the nickname, falling back to the username.
 */
function parseCard(body) {
  const attrs = parseXmlAttrs(body, "msg");
  const orNull = (value) => value || null;
  const displayName = attrs.nickname || attrs.username || "";
  return {
    kind: "card",
    text: `[名片] ${displayName}`,
    structured: {
      nickname: orNull(attrs.nickname),
      username: orNull(attrs.username),
      province: orNull(attrs.province),
      city: orNull(attrs.city),
    },
  };
}
175
+
176
/** type=43 — video. Reads the attributes of the `<videomsg>` element. */
function parseVideo(body) {
  const attrs = parseXmlAttrs(body, "videomsg");
  // Absent / empty numeric attributes become null rather than NaN.
  const toInt = (value) => (value ? parseInt(value, 10) : null);
  const structured = {
    cdnUrl: attrs.cdnvideourl || null,
    length: toInt(attrs.length),
    playLength: toInt(attrs.playlength),
  };
  return { kind: "video", text: "[视频]", structured };
}
188
+
189
/** type=47 — sticker / emoji. Reads `md5` and `type` from the `<emoji>` element. */
function parseEmoji(body) {
  const attrs = parseXmlAttrs(body, "emoji");
  const structured = {
    md5: attrs.md5 || null,
    type: attrs.type || null,
  };
  return { kind: "emoji", text: "[表情]", structured };
}
197
+
198
/**
 * type=48 — location. Reads the attributes of the `<location>` element; the
 * summary shows the label, falling back to the POI name.
 */
function parseLocation(body) {
  const attrs = parseXmlAttrs(body, "location");
  const toCoord = (value) => (value ? parseFloat(value) : null);
  const placeName = attrs.label || attrs.poiname || "";
  return {
    kind: "location",
    text: `[位置] ${placeName}`,
    structured: {
      x: toCoord(attrs.x),
      y: toCoord(attrs.y),
      label: attrs.label || null,
      poiName: attrs.poiname || null,
    },
  };
}
211
+
212
/**
 * type=49 — composite "appmsg".
 *
 * Layout: <msg><appmsg type="N">…subtype-specific children…</appmsg></msg>.
 * The sub-kind comes from APPMSG_SUBTYPES (unknown codes become
 * `appmsg-<code>`); title / des / url are always extracted, and a few
 * sub-kinds add their own fields.
 */
function parseAppMsg(body) {
  const appType = extractAppMsgType(body);
  const title = extractTag(body, "title");
  const desc = extractTag(body, "des");
  const url = extractTag(body, "url");

  const knownSubtype = APPMSG_SUBTYPES[appType];
  const subtype = knownSubtype ? knownSubtype : `appmsg-${appType}`;

  const structured = {
    appType,
    subtype,
    title: title || null,
    desc: desc || null,
    url: url || null,
  };

  switch (appType) {
    case 21: // red packet
      structured.redPacketTitle = title;
      break;
    case 6: // file
      structured.fileName = title;
      structured.fileSize = extractTag(body, "totallen");
      break;
    case 33: // mini program
    case 36:
      structured.miniProgramName =
        extractTag(body, "sourcedisplayname") || extractTag(body, "weappiconurl") || title;
      break;
    default:
      break;
  }

  const label = `[${subtype}]`;
  return { kind: subtype, text: title ? `${label} ${title}` : label, structured };
}
246
+
247
/** type=50 — voice/video call. Keeps at most the first 500 characters of the body. */
function parseVoipCall(body) {
  const raw = body.slice(0, 500);
  return { kind: "voipcall", text: "[通话]", structured: { raw } };
}
254
+
255
/**
 * type=10000 — system notice. Markup is replaced by spaces, whitespace runs
 * are collapsed, and the result is trimmed and capped at 300 characters.
 */
function parseSystem(body) {
  const withoutTags = body.replace(/<[^>]+>/g, " ");
  const collapsed = withoutTags.replace(/\s+/g, " ").trim();
  return { kind: "system", text: collapsed.slice(0, 300), structured: {} };
}
262
+
263
+ // ─── helpers ────────────────────────────────────────────────────────────
264
+
265
/**
 * Parse the attributes of the first `<tagName …>` element into a flat map.
 * E.g. <img attr1="v1" attr2='v2' /> → { attr1: "v1", attr2: "v2" }.
 *
 * - Attribute names are lower-cased; names may contain `:`, `.` and `-`.
 * - Values may be double- or single-quoted, may contain ">", and are passed
 *   through `decodeXmlEntities` (so `&amp;` in a URL becomes `&`), matching
 *   what `extractTag` does for element text.
 * - `tagName` is matched literally (regex metacharacters are escaped) and
 *   case-insensitively, on a word boundary so "msg" does not match
 *   `<msgsource>`.
 *
 * @param {string} xml
 * @param {string} tagName
 * @returns {object} attribute map; `{}` when `xml` is not a non-empty string
 *   or the tag isn't found
 */
function parseXmlAttrs(xml, tagName) {
  if (typeof xml !== "string" || xml.length === 0) return {};

  const safeTag = String(tagName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Consume quoted values whole so a ">" inside one does not end the tag.
  const tagRe = new RegExp(`<${safeTag}\\b((?:"[^"]*"|'[^']*'|[^>"'])*)`, "i");
  const tagMatch = tagRe.exec(xml);
  if (!tagMatch) return {};

  const out = {};
  const attrRe = /([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  for (const [, name, doubleQuoted, singleQuoted] of tagMatch[1].matchAll(attrRe)) {
    const rawValue = doubleQuoted !== undefined ? doubleQuoted : singleQuoted;
    out[name.toLowerCase()] = decodeXmlEntities(rawValue);
  }
  return out;
}
284
+
285
/**
 * Pull the text content of the first `<tagName>` element:
 * <title>X</title> → "X".
 *
 * - The opening tag may carry attributes; matching is case-insensitive and
 *   `tagName` is treated literally (regex metacharacters are escaped).
 * - Content wrapped in a single CDATA section is unwrapped and returned
 *   verbatim (CDATA text is literal, so no entity decoding is applied).
 * - Any other content is trimmed and passed through `decodeXmlEntities`.
 *
 * @param {string} xml
 * @param {string} tagName
 * @returns {string|null} the content, or null when `xml` is not a string or
 *   the element isn't found
 */
function extractTag(xml, tagName) {
  if (typeof xml !== "string") return null;

  const safeTag = String(tagName).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`<${safeTag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${safeTag}>`, "i");
  const m = re.exec(xml);
  if (!m) return null;

  const inner = m[1].trim();
  const CDATA_OPEN = "<![CDATA[";
  const CDATA_CLOSE = "]]>";
  const isSingleCdata =
    inner.startsWith(CDATA_OPEN) &&
    inner.endsWith(CDATA_CLOSE) &&
    // exactly one section: the first "]]>" is the one at the very end
    inner.indexOf(CDATA_CLOSE) === inner.length - CDATA_CLOSE.length;
  if (isSingleCdata) {
    return inner.slice(CDATA_OPEN.length, -CDATA_CLOSE.length).trim();
  }
  return decodeXmlEntities(inner);
}
295
+
296
/**
 * Find the numeric appmsg type of a type=49 message.
 *
 * Two encodings are recognised, whichever occurs first in the text:
 *   - a `type` attribute on the `<appmsg>` element (double or single quoted)
 *   - a `<type>N</type>` child element
 *
 * The attribute must be named exactly `type`: it has to be preceded by
 * whitespace, so attributes that merely end in "type" (e.g. `subtype="9"`)
 * are not mistaken for it.
 *
 * @param {string} xml
 * @returns {number} the type code, or -1 when `xml` is not a string or no
 *   type is present
 */
function extractAppMsgType(xml) {
  if (typeof xml !== "string") return -1;
  const re = /<appmsg\s+(?:[^>]*?\s)?type\s*=\s*["'](\d+)["']|<type>\s*(\d+)\s*<\/type>/i;
  const m = re.exec(xml);
  if (!m) return -1;
  const digits = m[1] !== undefined ? m[1] : m[2];
  return parseInt(digits, 10);
}
303
+
304
/**
 * Decode XML character and entity references in a string.
 *
 * Handles the five predefined entities (&amp; &lt; &gt; &quot; &apos;) and
 * numeric references in decimal (&#65;) or hex (&#x41;) form.
 *
 * Everything is decoded in a single pass, so the output of one replacement
 * is never re-scanned: "&amp;lt;" decodes to the literal text "&lt;", not
 * to "<". Numeric references are resolved as full Unicode code points
 * (so astral characters such as emoji survive); references beyond U+10FFFF
 * are left untouched.
 *
 * @param {*} s value to decode; coerced with String()
 * @returns {string}
 */
function decodeXmlEntities(s) {
  const NAMED = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  const MAX_CODE_POINT = 0x10ffff;
  return String(s).replace(
    /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#[xX]([0-9a-fA-F]+));/g,
    (match, name, dec, hex) => {
      if (name !== undefined) return NAMED[name];
      const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
      if (!Number.isInteger(codePoint) || codePoint > MAX_CODE_POINT) return match;
      return String.fromCodePoint(codePoint);
    },
  );
}
312
+
313
/** True when `talker` is a string ending in "@chatroom" (a group chat id). */
function isGroupTalker(talker) {
  if (typeof talker !== "string") {
    return false;
  }
  return talker.endsWith("@chatroom");
}
317
+
318
+ module.exports = {
319
+ parseContent,
320
+ parseXmlAttrs,
321
+ extractTag,
322
+ extractAppMsgType,
323
+ isGroupTalker,
324
+ TYPE_NAMES,
325
+ APPMSG_SUBTYPES,
326
+ };