@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Phase 6 — counterparty (交易对方) classifier.
3
+ *
4
+ * Design doc §5.5 simplified resolver — full Phase 8 EntityResolver
5
+ * will replace this with the embedding+LLM pipeline. v0 strategy:
6
+ *
7
+ * 1. KNOWN_MERCHANTS membership / substring → "merchant"
8
+ * 2. Heuristic keyword found anywhere in the name (公司 / 店 / 银行 / etc.) → "merchant"
9
+ * 3. 2-4 字纯中文 → "contact" (likely a personal name)
10
+ * 4. Default → "unknown"
11
+ *
12
+ * The "unknown" bucket lets Phase 8 EntityResolver pick these up later.
13
+ * `needs_resolve: true` is stamped onto Person.extra so a future job can
14
+ * find them via WHERE clause.
15
+ */
16
+
17
+ "use strict";
18
+
19
/**
 * Whitelist of well-known Chinese consumer merchants and platforms.
 *
 * `classifyCounterparty` treats a counterparty as a merchant when its
 * normalized name contains any entry of this set as a substring, so short
 * entries match broadly. Entries are grouped by category purely for human
 * readability; being a Set, the order and any repetition carry no meaning.
 * Each name is listed exactly once.
 */
const KNOWN_MERCHANTS = new Set([
  // ── E-commerce ───────────────────────────────────────────────────
  "淘宝", "天猫", "京东", "京东商城", "拼多多", "苏宁易购", "唯品会",
  "蘑菇街", "考拉海购", "网易严选", "得物", "小红书", "1号店",
  "Amazon", "亚马逊",
  // ── Food / delivery / dining ─────────────────────────────────────
  "美团", "美团外卖", "饿了么", "大众点评", "盒马", "肯德基", "麦当劳",
  "星巴克", "瑞幸咖啡", "蜜雪冰城", "海底捞", "Shake Shack",
  "Costa", "Tim Hortons", "汉堡王", "永和大王", "外婆家", "西贝",
  // ── Transport / travel ───────────────────────────────────────────
  "滴滴", "滴滴出行", "曹操出行", "T3 出行", "高德", "高德地图",
  "百度地图", "12306", "携程", "去哪儿", "同程", "飞猪", "途牛",
  "驴妈妈", "哈啰", "青桔", "美团单车", "摩拜",
  // ── Telco / utility ──────────────────────────────────────────────
  "国家电网", "中国移动", "中国联通", "中国电信", "中国铁通",
  "燃气公司", "水务局", "自来水公司", "燃气集团",
  "公积金", "社保",
  // ── Media / streaming ────────────────────────────────────────────
  "爱奇艺", "腾讯视频", "优酷", "B站", "哔哩哔哩", "芒果 TV",
  "网易云音乐", "QQ 音乐", "酷狗", "酷我音乐",
  // ── Finance / platforms ──────────────────────────────────────────
  "支付宝", "蚂蚁财富", "余额宝", "花呗", "借呗", "网商银行",
  "微信支付",
  // ── Health / pharmacy ────────────────────────────────────────────
  "京东健康", "阿里健康", "丁香医生", "平安好医生", "美年大健康",
  // ── Retail brick-and-mortar ──────────────────────────────────────
  "沃尔玛", "永辉超市", "华润万家", "家乐福", "大润发", "山姆会员店",
  "便利蜂", "全家", "罗森", "7-Eleven",
  // ── Apple / Google / SaaS ────────────────────────────────────────
  "App Store", "Apple", "iCloud", "Google Play",
  // ── Cosmetics / fashion ──────────────────────────────────────────
  "屈臣氏", "丝芙兰", "优衣库", "ZARA", "H&M", "Nike", "Adidas",
  // ── Education / digital ──────────────────────────────────────────
  "得到", "极客时间", "知乎", "在行", "腾讯课堂", "网易公开课",
  // ── Government ("国家电网" already appears under Telco / utility) ──
  "国家税务总局", "税务局", "公安局", "车管所", "民政局",
]);
61
+
62
// Business-type keywords (company / shop / service words). The pattern is
// unanchored, so a keyword counts wherever it appears in the name — not only
// as a trailing suffix.
const MERCHANT_SUFFIX_RE = new RegExp(
  `(${[
    "公司", "集团", "有限", "股份", "店", "超市", "药房", "药店", "医院",
    "诊所", "学校", "学院", "大学", "加油站", "银行", "证券", "保险", "基金",
    "管理处", "物业", "餐厅", "酒店", "宾馆", "快递", "物流", "科技",
  ].join("|")})`,
);

// Personal-name heuristic: the whole string must be 2–4 characters from the
// U+4E00–U+9FA5 CJK range, with nothing else mixed in.
const PERSONAL_NAME_RE = /^[一-龥]{2,4}$/;
67
+
68
/**
 * Strip masking and contact-info decorations from an Alipay counterparty
 * string before it is classified or turned into an id.
 *
 * Examples of raw input: "**先生(189****1234)", "***公司 北京分公司".
 *
 * Removes, in order:
 *   1. any parenthesised group, whether the parentheses are ASCII `(...)`
 *      or full-width (U+FF08 ... U+FF09);
 *   2. every run of `*` masking characters;
 *   3. leading / trailing whitespace.
 *
 * @param {string} name raw counterparty string
 * @returns {string} cleaned name, or "" when `name` is not a string
 */
function normalizeCounterpartyName(name) {
  if (typeof name !== "string") return "";
  return name
    // The full-width pair is written with \u escapes on purpose: spelled with
    // ASCII parens the pattern degrades into a capture group that matches
    // (and deletes) every character except ")".
    .replace(/[(\uFF08][^)\uFF09]*[)\uFF09]/g, "")
    .replace(/\*+/g, "") // masking asterisks
    .trim();
}
78
+
79
/**
 * Decide whether a counterparty string looks like a merchant, a personal
 * contact, or neither.
 *
 * The raw string is normalized first, then the checks run in this order:
 *   - contains any KNOWN_MERCHANTS entry as a substring → "merchant"
 *   - matches MERCHANT_SUFFIX_RE                        → "merchant"
 *   - matches PERSONAL_NAME_RE                          → "contact"
 *   - anything else, including an empty name            → "unknown"
 *
 * @param {string} rawName
 * @returns {"merchant"|"contact"|"unknown"}
 */
function classifyCounterparty(rawName) {
  const cleaned = normalizeCounterpartyName(rawName);
  if (cleaned.length === 0) {
    return "unknown";
  }

  // Merchant checks take precedence over the personal-name heuristic.
  const mentionsKnownMerchant = Array.from(KNOWN_MERCHANTS).some((merchant) =>
    cleaned.includes(merchant),
  );
  if (mentionsKnownMerchant || MERCHANT_SUFFIX_RE.test(cleaned)) {
    return "merchant";
  }

  return PERSONAL_NAME_RE.test(cleaned) ? "contact" : "unknown";
}
102
+
103
/**
 * Derive a deterministic Person id from a counterparty string so that
 * re-importing the same counterparty name yields the same id.
 *
 * The name is normalized with `normalizeCounterpartyName`, slugified with
 * `slugify`, and prefixed with "person-alipay-". No hashing is involved:
 * two raw strings share an id exactly when their slugs are equal.
 *
 * @param {string} rawName
 * @returns {string} id of the form `person-alipay-<slug>`
 */
function counterpartyToPersonId(rawName) {
  const slug = slugify(normalizeCounterpartyName(rawName));
  return `person-alipay-${slug}`;
}
115
+
116
/**
 * Turn arbitrary text into a compact id fragment.
 *
 * Steps: coerce to string (falsy input becomes ""), lowercase, collapse each
 * whitespace run into a single "-", drop every character that is not an
 * ASCII word character, a CJK character in U+4E00–U+9FFF, or "-", and keep
 * at most the first 80 UTF-16 code units.
 *
 * @param {*} s
 * @returns {string}
 */
function slugify(s) {
  const lowered = String(s || "").toLowerCase();
  const dashed = lowered.replace(/\s+/g, "-");
  const cleaned = dashed.replace(/[^\w一-鿿-]/g, "");
  return cleaned.slice(0, 80);
}
123
+
124
+ module.exports = {
125
+ KNOWN_MERCHANTS,
126
+ classifyCounterparty,
127
+ counterpartyToPersonId,
128
+ normalizeCounterpartyName,
129
+ };
@@ -0,0 +1,217 @@
1
+ /**
2
+ * Phase 6 — Alipay 账单 CSV 解析器
3
+ *
4
+ * 支付宝 "开具交易流水证明" 导出的 CSV 格式(GBK 默认,新版部分 UTF-8 BOM):
5
+ *
6
+ * 行 1 `支付宝交易记录明细查询`
7
+ * 行 2 `账号:[email@example.com / 13800001111]`
8
+ * 行 3 `起始日期:[2024-04-01 00:00:00] 终止日期:[2024-05-01 00:00:00]`
9
+ * 行 4 `-------------------交易记录明细列表-------------------`
10
+ * 行 5 `交易号,商家订单号,交易创建时间,付款时间,...` ← header
11
+ * 行 6+ 数据行
12
+ * 末尾 `-------------------交易记录明细列表结束-------------------`
13
+ * 再 汇总文本("导出时间"、"用户姓名" 等元数据)— 跳过
14
+ *
15
+ * 设计选择:
16
+ * 1. 手写 parser(不引 csv-parse)。Alipay CSV 字段都用半角逗号,
17
+ * 字段内不嵌逗号(商品名含逗号也会被 Alipay 转义为中文 , 或省略),
18
+ * Naive split 已足够,单测覆盖 50+ 真实样本。
19
+ * 2. 编码:先尝 UTF-8 decode 看是否含合理的中文 magic 字符串
20
+ * ("交易号" / "支付宝");含 → UTF-8;否则降级 GBK(via iconv-lite)。
21
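+ * 3. Termination: parsing stops at the "交易记录明细列表结束" marker or at a "---"
22
+ * separator line; other non-data lines (fewer than 10 commas) are skipped, not treated as the end.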
23
+ *
24
+ * 返回 `{ header: {...meta}, rows: [...] }`:
25
+ * - header.account `email@example.com` 或手机
26
+ * - header.startDate ISO-ish string
27
+ * - header.endDate
28
+ * - rows RawTransaction 数组(design doc §5.3 形状)
29
+ */
30
+
31
+ "use strict";
32
+
33
+ /** @typedef {import('./types').RawTransaction} RawTransaction */
34
+
35
// Property names assigned to the CSV columns, in column order. Position i of
// a data row is stored under FIELD_ORDER[i]. Per the format notes at the top
// of this file the first four columns are 交易号, 商家订单号, 交易创建时间 and
// 付款时间; the remaining names presumably follow the export's column order —
// verify against a real export before reordering.
const FIELD_ORDER = [
  // identifiers
  "txId", "merchantOrderNumber",
  // timestamps
  "createdAt", "paidAt", "lastModifiedAt",
  // classification and parties
  "sourceChannel", "alipayType", "counterparty", "itemName",
  // money and state
  "amount", "direction", "status", "serviceFee", "refundedAmount",
  // trailing free-form columns
  "note", "fundStatus",
];

// The column-header line of the export begins with this token.
const MAGIC_HEADER_ROW = "交易号";
55
+
56
/**
 * Decode the raw bytes of an Alipay CSV export into text.
 *
 * Detection order:
 *   1. A UTF-8 BOM (EF BB BF) is stripped and taken as proof of UTF-8.
 *   2. Otherwise the bytes are decoded as UTF-8 and accepted when the text
 *      contains one of the expected Alipay tokens ("交易号" or
 *      "支付宝交易记录").
 *   3. Otherwise the bytes are decoded as GBK through `opts.iconvImpl`
 *      or, when that is not supplied, through iconv-lite.
 *
 * @param {Buffer} buf
 * @param {{ iconvImpl?: Function }} [opts] `iconvImpl(buffer, encoding)`
 *   returns the decoded string; injectable for tests
 * @returns {{ text: string, encoding: string }} encoding is "utf-8" or "gbk"
 * @throws {Error} when `buf` is not a Buffer, or when GBK decoding is needed
 *   and iconv-lite cannot be loaded
 */
function decodeBuffer(buf, opts = {}) {
  if (!Buffer.isBuffer(buf)) {
    throw new Error("decodeBuffer: Buffer required");
  }

  const hasUtf8Bom =
    buf.length >= 3 && buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf;
  // subarray() is the non-deprecated equivalent of Buffer#slice (a view, no copy).
  const work = hasUtf8Bom ? buf.subarray(3) : buf;

  const utf8 = work.toString("utf-8");
  // A BOM is an explicit encoding marker, so it wins even if the magic tokens
  // are absent; re-decoding such bytes as GBK would only produce mojibake.
  if (hasUtf8Bom || utf8.includes("交易号") || utf8.includes("支付宝交易记录")) {
    return { text: utf8, encoding: "utf-8" };
  }

  // Fall back to GBK.
  const iconv = typeof opts.iconvImpl === "function" ? opts.iconvImpl : loadIconvLite();
  return { text: iconv(work, "gbk"), encoding: "gbk" };
}
82
+
83
// Memoized `(buffer, encoding) => string` decoder backed by iconv-lite.
let _iconvCache = null;

/**
 * Lazily load iconv-lite and return a decode function, so the dependency is
 * only touched when a GBK file is actually encountered.
 *
 * @returns {(buf: Buffer, enc: string) => string}
 * @throws {Error} when iconv-lite cannot be loaded
 */
function loadIconvLite() {
  if (!_iconvCache) {
    let iconvLite;
    try {
      // eslint-disable-next-line global-require
      iconvLite = require("iconv-lite");
    } catch (err) {
      const detail = err && err.message ? err.message : err;
      throw new Error(
        `iconv-lite not installed — Alipay CSV needs it for GBK decode. ${detail}`,
      );
    }
    _iconvCache = (buf, enc) => iconvLite.decode(buf, enc);
  }
  return _iconvCache;
}
97
+
98
/**
 * Parse decoded Alipay CSV text into export metadata and transaction rows.
 *
 * The preamble (everything before the column-header line) is scanned for
 * the account and the start / end dates. Data rows are then read after the
 * header line until the end marker or a separator line is reached.
 *
 * @param {string} text decoded CSV content
 * @returns {{ header: object, rows: RawTransaction[], warning?: string }}
 *   `header` may carry `account`, `startDate`, `endDate`; `warning` is set
 *   only when no column-header line starting with "交易号" was found, in
 *   which case `rows` is empty. Non-string or empty input yields
 *   `{ header: {}, rows: [] }`.
 */
function parseAlipayCsv(text) {
  if (typeof text !== "string" || text.length === 0) {
    return { header: {}, rows: [] };
  }
  const lines = text.split(/\r?\n/);
  const header = {};

  // ── Step 1: scan the preamble for account + date range, then locate the
  // column-header row. Both the ASCII and the full-width colon are accepted.
  let headerIdx = -1;
  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i];
    // 账号:[email@... / phone]
    const acctMatch = line.match(/账号\s*[:\uFF1A]?\s*\[?([^\]\s]+@[^\]\s]+|\d{11})\]?/);
    if (acctMatch) header.account = acctMatch[1];
    // 起始日期:[2024-04-01 00:00:00] 终止日期:[2024-05-01 00:00:00]
    const startMatch = line.match(/起始日期\s*[:\uFF1A]?\s*\[?([\d-]+\s+[\d:]+)\]?/);
    if (startMatch) header.startDate = startMatch[1];
    const endMatch = line.match(/终止日期\s*[:\uFF1A]?\s*\[?([\d-]+\s+[\d:]+)\]?/);
    if (endMatch) header.endDate = endMatch[1];
    // trimStart() tolerates leading whitespace (and a stray BOM) before the token.
    if (line.trimStart().startsWith(MAGIC_HEADER_ROW)) {
      headerIdx = i;
      break;
    }
  }
  if (headerIdx === -1) {
    // No "交易号" header line — file is malformed / empty / not an Alipay CSV
    return { header, rows: [], warning: "header row '交易号,...' not found" };
  }

  // ── Step 2: read data rows after the header.
  const rows = [];
  for (let i = headerIdx + 1; i < lines.length; i += 1) {
    const line = lines[i];
    if (!line) continue;
    // Stop at the end marker or a separator line. Only a line that STARTS
    // with "---" counts as a separator, so a data row whose item name or
    // note merely contains "---" no longer truncates the import.
    if (line.includes("交易记录明细列表结束") || line.trimStart().startsWith("---")) break;
    // A full data line has 16 fields (15 commas). Lines with fewer than 10
    // commas are treated as trailing metadata such as "导出时间:..." and skipped.
    const commas = (line.match(/,/g) || []).length;
    if (commas < 10) continue;

    const fields = splitCsvLine(line);
    // Lenient: pad short rows with empty strings so every key is populated.
    while (fields.length < FIELD_ORDER.length) fields.push("");

    const row = {};
    FIELD_ORDER.forEach((key, j) => {
      row[key] = fields[j].trim();
    });
    // Rows without a transaction id carry nothing usable.
    if (!row.txId) continue;
    rows.push(row);
  }

  return { header, rows };
}
161
+
162
/**
 * Split one CSV line into its fields.
 *
 * Lines without any double quote take the fast path (plain split on ",").
 * Otherwise a small scanner honours double-quoted fields: commas inside
 * quotes are kept, and a doubled quote inside a quoted field yields one
 * literal quote character. The surrounding quotes themselves are dropped.
 *
 * Exported for unit tests.
 *
 * @param {string} line
 * @returns {string[]}
 */
function splitCsvLine(line) {
  if (!line.includes('"')) {
    return line.split(",");
  }

  const fields = [];
  let buffer = "";
  let quoted = false;
  let pos = 0;
  while (pos < line.length) {
    const ch = line[pos];
    if (ch === '"') {
      if (quoted && line[pos + 1] === '"') {
        // Escaped quote: emit one quote and skip both characters.
        buffer += '"';
        pos += 2;
        continue;
      }
      quoted = !quoted;
    } else if (ch === "," && !quoted) {
      fields.push(buffer);
      buffer = "";
    } else {
      buffer += ch;
    }
    pos += 1;
  }
  fields.push(buffer);
  return fields;
}
196
+
197
/**
 * Convenience entry point: decode raw CSV bytes and parse them in one call.
 *
 * @param {Buffer} buf raw CSV file bytes
 * @param {{ iconvImpl?: Function }} [opts] forwarded to `decodeBuffer`
 * @returns {{ encoding: string, header: object, rows: RawTransaction[] }}
 *   the detected encoding merged with everything `parseAlipayCsv` returns
 */
function parseAlipayCsvBuffer(buf, opts = {}) {
  const decoded = decodeBuffer(buf, opts);
  const parsed = parseAlipayCsv(decoded.text);
  // `encoding` goes first so the parsed keys follow it, as in a spread merge.
  return Object.assign({ encoding: decoded.encoding }, parsed);
}
210
+
211
+ module.exports = {
212
+ parseAlipayCsv,
213
+ parseAlipayCsvBuffer,
214
+ decodeBuffer,
215
+ splitCsvLine,
216
+ FIELD_ORDER,
217
+ };
@@ -0,0 +1,41 @@
1
+ "use strict";
2
+
3
+ const {
4
+ AlipayBillAdapter,
5
+ mapAlipayTypeToSubtype,
6
+ parseAlipayDateTime,
7
+ NAME,
8
+ VERSION,
9
+ } = require("./alipay-bill-adapter");
10
+ const {
11
+ parseAlipayCsv,
12
+ parseAlipayCsvBuffer,
13
+ decodeBuffer,
14
+ splitCsvLine,
15
+ FIELD_ORDER,
16
+ } = require("./csv-parser");
17
+ const { extractCsvFromZip } = require("./zip-decryptor");
18
+ const {
19
+ KNOWN_MERCHANTS,
20
+ classifyCounterparty,
21
+ counterpartyToPersonId,
22
+ normalizeCounterpartyName,
23
+ } = require("./counterparty");
24
+
25
+ module.exports = {
26
+ AlipayBillAdapter,
27
+ ALIPAY_BILL_NAME: NAME,
28
+ ALIPAY_BILL_VERSION: VERSION,
29
+ mapAlipayTypeToSubtype,
30
+ parseAlipayDateTime,
31
+ parseAlipayCsv,
32
+ parseAlipayCsvBuffer,
33
+ decodeAlipayBuffer: decodeBuffer,
34
+ splitAlipayCsvLine: splitCsvLine,
35
+ ALIPAY_CSV_FIELDS: FIELD_ORDER,
36
+ extractAlipayCsvFromZip: extractCsvFromZip,
37
+ ALIPAY_KNOWN_MERCHANTS: KNOWN_MERCHANTS,
38
+ classifyAlipayCounterparty: classifyCounterparty,
39
+ alipayCounterpartyToPersonId: counterpartyToPersonId,
40
+ normalizeAlipayCounterpartyName: normalizeCounterpartyName,
41
+ };
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Phase 6 — Alipay 加密 ZIP 解压器
3
+ *
4
+ * 支付宝官方导出的 `alipay_record_*.zip` 用 ZipCrypto(传统 PKZIP 密码),
5
+ * 默认密码 = 用户身份证后 6 位。adm-zip 0.5+ 内置 ZipCrypto 解密。
6
+ *
7
+ * 使用:
8
+ * const { extractCsvFromZip } = require('./zip-decryptor');
9
+ * const { buffer, filename } = await extractCsvFromZip(zipPath, { password: "123456" });
10
+ * // buffer is the CSV Buffer (filename is the ZIP entry name); pass buffer to parseAlipayCsvBuffer
11
+ *
12
+ * 失败模式:
13
+ * - 文件不存在 → throws { code: "ENOENT" }
14
+ * - Not a ZIP → throws "Failed to open ZIP: <adm-zip message>"
15
+ * - Wrong / missing password → throws an Error with code "ZIP_PASSWORD_FAILED"
16
+ * - No .csv entry in the ZIP → throws "No CSV file in ZIP; found: <entry names>"
17
+ *
18
+ * 全部 throws 是因为这个层只做"打开 + 解压"动作,错误分类放到 adapter
19
+ * authenticate / sync 路径处理(统一映射到 PersonalDataAdapter 协议)。
20
+ */
21
+
22
+ "use strict";
23
+
24
+ const fs = require("node:fs");
25
+
26
/**
 * Extract the first `.csv` entry (case-insensitive extension match) from an
 * Alipay ZIP and return its raw bytes together with the entry name.
 *
 * @param {string} zipPath path to the ZIP file
 * @param {object} [opts]
 * @param {string} [opts.password] ZIP password; "" is passed when omitted
 * @param {Function} [opts.admZipImpl] DI seam: a constructor with the
 *   adm-zip API (`new AdmZip(path)`, `getEntries()`, `readFile(entry, pw)`).
 *   Defaults to `require("adm-zip")`.
 * @returns {Promise<{ buffer: Buffer, filename: string }>}
 * @throws {Error} rejections, by failure mode:
 *   - "extractCsvFromZip: zipPath required" — empty / non-string path
 *   - code "ENOENT" — the path does not exist
 *   - "Failed to open ZIP: ..." — the archive could not be opened
 *   - "No CSV file in ZIP; found: ..." — no entry ends in .csv
 *   - code "ZIP_PASSWORD_FAILED" — the extraction error mentions
 *     "password" / "wrong", or the extracted data is null / empty
 *   - "ZIP extract failed: ..." — any other extraction error
 *   Errors that wrap an underlying failure expose it as `error.cause`.
 */
async function extractCsvFromZip(zipPath, opts = {}) {
  if (typeof zipPath !== "string" || zipPath.length === 0) {
    throw new Error("extractCsvFromZip: zipPath required");
  }
  // Surface ENOENT cleanly instead of a library-specific failure.
  if (!fs.existsSync(zipPath)) {
    const err = new Error(`ZIP file not found: ${zipPath}`);
    err.code = "ENOENT";
    throw err;
  }

  const AdmZip = typeof opts.admZipImpl === "function"
    ? opts.admZipImpl
    : loadAdmZip();

  let zip;
  try {
    zip = new AdmZip(zipPath);
  } catch (err) {
    throw new Error(
      `Failed to open ZIP: ${err && err.message ? err.message : err}`,
      { cause: err },
    );
  }

  const entries = zip.getEntries();
  const csvEntry = entries.find((e) => /\.csv$/i.test(e.entryName));
  if (!csvEntry) {
    throw new Error(
      `No CSV file in ZIP; found: ${entries.map((e) => e.entryName).join(", ") || "(empty)"}`,
    );
  }

  // adm-zip's password-aware extract: `readFile(entry, password)`.
  let csvBuffer;
  try {
    csvBuffer = zip.readFile(csvEntry, opts.password || "");
  } catch (err) {
    // The thrown value is not always an Error (it can be a plain string).
    const msg = err && err.message ? err.message : String(err);
    if (/password/i.test(msg) || /wrong/i.test(msg)) {
      const e = new Error(`ZIP password incorrect or missing`, { cause: err });
      e.code = "ZIP_PASSWORD_FAILED";
      throw e;
    }
    throw new Error(`ZIP extract failed: ${msg}`, { cause: err });
  }

  if (!Buffer.isBuffer(csvBuffer) || csvBuffer.length === 0) {
    // A null / empty result is treated as a password failure (some adm-zip
    // versions signal it this way rather than throwing).
    const e = new Error("ZIP password incorrect (empty buffer returned)");
    e.code = "ZIP_PASSWORD_FAILED";
    throw e;
  }

  return { buffer: csvBuffer, filename: csvEntry.entryName };
}
94
+
95
// Memoized adm-zip constructor; stays null until the first successful load.
let _admZipCache = null;

/**
 * Lazily load adm-zip so the dependency is only required when a ZIP is
 * actually opened without an injected implementation.
 *
 * @returns {Function} the adm-zip constructor
 * @throws {Error} when adm-zip cannot be loaded
 */
function loadAdmZip() {
  if (!_admZipCache) {
    try {
      // eslint-disable-next-line global-require
      _admZipCache = require("adm-zip");
    } catch (err) {
      const detail = err && err.message ? err.message : err;
      throw new Error(
        `adm-zip not installed — Phase 6 needs it. ${detail}`,
      );
    }
  }
  return _admZipCache;
}
108
+
109
+ module.exports = {
110
+ extractCsvFromZip,
111
+ };