@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,347 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect } from "vitest";
4
+
5
+ const {
6
+ CATEGORIES,
7
+ ALL_CATEGORIES,
8
+ classifyLayer1,
9
+ classifyLayer2,
10
+ classifyEmail,
11
+ parseLayer2Response,
12
+ } = require("../../lib/adapters/email-imap/classifier");
13
+ const { MockLLMClient } = require("../../lib/llm-client");
14
+
15
/**
 * Builds a minimal email-shaped fixture for the classifier tests.
 *
 * Each call creates a fresh object with neutral defaults. Any key present in
 * `overrides` replaces the corresponding default wholesale (shallow merge —
 * nested values such as `from` or `headers` are not merged).
 *
 * @param {object} [overrides] - Fields that replace or extend the defaults.
 * @returns {object} A new fixture object.
 */
const email = (overrides = {}) => {
  const defaults = {
    from: [{ name: "Someone", address: "user@example.com" }],
    subject: "Hello",
    textBody: "",
    htmlBody: "",
    attachments: [],
    headers: {},
    indicatorHeaders: {},
  };

  // Later spread wins, so caller-supplied keys take precedence.
  return { ...defaults, ...overrides };
};
25
+
26
+ // ─── Layer 1 ─────────────────────────────────────────────────────────────
27
+
28
// Layer-1 expectations for bank-statement and credit-card-bill emails.
describe("Layer 1 — bank statements", () => {
  // Classifies the default fixture with its sender (and any extra fields) replaced.
  const classifySender = (address, extra = {}) =>
    classifyLayer1(email({ from: [{ address }], ...extra }));

  it("招商银行 from-domain + 对账单 subject → bill_bank with high confidence", () => {
    const result = classifySender("ebank@cmbchina.com", {
      subject: "招商银行信用卡 11 月对账单",
    });

    expect(result.category).toBe(CATEGORIES.BILL_BANK);
    expect(result.confidence).toBeGreaterThanOrEqual(0.92);
    expect(result.ruleName).toContain("bill_bank");
  });

  it("中国银行 from-domain alone (no subject keyword) → bill_bank confidence ~0.9", () => {
    const result = classifySender("noreply@bochk.cn", { subject: "您的账户更新" });

    expect(result.category).toBe(CATEGORIES.BILL_BANK);
    expect(result.confidence).toBeGreaterThanOrEqual(0.85);
  });

  it("ICBC from-domain → bill_bank", () => {
    const result = classifySender("ebanking@icbc.com.cn");

    expect(result.category).toBe(CATEGORIES.BILL_BANK);
  });

  it("credit-card subject keyword overrides generic bill_bank", () => {
    const result = classifySender("card@cmbchina.com", {
      subject: "信用卡 11 月对账单",
    });

    // Per the original author's note: bill_bank.cn-bank-major fires at 0.95
    // and bill_credit at 0.92, so bank wins on confidence. What matters here
    // is that BOTH rules match without breaking, hence either category passes.
    const acceptable = [CATEGORIES.BILL_BANK, CATEGORIES.BILL_CREDIT];
    expect(acceptable).toContain(result.category);
    expect(result.confidence).toBeGreaterThanOrEqual(0.9);
  });

  it("subject 信用卡账单 alone (no bank domain) → bill_credit", () => {
    const result = classifySender("noreply@somerandombank.example", {
      subject: "您的信用卡账单已生成",
    });

    expect(result.category).toBe(CATEGORIES.BILL_CREDIT);
  });
});
72
+
73
// Layer-1 expectations for e-commerce senders and order/shipping subjects.
describe("Layer 1 — e-commerce orders", () => {
  // Classifies the default fixture with the given sender address and subject.
  const classifyOrderMail = (address, subject) =>
    classifyLayer1(email({ from: [{ address }], subject }));

  it("淘宝 + 订单 → order high confidence", () => {
    const { category, confidence } = classifyOrderMail(
      "service@taobao.com",
      "您的订单 1234567 已发货",
    );

    expect(category).toBe(CATEGORIES.ORDER);
    expect(confidence).toBeGreaterThanOrEqual(0.92);
  });

  it("京东 domain alone → order", () => {
    const { category } = classifyOrderMail("marketing@jd.com", "限时优惠");

    expect(category).toBe(CATEGORIES.ORDER);
  });

  it("拼多多 + 已签收 → order", () => {
    const { category } = classifyOrderMail("no-reply@pinduoduo.com", "已签收,请确认");

    expect(category).toBe(CATEGORIES.ORDER);
  });

  it("Amazon → order", () => {
    const { category } = classifyOrderMail(
      "auto-confirm@amazon.com",
      "Your order has shipped",
    );

    expect(category).toBe(CATEGORIES.ORDER);
  });
});
101
+
102
// Layer-1 expectations for travel senders and travel-related subject keywords.
describe("Layer 1 — travel", () => {
  it("携程 → travel", () => {
    const message = email({
      from: [{ address: "noreply@ctrip.com" }],
      subject: "您的酒店预订",
    });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.TRAVEL);
  });

  it("12306 → travel", () => {
    const message = email({
      from: [{ address: "no-reply@12306.cn" }],
      subject: "出票成功",
    });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.TRAVEL);
  });

  it("航班 keyword without travel domain → travel medium-confidence", () => {
    // Sender is a generic domain, so only the subject can drive the match.
    const message = email({
      from: [{ address: "noreply@example.com" }],
      subject: "您的航班 CA1234 出票成功",
    });
    const outcome = classifyLayer1(message);

    expect(outcome.category).toBe(CATEGORIES.TRAVEL);
    expect(outcome.confidence).toBeGreaterThanOrEqual(0.85);
  });
});
122
+
123
// Layer-1 expectations for government senders and government-service keywords.
describe("Layer 1 — government", () => {
  it(".gov.cn → government", () => {
    const message = email({
      from: [{ address: "notify@beijing.gov.cn" }],
      subject: "您的纳税申报已提交",
    });
    const { category } = classifyLayer1(message);

    expect(category).toBe(CATEGORIES.GOVERNMENT);
  });

  it("社保 keyword alone → government", () => {
    // Non-government sender domain: the subject keyword is the only signal.
    const message = email({
      from: [{ address: "info@somewebsite.com" }],
      subject: "您的社保缴费记录",
    });
    const { category } = classifyLayer1(message);

    expect(category).toBe(CATEGORIES.GOVERNMENT);
  });
});
140
+
141
// Layer-1 expectations for account-registration style emails
// (verification codes, password resets, welcome messages).
describe("Layer 1 — register", () => {
  it("验证码 → register", () => {
    const message = email({
      from: [{ address: "noreply@somesite.com" }],
      subject: "您的验证码是 1234",
    });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.REGISTER);
  });

  it("Password reset → register", () => {
    const message = email({
      from: [{ address: "support@app.io" }],
      subject: "Password reset request",
    });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.REGISTER);
  });

  it("Welcome to → register lower confidence", () => {
    const outcome = classifyLayer1(email({ subject: "Welcome to ChainlessChain" }));

    expect(outcome.category).toBe(CATEGORIES.REGISTER);
    // Below 0.85 so the orchestrator would hand this one to Layer 2.
    expect(outcome.confidence).toBeLessThan(0.85);
  });
});
164
+
165
// Layer-1 expectations for bulk / automated mail detected via headers.
describe("Layer 1 — notify (marketing)", () => {
  it("List-Unsubscribe header → notify", () => {
    const message = email({
      from: [{ address: "newsletter@example.com" }],
      subject: "Our weekly digest",
      indicatorHeaders: { "list-unsubscribe": "<mailto:unsub@example.com>" },
    });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.NOTIFY);
  });

  it("Precedence: bulk → notify", () => {
    const message = email({ headers: { precedence: "bulk" } });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.NOTIFY);
  });

  it("Auto-Submitted: auto-generated → notify", () => {
    const message = email({ headers: { "auto-submitted": "auto-generated" } });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.NOTIFY);
  });

  it("Marketing header doesn't override bill_bank (specificity wins)", () => {
    // Bank sender domain plus a marketing header: the bank rule must win.
    const message = email({
      from: [{ address: "promo@cmbchina.com" }],
      subject: "招行卡片新优惠",
      indicatorHeaders: { "list-unsubscribe": "<...>" },
    });

    expect(classifyLayer1(message).category).toBe(CATEGORIES.BILL_BANK);
  });
});
198
+
199
// Layer-1 behaviour when nothing matches or the input is not an email at all.
describe("Layer 1 — defaults & malformed input", () => {
  it("no signals → other with 0 confidence", () => {
    const message = email({
      from: [{ address: "friend@gmail.com" }],
      subject: "Hi how are you",
    });
    const { category, confidence } = classifyLayer1(message);

    expect(category).toBe(CATEGORIES.OTHER);
    expect(confidence).toBe(0);
  });

  it("null input → other", () => {
    const outcome = classifyLayer1(null);

    expect(outcome.category).toBe(CATEGORIES.OTHER);
  });

  it("layer field is 'L1'", () => {
    const message = email({ from: [{ address: "x@taobao.com" }] });

    expect(classifyLayer1(message).layer).toBe("L1");
  });
});
219
+
220
+ // ─── Layer 2 — LLM disambiguation ────────────────────────────────────────
221
+
222
// Layer-2 expectations: parsing the LLM reply, and falling back to the
// supplied Layer-1 result when the reply is unusable or the LLM call fails.
describe("Layer 2 LLM classifier", () => {
  // Mock LLM that answers with the given canned reply text.
  const llmReplying = (reply) => new MockLLMClient({ reply });

  it("parses LLM JSON response into category/confidence/reason", async () => {
    const llm = llmReplying(
      '{"category":"order","confidence":0.88,"reason":"shipment notification"}',
    );
    const message = email({ subject: "ambiguous" });

    const verdict = await classifyLayer2(message, { llm });

    expect(verdict.category).toBe(CATEGORIES.ORDER);
    expect(verdict.confidence).toBe(0.88);
    expect(verdict.reason).toContain("shipment");
    expect(verdict.layer).toBe("L2");
  });

  it("strips markdown code fences", async () => {
    const llm = llmReplying('```json\n{"category":"travel","confidence":0.9}\n```');

    const verdict = await classifyLayer2(email(), { llm });

    expect(verdict.category).toBe(CATEGORIES.TRAVEL);
  });

  it("regex-falls back to find first {...} when commentary precedes JSON", async () => {
    const llm = llmReplying(
      'Sure, here is my classification: {"category":"register","confidence":0.6}',
    );

    const verdict = await classifyLayer2(email(), { llm });

    expect(verdict.category).toBe(CATEGORIES.REGISTER);
  });

  it("malformed JSON → falls back to layer1 result", async () => {
    const llm = llmReplying("I think it's an order email");
    const fallback = {
      category: CATEGORIES.NOTIFY,
      confidence: 0.4,
      ruleName: "fallback",
    };

    const verdict = await classifyLayer2(email(), { llm, fallback });

    expect(verdict.category).toBe(CATEGORIES.NOTIFY);
    expect(verdict.layer).toBe("L1-fallback");
  });

  it("LLM throws → falls back to layer1 result", async () => {
    const llm = new MockLLMClient({});
    // Replace the chat method so every call rejects, simulating an outage.
    llm.chat = async () => {
      throw new Error("Ollama down");
    };
    const fallback = { category: CATEGORIES.BILL_BANK, confidence: 0.7 };

    const verdict = await classifyLayer2(email(), { llm, fallback });

    expect(verdict.category).toBe(CATEGORIES.BILL_BANK);
    expect(verdict.layer).toBe("L1-fallback");
  });

  it("unknown category from LLM → falls back to fallback / OTHER", async () => {
    const llm = llmReplying('{"category":"alien-spam","confidence":0.9}');

    // No fallback supplied, so the expected category is OTHER.
    const verdict = await classifyLayer2(email(), { llm });

    expect(verdict.category).toBe(CATEGORIES.OTHER);
    expect(verdict.layer).toBe("L1-fallback");
  });

  it("no LLM → throws", async () => {
    const attempt = classifyLayer2(email(), {});

    await expect(attempt).rejects.toThrow(/llm/i);
  });

  it("parseLayer2Response: strict JSON", () => {
    const parsed = parseLayer2Response('{"category":"order"}');

    expect(parsed).toEqual({ category: "order" });
  });

  it("parseLayer2Response: empty / non-json → null", () => {
    expect(parseLayer2Response("")).toBeNull();
    expect(parseLayer2Response("just text no json")).toBeNull();
  });
});
287
+
288
+ // ─── classifyEmail orchestrator ─────────────────────────────────────────
289
+
290
// Orchestrator expectations: when Layer 1 alone answers, when Layer 2 is
// consulted, and how the options (llm, disableLayer2, minLayer1Confidence)
// steer that decision.
describe("classifyEmail orchestrator", () => {
  // Mock LLM that answers with the given canned reply text.
  const llmReplying = (reply) => new MockLLMClient({ reply });

  it("high-confidence layer1 short-circuits layer2 even when LLM provided", async () => {
    const llm = llmReplying('{"category":"other"}');
    const message = email({
      from: [{ address: "x@taobao.com" }],
      subject: "订单已发货",
    });

    const verdict = await classifyEmail(message, { llm });

    expect(verdict.layer).toBe("L1");
    expect(verdict.category).toBe(CATEGORIES.ORDER);
    // The LLM must not have been called at all.
    expect(llm.calls).toHaveLength(0);
  });

  it("low-confidence layer1 → falls through to layer2", async () => {
    const llm = llmReplying('{"category":"register","confidence":0.7}');
    const message = email({
      from: [{ address: "service@unknown-bank.example" }],
      subject: "Welcome to our service",
    });

    const verdict = await classifyEmail(message, { llm });

    expect(verdict.layer).toBe("L2");
    expect(verdict.category).toBe(CATEGORIES.REGISTER);
    expect(llm.calls).toHaveLength(1);
  });

  it("no LLM + low-confidence layer1 → returns layer1 as-is", async () => {
    const verdict = await classifyEmail(email());

    expect(verdict.layer).toBe("L1");
    expect(verdict.category).toBe(CATEGORIES.OTHER);
  });

  it("disableLayer2 forces layer1-only even with LLM provided", async () => {
    const llm = llmReplying('{"category":"order"}');

    const verdict = await classifyEmail(email(), { llm, disableLayer2: true });

    expect(verdict.layer).toBe("L1");
    expect(llm.calls).toHaveLength(0);
  });

  it("minLayer1Confidence threshold respected (0.99 forces layer2)", async () => {
    const llm = llmReplying('{"category":"travel","confidence":0.7}');
    const message = email({
      from: [{ address: "x@taobao.com" }],
      subject: "订单",
    });

    const verdict = await classifyEmail(message, { llm, minLayer1Confidence: 0.99 });

    // Per the original author's note: taobao + 订单 hits 0.95, which is
    // below the 0.99 threshold, so Layer 2 fires.
    expect(verdict.layer).toBe("L2");
  });

  it("ALL_CATEGORIES contains the 8 documented categories", () => {
    const documented = [
      "bill_bank", "bill_credit", "order", "travel",
      "government", "register", "notify", "other",
    ];

    expect(ALL_CATEGORIES).toHaveLength(8);
    expect(ALL_CATEGORIES).toEqual(expect.arrayContaining(documented));
  });
});