@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Build a complete fixture directory with all 4 system-data sources:
4
+ *
5
+ * <out>/
6
+ * contacts2.db (raw_contacts + data + mimetypes + calls)
7
+ * mmssms.db (sms)
8
+ * wifi/
9
+ * WifiConfigStore.xml
10
+ *
11
+ * Usage:
12
+ * node scripts/_make-fixture-all.js ./fixtures
13
+ */
14
+
15
+ "use strict";
16
+
17
+ const fs = require("node:fs");
18
+ const path = require("node:path");
19
+ const Database = require("better-sqlite3-multiple-ciphers");
20
+
// Output directory: first CLI argument, defaulting to ./fixtures. Creating the
// wifi/ subdirectory recursively also creates outDir itself.
const outDir = path.resolve(process.argv[2] || "./fixtures");
fs.mkdirSync(path.join(outDir, "wifi"), { recursive: true });

/**
 * Create a fresh SQLite database at `dbPath` and fill it via `populate`.
 *
 * Any existing file at that path is removed first. The handle is closed in a
 * `finally` block so a failing statement does not leave the database open.
 *
 * @param {string} dbPath - Path of the database file to (re)create.
 * @param {(db: object) => void} populate - Creates tables and inserts rows.
 */
function writeDatabase(dbPath, populate) {
  fs.rmSync(dbPath, { force: true });
  const db = new Database(dbPath);
  try {
    populate(db);
  } finally {
    db.close();
  }
  console.log("wrote:", dbPath);
}

/**
 * Prepare `sql` once and run it for every row, in order.
 *
 * @param {object} db - Open database handle.
 * @param {string} sql - INSERT statement with positional `?` placeholders.
 * @param {Array<Array<string|number>>} rows - One array of bind values per row.
 */
function insertRows(db, sql, rows) {
  const stmt = db.prepare(sql);
  for (const row of rows) {
    stmt.run(...row);
  }
}

// ── contacts2.db ──────────────────────────────────────────────────────────
writeDatabase(path.join(outDir, "contacts2.db"), (db) => {
  db.exec(`
    CREATE TABLE raw_contacts (
      _id INTEGER PRIMARY KEY, display_name TEXT, starred INTEGER DEFAULT 0, deleted INTEGER DEFAULT 0
    );
    CREATE TABLE mimetypes (_id INTEGER PRIMARY KEY, mimetype TEXT NOT NULL UNIQUE);
    CREATE TABLE data (
      _id INTEGER PRIMARY KEY, raw_contact_id INTEGER NOT NULL, mimetype_id INTEGER NOT NULL, data1 TEXT
    );
    CREATE TABLE calls (
      _id INTEGER PRIMARY KEY, number TEXT, type INTEGER, duration INTEGER, date INTEGER, name TEXT, is_read INTEGER DEFAULT 1
    );
  `);

  // Ids used for the mimetypes rows and referenced by data.mimetype_id below.
  const MT = { phone: 5, email: 1, org: 4, note: 10, photo: 14 };

  insertRows(db, "INSERT INTO mimetypes (_id, mimetype) VALUES (?, ?)", [
    [MT.phone, "vnd.android.cursor.item/phone_v2"],
    [MT.email, "vnd.android.cursor.item/email_v2"],
    [MT.org, "vnd.android.cursor.item/organization"],
    [MT.note, "vnd.android.cursor.item/note"],
    [MT.photo, "vnd.android.cursor.item/photo"],
  ]);

  insertRows(
    db,
    "INSERT INTO raw_contacts (_id, display_name, starred, deleted) VALUES (?, ?, ?, 0)",
    [
      [1, "妈妈", 1],
      [2, "张三", 0],
      [3, "李四 Manager", 0],
      [5, "工商银行客服", 0],
    ],
  );

  insertRows(
    db,
    "INSERT INTO data (raw_contact_id, mimetype_id, data1) VALUES (?, ?, ?)",
    [
      [1, MT.phone, "13800001111"],
      [1, MT.phone, "13900002222"],
      [1, MT.email, "mom@example.com"],
      [1, MT.note, "亲妈,过年回家"],
      [2, MT.phone, "13711112222"],
      [3, MT.phone, "13822223333"],
      [3, MT.email, "lisi@corp.example.com"],
      [3, MT.org, "Example Corp"],
      [5, MT.phone, "95588"],
    ],
  );

  // Calls table inside contacts2.db (pre-Android-11 location)
  insertRows(
    db,
    "INSERT INTO calls (_id, number, type, duration, date, name, is_read) VALUES (?, ?, ?, ?, ?, ?, ?)",
    [
      [1, "13800001111", 1, 120, 1737000000000, "妈妈", 1],
      [2, "13800001111", 2, 45, 1737010000000, "妈妈", 1],
      [3, "13999998888", 3, 0, 1737020000000, "", 0],
      [4, "10086", 1, 8, 1737030000000, "中国移动", 1],
    ],
  );
});

// ── mmssms.db ─────────────────────────────────────────────────────────────
writeDatabase(path.join(outDir, "mmssms.db"), (db) => {
  db.exec(`
    CREATE TABLE sms (
      _id INTEGER PRIMARY KEY, thread_id INTEGER, address TEXT, body TEXT,
      type INTEGER, date INTEGER, read INTEGER
    );
  `);

  insertRows(
    db,
    "INSERT INTO sms (_id, thread_id, address, body, type, date, read) VALUES (?, ?, ?, ?, ?, ?, ?)",
    [
      [1, 100, "13800001111", "妈妈我到家了", 2, 1737000000000, 1],
      [2, 100, "13800001111", "好的,注意安全", 1, 1737000010000, 1],
      [3, 200, "10086", "【中国移动】您的话费余额为 ¥36.50", 1, 1737000020000, 1],
      [4, 300, "95588", "【工商银行】您的验证码为 123456,3 分钟内有效", 1, 1737000030000, 0],
    ],
  );
});

// ── wifi/WifiConfigStore.xml ──────────────────────────────────────────────
// Two saved networks: one WPA-PSK network with a pre-shared key, one open.
const wifiXml = path.join(outDir, "wifi", "WifiConfigStore.xml");
fs.writeFileSync(
  wifiXml,
  `<?xml version='1.0' encoding='UTF-8'?>
<WifiConfigStoreData>
<NetworkList>
<Network>
<WifiConfiguration>
<string name="SSID">"Home_5G"</string>
<string name="PreSharedKey">"secret"</string>
<string name="KeyMgmt">WPA-PSK</string>
<boolean name="HiddenSSID">false</boolean>
</WifiConfiguration>
</Network>
<Network>
<WifiConfiguration>
<string name="SSID">"Starbucks Free"</string>
<string name="KeyMgmt">NONE</string>
<boolean name="HiddenSSID">false</boolean>
</WifiConfiguration>
</Network>
</NetworkList>
</WifiConfigStoreData>
`,
  "utf-8",
);
console.log("wrote:", wifiXml);

console.log("\nAll fixtures ready under:", outDir);
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Build a synthetic Android contacts2.db at the given path.
4
+ *
5
+ * Only used by the smoke runner / docs walkthrough — production code never
6
+ * relies on this. Mirrors the fixture from
7
+ * packages/personal-data-hub-bridge/tests/test_parsers_system_contacts.py.
8
+ *
9
+ * Usage:
10
+ * node scripts/_make-fixture-contacts.js ./fixtures/contacts2.db
11
+ */
12
+
13
+ "use strict";
14
+
15
+ const fs = require("node:fs");
16
+ const path = require("node:path");
17
+ const Database = require("better-sqlite3-multiple-ciphers");
18
+
// Destination file: first CLI argument, defaulting to ./fixtures/contacts2.db.
// The parent directory is created if needed and any previous file is removed
// so the database always starts empty.
const target = path.resolve(process.argv[2] || "./fixtures/contacts2.db");
fs.mkdirSync(path.dirname(target), { recursive: true });
if (fs.existsSync(target)) {
  fs.unlinkSync(target);
}

const db = new Database(target);
try {
  db.exec(`
    CREATE TABLE raw_contacts (
      _id INTEGER PRIMARY KEY,
      display_name TEXT,
      starred INTEGER DEFAULT 0,
      deleted INTEGER DEFAULT 0
    );
    CREATE TABLE mimetypes (
      _id INTEGER PRIMARY KEY,
      mimetype TEXT NOT NULL UNIQUE
    );
    CREATE TABLE data (
      _id INTEGER PRIMARY KEY,
      raw_contact_id INTEGER NOT NULL,
      mimetype_id INTEGER NOT NULL,
      data1 TEXT
    );
  `);

  // Ids used for the mimetypes rows and referenced by data.mimetype_id below.
  const MT = {
    phone: 5,
    email: 1,
    org: 4,
    note: 10,
    photo: 14,
  };

  const mimeRows = [
    [MT.phone, "vnd.android.cursor.item/phone_v2"],
    [MT.email, "vnd.android.cursor.item/email_v2"],
    [MT.org, "vnd.android.cursor.item/organization"],
    [MT.note, "vnd.android.cursor.item/note"],
    [MT.photo, "vnd.android.cursor.item/photo"],
  ];
  const mimeStmt = db.prepare(
    "INSERT INTO mimetypes (_id, mimetype) VALUES (?, ?)",
  );
  for (const [id, mimetype] of mimeRows) {
    mimeStmt.run(id, mimetype);
  }

  const contactRows = [
    [1, "妈妈", 1],
    [2, "张三", 0],
    [3, "李四 Manager", 0],
    [4, "", 0], // nameless — skipped by parser
    [5, "工商银行客服", 0],
  ];
  const contactStmt = db.prepare(
    "INSERT INTO raw_contacts (_id, display_name, starred, deleted) VALUES (?, ?, ?, 0)",
  );
  for (const [id, displayName, starred] of contactRows) {
    contactStmt.run(id, displayName, starred);
  }

  const dataRows = [
    [1, MT.phone, "13800001111"],
    [1, MT.phone, "13900002222"],
    [1, MT.email, "mom@example.com"],
    [1, MT.note, "亲妈,过年回家"],
    [2, MT.phone, "13711112222"],
    [3, MT.phone, "13822223333"],
    [3, MT.email, "lisi@corp.example.com"],
    [3, MT.org, "Example Corp"],
    [5, MT.phone, "95588"],
  ];
  const dataStmt = db.prepare(
    "INSERT INTO data (raw_contact_id, mimetype_id, data1) VALUES (?, ?, ?)",
  );
  for (const [rawContactId, mimetypeId, value] of dataRows) {
    dataStmt.run(rawContactId, mimetypeId, value);
  }
} finally {
  // Close the handle whether or not every insert succeeded.
  db.close();
}
console.log("fixture written:", target);
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Phase 8.8 — EntityResolver evaluation runner.
4
+ *
5
+ * Reads a labeled pair fixture, runs each pair through the configured
6
+ * stages, computes recall / accuracy / per-stage breakdown, and exits
7
+ * non-zero if the CI gate (recall ≥ 80%, accuracy ≥ 90%) fails and --require-pass is set.
8
+ *
9
+ * Usage:
10
+ * node scripts/evaluate-entity-resolver.js \
11
+ * [--fixture <path>] [--use-embedding] [--use-llm] [--require-pass]
12
+ *
13
+ * Defaults:
14
+ * - fixture: __tests__/fixtures/entity-resolver-200-mock.json
15
+ * - --use-embedding: skipped unless flag set (needs Ollama running)
16
+ * - --use-llm: skipped unless flag set (needs Ollama + chat model)
17
+ * - --require-pass: exit 1 when gate fails (use in CI; otherwise warn-only)
18
+ */
19
+
20
+ "use strict";
21
+
22
+ const fs = require("node:fs");
23
+ const path = require("node:path");
24
+
25
+ const {
26
+ entityResolverRuleStage: ruleStage,
27
+ } = require("../lib/entity-resolver");
28
+
// Command-line flags, parsed once at start-up.
const args = parseArgs(process.argv.slice(2));

// Labeled-pair fixture: --fixture <path>, otherwise the bundled mock set.
const fixturePath = args.fixture
  ? args.fixture
  : path.join(__dirname, "..", "__tests__", "fixtures", "entity-resolver-200-mock.json");

// Optional pipeline stages and CI behaviour, all off unless the flag is given.
const useEmbedding = Boolean(args["use-embedding"]);
const useLlm = Boolean(args["use-llm"]);
const requirePass = Boolean(args["require-pass"]);

// Minimum recall / accuracy (as fractions) required for the gate to pass.
const RECALL_GATE = 0.8;
const ACCURACY_GATE = 0.9;
39
+
// Cut-offs applied to the optional stages' outputs.
const EMBEDDING_SAME_MIN_SIM = 0.85; // sim at or above this → "same"
const EMBEDDING_DIFFERENT_MAX_SIM = 0.55; // sim strictly below this → "different"
const LLM_MIN_CONFIDENCE = 0.7; // yes/no answers below this go to "review"

/**
 * Format a fraction as a percentage string.
 *
 * @param {number} fraction - Value in the 0..1 range.
 * @param {number} digits - Number of decimals to keep.
 * @returns {string} e.g. `"87.5%"`.
 */
function formatPercent(fraction, digits) {
  return `${(fraction * 100).toFixed(digits)}%`;
}

/**
 * Run one labeled pair through rule → embedding → LLM and update `counts`.
 *
 * The embedding stage is consulted only when the rule verdict is "uncertain";
 * the LLM stage is consulted only when the embedding similarity falls between
 * the two cut-offs. A pair that no wired stage decides keeps the rule verdict.
 *
 * @param {object} pair - Fixture entry; `pair.a` and `pair.b` are passed to the stages.
 * @param {{embedding: Function|null, llm: Function|null}} stages - Optional stage functions.
 * @param {object} counts - Per-stage counters, mutated in place.
 * @returns {Promise<{finalVerdict: string, stage: string}>} Verdict and the stage that produced it.
 */
async function resolvePair(pair, stages, counts) {
  const ruleVerdict = ruleStage(pair.a, pair.b).verdict;
  if (ruleVerdict === "same") counts.ruleSame += 1;
  else if (ruleVerdict === "different") counts.ruleDifferent += 1;
  else counts.ruleUncertain += 1;

  const undecided = { finalVerdict: ruleVerdict, stage: "rule" };
  if (ruleVerdict !== "uncertain" || !stages.embedding) {
    return undecided;
  }

  const embedded = await stages.embedding(pair.a, pair.b);
  if (embedded.sim >= EMBEDDING_SAME_MIN_SIM) {
    counts.embeddingSame += 1;
    return { finalVerdict: "same", stage: "embedding" };
  }
  if (embedded.sim < EMBEDDING_DIFFERENT_MAX_SIM) {
    counts.embeddingDifferent += 1;
    return { finalVerdict: "different", stage: "embedding" };
  }

  counts.embeddingUncertain += 1;
  if (!stages.llm) {
    return undecided;
  }

  const answer = await stages.llm(pair.a, pair.b);
  const confident = answer.confidence >= LLM_MIN_CONFIDENCE;
  if (answer.verdict === "yes" && confident) {
    counts.llmSame += 1;
    return { finalVerdict: "same", stage: "llm" };
  }
  if (answer.verdict === "no" && confident) {
    counts.llmDifferent += 1;
    return { finalVerdict: "different", stage: "llm" };
  }
  counts.llmMaybe += 1;
  return { finalVerdict: "review", stage: "llm-review" };
}

/**
 * Evaluate the entity resolver against the labeled fixture and print a report.
 *
 * Exits with code 2 when the fixture is missing or has no pairs, and with
 * code 1 when the recall/accuracy gate fails and --require-pass was given.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("== EntityResolver evaluation ==");
  console.log("fixture:", fixturePath);
  if (!fs.existsSync(fixturePath)) {
    console.error("\nFAIL: fixture not found");
    process.exit(2);
  }
  const data = JSON.parse(fs.readFileSync(fixturePath, "utf-8"));
  // `!data` covers a fixture whose JSON is literally `null`.
  if (!data || !Array.isArray(data.pairs) || data.pairs.length === 0) {
    console.error("\nFAIL: fixture has no pairs");
    process.exit(2);
  }

  const stages = { embedding: null, llm: null };
  if (useEmbedding) {
    const { EntityResolverEmbeddingStage } = require("../lib/entity-resolver");
    stages.embedding = new EntityResolverEmbeddingStage({}).asStageFn();
    console.log("embedding stage: Ollama nomic-embed-text @ localhost:11434");
  }
  if (useLlm) {
    const { EntityResolverLLMStage } = require("../lib/entity-resolver");
    const { OllamaClient } = require("../lib/llm-client");
    const llm = new OllamaClient({ baseUrl: "http://localhost:11434", model: "qwen2.5:7b-instruct" });
    stages.llm = new EntityResolverLLMStage({ llm }).asStageFn();
    console.log("llm stage: Ollama qwen2.5:7b-instruct");
    if (!useEmbedding) {
      // The LLM is only asked about pairs the embedding stage leaves undecided.
      console.warn("warning: --use-llm has no effect without --use-embedding");
    }
  }

  // Per-stage counters
  const counts = {
    ruleSame: 0, ruleDifferent: 0, ruleUncertain: 0,
    embeddingSame: 0, embeddingDifferent: 0, embeddingUncertain: 0,
    llmSame: 0, llmDifferent: 0, llmMaybe: 0,
  };

  // Confusion matrix
  const confusion = { tp: 0, fp: 0, fn: 0, tn: 0, unresolved: 0 };

  // Per-pair breakdown
  const results = [];

  for (const pair of data.pairs) {
    const truth = pair.groundTruth; // "same" | "different"
    const { finalVerdict, stage } = await resolvePair(pair, stages, counts);

    // Tally confusion (only counting decided verdicts)
    if (finalVerdict === "same" && truth === "same") confusion.tp += 1;
    else if (finalVerdict === "same" && truth === "different") confusion.fp += 1;
    else if (finalVerdict === "different" && truth === "same") confusion.fn += 1;
    else if (finalVerdict === "different" && truth === "different") confusion.tn += 1;
    else confusion.unresolved += 1;

    results.push({ id: pair.id, truth, finalVerdict, stage, category: pair.category });
  }

  const total = data.pairs.length;
  const resolved = total - confusion.unresolved;
  // Accuracy is measured over decided pairs only; recall and precision
  // default to 1 when their denominator is empty.
  const accuracy = resolved > 0 ? (confusion.tp + confusion.tn) / resolved : 0;
  const recall = (confusion.tp + confusion.fn) > 0
    ? confusion.tp / (confusion.tp + confusion.fn)
    : 1;
  const precision = (confusion.tp + confusion.fp) > 0
    ? confusion.tp / (confusion.tp + confusion.fp)
    : 1;
  const resolveRate = resolved / total;

  // ── Report ──
  console.log("\nPair counts:", { total, resolved, unresolved: confusion.unresolved });
  console.log("\nPipeline stages:");
  console.log(` Rule: same=${counts.ruleSame} different=${counts.ruleDifferent} uncertain=${counts.ruleUncertain}`);
  if (stages.embedding) console.log(` Embedding: same=${counts.embeddingSame} different=${counts.embeddingDifferent} uncertain=${counts.embeddingUncertain}`);
  if (stages.llm) console.log(` LLM: same=${counts.llmSame} different=${counts.llmDifferent} maybe=${counts.llmMaybe}`);

  // The annotation depends on whether later stages had a chance to decide.
  const unresolvedNote = stages.embedding
    ? 'still "uncertain" or sent to "review" after the wired stages'
    : 'rule "uncertain" with no embedding/llm wired';
  console.log("\nConfusion (decided only):");
  console.log(` TP=${confusion.tp} FP=${confusion.fp} FN=${confusion.fn} TN=${confusion.tn}`);
  console.log(` unresolved=${confusion.unresolved} (${unresolvedNote})`);

  console.log("\nMetrics:");
  const recallStr = formatPercent(recall, 1);
  const accStr = formatPercent(accuracy, 1);
  const precStr = formatPercent(precision, 1);
  const resolveStr = formatPercent(resolveRate, 1);
  console.log(` Recall: ${recallStr} (target ≥ ${(RECALL_GATE * 100).toFixed(0)}%) ${recall >= RECALL_GATE ? "✓" : "✗"}`);
  console.log(` Accuracy: ${accStr} (target ≥ ${(ACCURACY_GATE * 100).toFixed(0)}%) ${accuracy >= ACCURACY_GATE ? "✓" : "✗"}`);
  console.log(` Precision: ${precStr}`);
  console.log(` Resolve rate: ${resolveStr}`);

  // Per-category breakdown (useful for spotting weak spots)
  const byCategory = {};
  for (const r of results) {
    const cat = r.category || "(uncat)";
    if (!byCategory[cat]) byCategory[cat] = { total: 0, correct: 0, unresolved: 0 };
    byCategory[cat].total += 1;
    if (r.finalVerdict === "review" || r.finalVerdict === "uncertain") {
      byCategory[cat].unresolved += 1;
    } else if (r.finalVerdict === r.truth) {
      byCategory[cat].correct += 1;
    }
  }
  console.log("\nPer-category:");
  for (const cat of Object.keys(byCategory)) {
    const s = byCategory[cat];
    const decided = s.total - s.unresolved;
    const catAccStr = decided > 0 ? formatPercent(s.correct / decided, 0) : "N/A";
    console.log(` ${cat}: ${s.correct}/${decided} correct (${catAccStr}); ${s.unresolved} unresolved`);
  }

  // Gate decision
  const passed = recall >= RECALL_GATE && accuracy >= ACCURACY_GATE;
  console.log(`\n${passed ? "✓ PASS" : "✗ FAIL"} — recall ${recallStr} / accuracy ${accStr}`);

  if (!passed && requirePass) {
    process.exit(1);
  }
}
192
+
/**
 * Parse long-form CLI options into a plain object.
 *
 * Supported shapes:
 *   --flag            → { flag: true }
 *   --key value       → { key: "value" }   (value must not start with "--")
 *   --key=value       → { key: "value" }   (split at the first "=")
 *
 * Tokens that do not start with "--" and are not consumed as a value are
 * ignored. When a key is repeated, the last occurrence wins.
 *
 * @param {string[]} argv - Arguments, typically `process.argv.slice(2)`.
 * @returns {Object<string, string|true>} Map of option name to value.
 */
function parseArgs(argv) {
  const out = {};
  for (let i = 0; i < argv.length; i += 1) {
    const token = argv[i];
    if (!token.startsWith("--")) {
      continue;
    }
    const option = token.slice(2);

    // Inline form: everything after the first "=" is the value.
    const eq = option.indexOf("=");
    if (eq > 0) {
      out[option.slice(0, eq)] = option.slice(eq + 1);
      continue;
    }

    const next = argv[i + 1];
    if (next && !next.startsWith("--")) {
      out[option] = next;
      i += 1; // the value token has been consumed
    } else {
      out[option] = true;
    }
  }
  return out;
}
209
+
/**
 * Report an unexpected failure from main() and terminate with exit code 2.
 * Prints the error's message when it has one, otherwise the raw value.
 */
function reportFatal(err) {
  const detail = (err && err.message) || err;
  console.error("\nFATAL:", detail);
  process.exit(2);
}

main().catch(reportFatal);
@@ -0,0 +1,196 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Phase 5.5 smoke — drives EmailAdapter end-to-end with a mock encrypted
4
+ * PDF attachment, exercising:
5
+ * - password-trial loop (3 wrong → 1 right)
6
+ * - text extraction
7
+ * - transactions regex (4 rows from a 招行-style statement)
8
+ * - merging transactions[] into bill template fields
9
+ * - per-attachment pdfExtraction summary
10
+ * - attachment buffer stripping before raw-event emission
11
+ *
12
+ * Uses an INJECTED pdfExtractor so the smoke runs without pulling the
13
+ * heavy pdfjs dep. The shape of the injected output matches what
14
+ * `extractPdfText` from pdf-extractor.js would return.
15
+ */
16
+
17
+ "use strict";
18
+
19
+ const { EmailAdapter } = require("../lib/adapters/email-imap/email-adapter");
20
+ const { extractTransactions } = require("../lib/adapters/email-imap/transactions");
21
+
// Header lines of the mock credit-card statement.
const statementHeader = [
  "招商银行信用卡 11 月对账单",
  "持卡人: 张三 尾号 1234",
  "账单周期: 2026-10-26 至 2026-11-25",
  "最后还款日: 2026-12-05 应还金额: ¥3,000.00",
];

// Transaction rows of the mock statement.
const statementRows = [
  "2026-10-30 星巴克 上海中山公园店 -39.00 2,961.00",
  "2026-11-05 京东自营 -899.00 2,062.00",
  "2026-11-12 退款 淘宝 +50.00 2,112.00",
  "2026-11-18 美团外卖 -85.00 2,027.00",
];

// Text the mock PDF extractor returns: header, blank line, section title,
// rows, blank line, page footer — joined with newlines.
const PDF_TEXT = [
  ...statementHeader,
  "",
  "交易明细:",
  ...statementRows,
  "",
  "第 1 页 共 1 页",
].join("\n");

// Three wrong passwords followed by the one the mock extractor accepts.
const PDF_PASSWORDS = [
  "wrong1",
  "wrong2",
  "wrong3",
  "987654",
];
38
+
/**
 * Build a session factory that serves a single fixed message.
 *
 * The message envelope is created once per makeSession() call and shared by
 * every session the returned factory produces.
 *
 * @returns {() => object} Factory whose sessions expose `connect`,
 *   `openMailbox`, `fetchFullSince` (async generator) and `close`.
 */
function makeSession() {
  const message = {
    uid: 1,
    internalDate: new Date("2026-11-26T10:00:00Z"),
    flags: ["\\Seen"],
    messageId: "<bill-cmb-11@x>",
    subject: "招商银行信用卡 11 月对账单",
    from: [{ name: "招商银行", address: "ebank@cmbchina.com" }],
    to: [{ address: "me@example.com" }],
    cc: [],
    date: new Date("2026-11-26T10:00:00Z"),
    size: 8192,
    source: Buffer.from("RAW", "utf8"),
  };

  return function createSession() {
    const session = {
      // Connecting and closing are no-ops in the mock.
      async connect() {},
      async close() {},

      // Every mailbox reports the same fixed status.
      async openMailbox(_name) {
        return { uidValidity: 1, uidNext: 9999, exists: 1 };
      },

      // Yields the message only when its uid is newer than `sinceUid`.
      async *fetchFullSince(sinceUid = 0) {
        if (message.uid <= sinceUid) {
          return;
        }
        yield message;
      },
    };
    return session;
  };
}
64
+
// Number of passwords tried by the most recent mockPdfExtractor() call.
let trialCount = 0;

/**
 * Stand-in for the real PDF extractor: "decrypts" only with password "987654".
 *
 * The empty password is always tried first, followed by `opts.passwords` in
 * order. A missing or null `opts` is treated as "no passwords supplied"
 * instead of throwing.
 *
 * @param {Buffer} buffer - Attachment bytes; not inspected by the mock.
 * @param {{passwords?: string[]}} [opts] - Candidate passwords to try.
 * @returns {Promise<object>} On success: `decrypted: true`, the fixture text,
 *   the matching password and the attempt count. On failure:
 *   `decrypted: false`, empty text and `error: "all passwords failed"`.
 */
async function mockPdfExtractor(buffer, opts) {
  const supplied = (opts && opts.passwords) || [];
  trialCount = 0;
  for (const pw of ["", ...supplied]) {
    trialCount += 1;
    if (pw !== "987654") {
      continue;
    }
    return {
      decrypted: true,
      text: PDF_TEXT,
      password: pw,
      attempted: trialCount,
      wasEncrypted: true,
      pageCount: 1,
    };
  }
  return {
    decrypted: false,
    text: "",
    attempted: trialCount,
    wasEncrypted: true,
    pageCount: 0,
    error: "all passwords failed",
  };
}
90
+
/**
 * Phase 5.5 smoke run.
 *
 * First checks the standalone transactions parser against the fixture text,
 * then drives EmailAdapter with a mock session, parser and PDF extractor and
 * verifies that:
 *   - neither the attachment bytes nor the accepted password appear in the
 *     serialized raw event;
 *   - the normalized event carries `extra.fields.transactions`;
 *   - exactly one email is emitted.
 *
 * Failures set `process.exitCode = 1` rather than exiting immediately, so the
 * remaining checks still print.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("== Phase 5.5 smoke ==");

  // First validate the standalone transactions parser on the fixture text
  console.log("\n— Standalone transactions regex —");
  const standaloneTxns = extractTransactions(PDF_TEXT);
  console.log(`extracted ${standaloneTxns.length} transactions:`);
  for (const t of standaloneTxns) {
    const dir = t.amount.direction || "?";
    const date = new Date(t.occurredAtMs).toISOString().slice(0, 10);
    console.log(` ${date} ${dir.padEnd(3)} ¥${t.amount.value.toFixed(2).padStart(8)} ${t.description}`);
  }
  if (standaloneTxns.length !== 4) {
    console.log(`FAIL: expected 4 transactions, got ${standaloneTxns.length}`);
    process.exitCode = 1;
    return;
  }

  // Now full pipeline
  console.log("\n— Full adapter pipeline —");

  const a = new EmailAdapter({
    account: { provider: "qq", email: "me@qq.com", authCode: "x", folders: ["INBOX"] },
    sessionFactory: makeSession(),
    parser: async () => ({
      textBody: "您的招商银行信用卡 11 月对账单已生成,详情见附件 PDF。",
      attachments: [{
        filename: "招行账单_11月.pdf",
        contentType: "application/pdf",
        contentDisposition: "attachment",
        size: 78_456,
        sha256: "abc123sha256deadbeef",
        isInline: false,
        isEncrypted: true,
        buffer: Buffer.from("FAKE-PDF-BYTES-DO-NOT-LEAK"),
      }],
    }),
    pdfExtractor: mockPdfExtractor,
    pdfPasswordHints: { idCardLast6: "987654", phoneLast6: "555000" },
    pdfPasswords: ["wrong1", "wrong2", "wrong3"], // tried before hints
  });

  console.log("adapter.version =", a.version);
  console.log("adapter.capabilities =", a.capabilities.join(", "));
  console.log("pdfPasswords (merged) =", a._pdfPasswords);

  let count = 0;
  for await (const raw of a.sync()) {
    count += 1;
    const ext = raw.payload.extraction;
    // Guard once so a missing `fields` object is reported, not a TypeError.
    const fields = ext.fields || {};
    console.log(`\nemail #${count} subject: ${raw.payload.subject}`);
    console.log(" classification.category:", raw.payload.classification.category);
    console.log(" extraction.template :", ext.template);
    console.log(" extraction.confidence :", ext.confidence);
    console.log(" extraction.fields keys :", Object.keys(fields).join(", "));
    if (fields.transactions) {
      console.log(` transactions[] count : ${fields.transactions.length}`);
      for (const t of fields.transactions) {
        const date = new Date(t.occurredAtMs).toISOString().slice(0, 10);
        console.log(` ${date} ¥${t.amount.value} ${t.amount.direction} ${t.description}`);
      }
    }
    console.log(" pdfExtraction[] :");
    for (const p of ext.pdfExtraction || []) {
      console.log(` ${p.filename}: decrypted=${p.decrypted} attempted=${p.attempted} txns=${p.transactionsExtracted ?? "-"}`);
    }

    // Buffer-leakage check
    const serialized = JSON.stringify(raw);
    if (serialized.includes("FAKE-PDF-BYTES-DO-NOT-LEAK")) {
      console.log("\nBUFFER LEAK ✗ — raw PDF bytes survived into payload!");
      process.exitCode = 1;
    } else {
      console.log(" buffer stripping : ✓ no PDF bytes in payload");
    }
    // Password-leakage check
    if (serialized.includes("987654")) {
      console.log(" PASSWORD LEAK ✗ — real password survived into payload");
      process.exitCode = 1;
    } else {
      console.log(" password redaction : ✓ real password not in payload");
    }

    // Normalize and confirm transactions land in extra.fields
    const batch = a.normalize(raw);
    const ev = batch.events[0];
    if (ev.extra.fields && Array.isArray(ev.extra.fields.transactions)) {
      console.log(` normalize → extra.fields.transactions: ${ev.extra.fields.transactions.length} rows ✓`);
    } else {
      console.log(" normalize MISSING transactions in extra.fields ✗");
      process.exitCode = 1;
    }
  }

  // PASSED is printed only when one email was seen and no check failed.
  if (count === 1 && !process.exitCode) {
    console.log("\n== Phase 5.5 smoke PASSED ==");
  } else if (!process.exitCode) {
    console.log(`expected 1 email, got ${count}`);
    process.exitCode = 1;
  }
}
192
+
/**
 * Log an unexpected rejection from main() and mark the process as failed.
 * Sets the exit code instead of exiting, so pending output still flushes.
 */
function reportSmokeFailure(err) {
  console.error("smoke failed:", err);
  process.exitCode = 1;
}

main().catch(reportSmokeFailure);