@chainlesschain/personal-data-hub 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
- package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
- package/__tests__/adapters/ai-chat-history.test.js +396 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
- package/__tests__/adapters/system-data-android.test.js +387 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
- package/__tests__/adapters/wechat-env-probe.test.js +162 -0
- package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
- package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
- package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
- package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
- package/__tests__/analysis-skills.test.js +556 -0
- package/__tests__/analysis.test.js +329 -1
- package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
- package/__tests__/e2e/full-user-journey.test.js +188 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
- package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
- package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
- package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/registry.test.js +4 -2
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
- package/lib/adapters/ai-chat-history/health-checker.js +210 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +258 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-kuaishou/index.js +237 -0
- package/lib/adapters/social-toutiao/index.js +236 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/system-data-android/adapter.js +348 -0
- package/lib/adapters/system-data-android/index.js +76 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/bootstrap.js +146 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/env-probe.js +218 -0
- package/lib/adapters/wechat/frida-agent/loader.js +67 -0
- package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
- package/lib/adapters/wechat/index.js +37 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
- package/lib/adapters/wechat/key-providers/index.js +22 -0
- package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
- package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +219 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/analysis.js +191 -2
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +131 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/prompt-builder.js +11 -1
- package/lib/query-parser.js +7 -1
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +343 -0
- package/package.json +36 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Phase 8.8 — EntityResolver evaluation runner.
|
|
4
|
+
*
|
|
5
|
+
* Reads a labeled pair fixture, runs each pair through the configured
|
|
6
|
+
* stages, computes recall / accuracy / per-stage breakdown, and exits
|
|
7
|
+
* non-zero if the CI gate (recall ≥ 80%, accuracy ≥ 90%) fails and --require-pass is set.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node scripts/evaluate-entity-resolver.js \
|
|
11
|
+
* [--fixture <path>] [--use-embedding] [--use-llm] [--require-pass]
|
|
12
|
+
*
|
|
13
|
+
* Defaults:
|
|
14
|
+
* - fixture: __tests__/fixtures/entity-resolver-200-mock.json
|
|
15
|
+
* - --use-embedding: skipped unless flag set (needs Ollama running)
|
|
16
|
+
* - --use-llm: skipped unless flag set (needs Ollama + chat model)
|
|
17
|
+
* - --require-pass: exit 1 when gate fails (use in CI; otherwise warn-only)
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
"use strict";
|
|
21
|
+
|
|
22
|
+
const fs = require("node:fs");
|
|
23
|
+
const path = require("node:path");
|
|
24
|
+
|
|
25
|
+
const {
|
|
26
|
+
entityResolverRuleStage: ruleStage,
|
|
27
|
+
} = require("../lib/entity-resolver");
|
|
28
|
+
|
|
29
|
+
const args = parseArgs(process.argv.slice(2));
|
|
30
|
+
const fixturePath = args.fixture || path.join(
|
|
31
|
+
__dirname, "..", "__tests__", "fixtures", "entity-resolver-200-mock.json"
|
|
32
|
+
);
|
|
33
|
+
const useEmbedding = !!args["use-embedding"];
|
|
34
|
+
const useLlm = !!args["use-llm"];
|
|
35
|
+
const requirePass = !!args["require-pass"];
|
|
36
|
+
|
|
37
|
+
const RECALL_GATE = 0.80;
|
|
38
|
+
const ACCURACY_GATE = 0.90;
|
|
39
|
+
|
|
40
|
+
/**
 * Runs the EntityResolver evaluation end to end and prints a report.
 *
 * Steps:
 *   1. Load the labeled-pair fixture at `fixturePath` (exit code 2 when the
 *      file is missing or contains no pairs).
 *   2. Optionally wire the embedding / LLM stages (`--use-embedding`, `--use-llm`).
 *   3. Cascade every pair: rule stage first; if it answers "uncertain", the
 *      embedding stage (when wired); if the pair is still undecided, the LLM
 *      stage (when wired). The LLM stage is consulted even when no embedding
 *      stage is wired, so `--use-llm` on its own is not silently ignored.
 *   4. Tally a confusion matrix over decided pairs, then print metrics, a
 *      per-category breakdown and the gate result.
 *   5. Exit with code 1 when the gate fails and `--require-pass` was given.
 *
 * Recall, precision and accuracy are computed over decided pairs only; pairs
 * that stay unresolved lower "Resolve rate" but do not enter the gate.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Thresholds applied to stage outputs in the cascade below.
  const EMBEDDING_SAME_MIN = 0.85;
  const EMBEDDING_DIFFERENT_MAX = 0.55;
  const LLM_MIN_CONFIDENCE = 0.7;

  // A label counts as "decided" only when it is one of the two ground-truth
  // values; "uncertain", "review" or anything unexpected is unresolved.
  const isDecided = (label) => label === "same" || label === "different";

  console.log("== EntityResolver evaluation ==");
  console.log("fixture:", fixturePath);
  if (!fs.existsSync(fixturePath)) {
    console.error("\nFAIL: fixture not found");
    process.exit(2);
  }
  const data = JSON.parse(fs.readFileSync(fixturePath, "utf-8"));
  if (!Array.isArray(data.pairs) || data.pairs.length === 0) {
    console.error("\nFAIL: fixture has no pairs");
    process.exit(2);
  }

  const stages = { embedding: null, llm: null };
  if (useEmbedding) {
    const { EntityResolverEmbeddingStage } = require("../lib/entity-resolver");
    stages.embedding = new EntityResolverEmbeddingStage({}).asStageFn();
    console.log("embedding stage: Ollama nomic-embed-text @ localhost:11434");
  }
  if (useLlm) {
    const { EntityResolverLLMStage } = require("../lib/entity-resolver");
    const { OllamaClient } = require("../lib/llm-client");
    const llm = new OllamaClient({ baseUrl: "http://localhost:11434", model: "qwen2.5:7b-instruct" });
    stages.llm = new EntityResolverLLMStage({ llm }).asStageFn();
    console.log("llm stage: Ollama qwen2.5:7b-instruct");
  }

  // Per-stage counters
  const counts = {
    ruleSame: 0, ruleDifferent: 0, ruleUncertain: 0,
    embeddingSame: 0, embeddingDifferent: 0, embeddingUncertain: 0,
    llmSame: 0, llmDifferent: 0, llmMaybe: 0,
  };

  // Confusion matrix
  const confusion = { tp: 0, fp: 0, fn: 0, tn: 0, unresolved: 0 };

  // Per-pair breakdown
  const results = [];

  for (const pair of data.pairs) {
    const truth = pair.groundTruth; // "same" | "different"
    const ruleVerdict = ruleStage(pair.a, pair.b).verdict;
    let finalVerdict = ruleVerdict;
    let stage = "rule";

    if (ruleVerdict === "same") counts.ruleSame += 1;
    else if (ruleVerdict === "different") counts.ruleDifferent += 1;
    else counts.ruleUncertain += 1;

    // Only pairs the rule stage explicitly marks "uncertain" move down the cascade.
    let undecided = ruleVerdict === "uncertain";

    if (undecided && stages.embedding) {
      const e = await stages.embedding(pair.a, pair.b);
      if (e.sim >= EMBEDDING_SAME_MIN) {
        finalVerdict = "same";
        counts.embeddingSame += 1;
        stage = "embedding";
        undecided = false;
      } else if (e.sim < EMBEDDING_DIFFERENT_MAX) {
        finalVerdict = "different";
        counts.embeddingDifferent += 1;
        stage = "embedding";
        undecided = false;
      } else {
        counts.embeddingUncertain += 1;
      }
    }

    if (undecided && stages.llm) {
      const v = await stages.llm(pair.a, pair.b);
      if (v.verdict === "yes" && v.confidence >= LLM_MIN_CONFIDENCE) {
        finalVerdict = "same";
        counts.llmSame += 1;
        stage = "llm";
      } else if (v.verdict === "no" && v.confidence >= LLM_MIN_CONFIDENCE) {
        finalVerdict = "different";
        counts.llmDifferent += 1;
        stage = "llm";
      } else {
        // Low confidence or any other answer: flag for manual review.
        finalVerdict = "review";
        counts.llmMaybe += 1;
        stage = "llm-review";
      }
    }

    // Tally confusion (only counting decided verdicts)
    const decided = isDecided(finalVerdict) && isDecided(truth);
    if (!decided) confusion.unresolved += 1;
    else if (finalVerdict === "same" && truth === "same") confusion.tp += 1;
    else if (finalVerdict === "same") confusion.fp += 1;
    else if (truth === "same") confusion.fn += 1;
    else confusion.tn += 1;

    results.push({ id: pair.id, truth, finalVerdict, stage, category: pair.category, decided });
  }

  const total = data.pairs.length;
  const resolved = total - confusion.unresolved;
  const accuracy = resolved > 0 ? (confusion.tp + confusion.tn) / resolved : 0;
  // With an empty denominator the metric is reported as 1 (nothing to miss).
  const recall = (confusion.tp + confusion.fn) > 0
    ? confusion.tp / (confusion.tp + confusion.fn)
    : 1;
  const precision = (confusion.tp + confusion.fp) > 0
    ? confusion.tp / (confusion.tp + confusion.fp)
    : 1;
  const resolveRate = resolved / total;

  // ── Report ──
  console.log("\nPair counts:", { total, resolved, unresolved: confusion.unresolved });
  console.log("\nPipeline stages:");
  console.log(` Rule: same=${counts.ruleSame} different=${counts.ruleDifferent} uncertain=${counts.ruleUncertain}`);
  if (stages.embedding) console.log(` Embedding: same=${counts.embeddingSame} different=${counts.embeddingDifferent} uncertain=${counts.embeddingUncertain}`);
  if (stages.llm) console.log(` LLM: same=${counts.llmSame} different=${counts.llmDifferent} maybe=${counts.llmMaybe}`);

  console.log("\nConfusion (decided only):");
  console.log(` TP=${confusion.tp} FP=${confusion.fp} FN=${confusion.fn} TN=${confusion.tn}`);
  console.log(` unresolved=${confusion.unresolved} (no stage reached a same/different verdict)`);

  console.log("\nMetrics:");
  const recallStr = (recall * 100).toFixed(1) + "%";
  const accStr = (accuracy * 100).toFixed(1) + "%";
  const precStr = (precision * 100).toFixed(1) + "%";
  const resolveStr = (resolveRate * 100).toFixed(1) + "%";
  console.log(` Recall: ${recallStr} (target ≥ ${(RECALL_GATE * 100).toFixed(0)}%) ${recall >= RECALL_GATE ? "✓" : "✗"}`);
  console.log(` Accuracy: ${accStr} (target ≥ ${(ACCURACY_GATE * 100).toFixed(0)}%) ${accuracy >= ACCURACY_GATE ? "✓" : "✗"}`);
  console.log(` Precision: ${precStr}`);
  console.log(` Resolve rate: ${resolveStr}`);

  // Per-category breakdown (useful for spotting weak spots)
  const byCategory = {};
  for (const r of results) {
    const cat = r.category || "(uncat)";
    if (!byCategory[cat]) byCategory[cat] = { total: 0, correct: 0, unresolved: 0 };
    byCategory[cat].total += 1;
    if (!r.decided) {
      byCategory[cat].unresolved += 1;
    } else if (r.finalVerdict === r.truth) {
      byCategory[cat].correct += 1;
    }
  }
  console.log("\nPer-category:");
  for (const cat of Object.keys(byCategory)) {
    const s = byCategory[cat];
    const decidedInCategory = s.total - s.unresolved;
    const categoryAccStr = decidedInCategory > 0
      ? ((s.correct / decidedInCategory) * 100).toFixed(0) + "%"
      : "N/A";
    console.log(` ${cat}: ${s.correct}/${decidedInCategory} correct (${categoryAccStr}); ${s.unresolved} unresolved`);
  }

  // Gate decision
  const passed = recall >= RECALL_GATE && accuracy >= ACCURACY_GATE;
  console.log(`\n${passed ? "✓ PASS" : "✗ FAIL"} — recall ${recallStr} / accuracy ${accStr}`);

  if (!passed && requirePass) {
    process.exit(1);
  }
}
|
|
192
|
+
|
|
193
|
+
/**
 * Minimal command-line option parser.
 *
 * Recognised forms:
 *   --flag            → { flag: true }
 *   --key value       → { key: "value" }   (value must not start with "--")
 *   --key=value       → { key: "value" }   (split on the first "=")
 *
 * Tokens that do not start with "--" and are not consumed as a value are
 * ignored. Later occurrences of a key overwrite earlier ones.
 *
 * @param {string[]} argv - Arguments, typically `process.argv.slice(2)`.
 * @returns {Object<string, string|boolean>} Parsed options keyed by name.
 */
function parseArgs(argv) {
  const out = {};
  for (let i = 0; i < argv.length; i += 1) {
    const token = argv[i];
    if (!token.startsWith("--")) continue;

    const name = token.slice(2);

    // Inline form: without this, "--fixture=x" would be stored under the
    // literal key "fixture=x" and the option would be silently ignored.
    const eq = name.indexOf("=");
    if (eq > 0) {
      out[name.slice(0, eq)] = name.slice(eq + 1);
      continue;
    }

    const next = argv[i + 1];
    if (next && !next.startsWith("--")) {
      out[name] = next;
      i += 1; // the value token has been consumed
    } else {
      out[name] = true;
    }
  }
  return out;
}
|
|
209
|
+
|
|
210
|
+
main().catch((err) => {
|
|
211
|
+
console.error("\nFATAL:", err && err.message ? err.message : err);
|
|
212
|
+
process.exit(2);
|
|
213
|
+
});
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Phase 5.5 smoke — drives EmailAdapter end-to-end with a mock encrypted
|
|
4
|
+
* PDF attachment, exercising:
|
|
5
|
+
* - password-trial loop (3 wrong → 1 right)
|
|
6
|
+
* - text extraction
|
|
7
|
+
* - transactions regex (4 rows from a 招行-style statement)
|
|
8
|
+
* - merging transactions[] into bill template fields
|
|
9
|
+
* - per-attachment pdfExtraction summary
|
|
10
|
+
* - attachment buffer stripping before raw-event emission
|
|
11
|
+
*
|
|
12
|
+
* Uses an INJECTED pdfExtractor so the smoke runs without pulling the
|
|
13
|
+
* heavy pdfjs dep. The shape of the injected output matches what
|
|
14
|
+
* `extractPdfText` from pdf-extractor.js would return.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
"use strict";
|
|
18
|
+
|
|
19
|
+
const { EmailAdapter } = require("../lib/adapters/email-imap/email-adapter");
|
|
20
|
+
const { extractTransactions } = require("../lib/adapters/email-imap/transactions");
|
|
21
|
+
|
|
22
|
+
const PDF_TEXT = [
|
|
23
|
+
"招商银行信用卡 11 月对账单",
|
|
24
|
+
"持卡人: 张三 尾号 1234",
|
|
25
|
+
"账单周期: 2026-10-26 至 2026-11-25",
|
|
26
|
+
"最后还款日: 2026-12-05 应还金额: ¥3,000.00",
|
|
27
|
+
"",
|
|
28
|
+
"交易明细:",
|
|
29
|
+
"2026-10-30 星巴克 上海中山公园店 -39.00 2,961.00",
|
|
30
|
+
"2026-11-05 京东自营 -899.00 2,062.00",
|
|
31
|
+
"2026-11-12 退款 淘宝 +50.00 2,112.00",
|
|
32
|
+
"2026-11-18 美团外卖 -85.00 2,027.00",
|
|
33
|
+
"",
|
|
34
|
+
"第 1 页 共 1 页",
|
|
35
|
+
].join("\n");
|
|
36
|
+
|
|
37
|
+
const PDF_PASSWORDS = ["wrong1", "wrong2", "wrong3", "987654"];
|
|
38
|
+
|
|
39
|
+
/**
 * Builds a session factory for a fake IMAP mailbox holding one message
 * (uid 1, a bank-statement notification).
 *
 * Each call of the returned factory yields a fresh session object exposing
 * `connect`, `openMailbox`, `fetchFullSince` and `close`. No network is
 * involved; `connect` and `close` are no-ops.
 *
 * @returns {() => object} Session factory.
 */
function makeSession() {
  const receivedAt = "2026-11-26T10:00:00Z";
  const statementSubject = "招商银行信用卡 11 月对账单";

  // The single envelope served by every session created from this factory.
  const envelope = {
    uid: 1,
    internalDate: new Date(receivedAt),
    flags: ["\\Seen"],
    messageId: "<bill-cmb-11@x>",
    subject: statementSubject,
    from: [{ name: "招商银行", address: "ebank@cmbchina.com" }],
    to: [{ address: "me@example.com" }],
    cc: [],
    date: new Date(receivedAt),
    size: 8192,
    source: Buffer.from("RAW", "utf8"),
  };

  const sessionMethods = {
    connect: async () => {},
    openMailbox: async (_name) => ({ uidValidity: 1, uidNext: 9999, exists: 1 }),
    // Yields the envelope only when its uid is newer than `sinceUid`.
    fetchFullSince: async function* (sinceUid = 0) {
      if (envelope.uid > sinceUid) {
        yield envelope;
      }
    },
    close: async () => {},
  };

  return () => ({ ...sessionMethods });
}
|
|
64
|
+
|
|
65
|
+
let trialCount = 0;
|
|
66
|
+
/**
 * Stand-in PDF extractor that simulates a password-trial loop.
 *
 * The candidate list is the empty password followed by `opts.passwords`.
 * Only the password "987654" unlocks the document. The module-level
 * `trialCount` is reset on every call and ends up holding the number of
 * candidates tried, which is also reported as `attempted`.
 *
 * @param {Buffer} buffer - Attachment bytes (not inspected by the mock).
 * @param {{passwords?: string[]}} opts - Candidate passwords, in trial order.
 * @returns {Promise<object>} Success or failure summary of the attempt.
 */
async function mockPdfExtractor(buffer, opts) {
  trialCount = 0;
  const candidates = ["", ...(opts.passwords || [])];
  const unlockIndex = candidates.indexOf("987654");

  if (unlockIndex === -1) {
    // Every candidate was tried and none matched.
    trialCount = candidates.length;
    return {
      decrypted: false,
      text: "",
      attempted: trialCount,
      wasEncrypted: true,
      pageCount: 0,
      error: "all passwords failed",
    };
  }

  // Trials stop at the first matching candidate.
  trialCount = unlockIndex + 1;
  return {
    decrypted: true,
    text: PDF_TEXT,
    password: candidates[unlockIndex],
    attempted: trialCount,
    wasEncrypted: true,
    pageCount: 1,
  };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Phase 5.5 smoke entry point.
 *
 * 1. Runs `extractTransactions` directly on the fixture text and requires
 *    exactly 4 rows.
 * 2. Drives an `EmailAdapter` wired with the fake session, a stub parser that
 *    returns one encrypted PDF attachment, and the mock PDF extractor.
 * 3. For every raw event: prints the classification / extraction summary,
 *    checks that neither the attachment bytes nor the real password appear
 *    in the serialized event, and checks that `normalize` carries the
 *    transactions into `extra.fields`.
 *
 * Failures set `process.exitCode = 1`; the PASSED banner is printed only
 * when exactly one email was emitted and no check failed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("== Phase 5.5 smoke ==");

  // First validate the standalone transactions parser on the fixture text
  console.log("\n— Standalone transactions regex —");
  const standaloneTxns = extractTransactions(PDF_TEXT);
  console.log(`extracted ${standaloneTxns.length} transactions:`);
  for (const t of standaloneTxns) {
    const dir = t.amount.direction || "?";
    const date = new Date(t.occurredAtMs).toISOString().slice(0, 10);
    console.log(` ${date} ${dir.padEnd(3)} ¥${t.amount.value.toFixed(2).padStart(8)} ${t.description}`);
  }
  if (standaloneTxns.length !== 4) {
    console.log(`FAIL: expected 4 transactions, got ${standaloneTxns.length}`);
    process.exitCode = 1;
    return;
  }

  // Now full pipeline
  console.log("\n— Full adapter pipeline —");

  const a = new EmailAdapter({
    account: { provider: "qq", email: "me@qq.com", authCode: "x", folders: ["INBOX"] },
    sessionFactory: makeSession(),
    parser: async () => ({
      textBody: "您的招商银行信用卡 11 月对账单已生成,详情见附件 PDF。",
      attachments: [{
        filename: "招行账单_11月.pdf",
        contentType: "application/pdf",
        contentDisposition: "attachment",
        size: 78_456,
        sha256: "abc123sha256deadbeef",
        isInline: false,
        isEncrypted: true,
        buffer: Buffer.from("FAKE-PDF-BYTES-DO-NOT-LEAK"),
      }],
    }),
    pdfExtractor: mockPdfExtractor,
    pdfPasswordHints: { idCardLast6: "987654", phoneLast6: "555000" },
    pdfPasswords: ["wrong1", "wrong2", "wrong3"], // tried before hints
  });

  console.log("adapter.version =", a.version);
  console.log("adapter.capabilities =", a.capabilities.join(", "));
  console.log("pdfPasswords (merged) =", a._pdfPasswords);

  let count = 0;
  for await (const raw of a.sync()) {
    count += 1;
    const ext = raw.payload.extraction;
    // Guard once so the key listing and the transactions dump agree on what
    // happens when the template produced no fields.
    const fields = ext.fields || {};
    console.log(`\nemail #${count} subject: ${raw.payload.subject}`);
    console.log(" classification.category:", raw.payload.classification.category);
    console.log(" extraction.template :", ext.template);
    console.log(" extraction.confidence :", ext.confidence);
    console.log(" extraction.fields keys :", Object.keys(fields).join(", "));
    if (fields.transactions) {
      console.log(` transactions[] count : ${fields.transactions.length}`);
      for (const t of fields.transactions) {
        const date = new Date(t.occurredAtMs).toISOString().slice(0, 10);
        console.log(` ${date} ¥${t.amount.value} ${t.amount.direction} ${t.description}`);
      }
    }
    console.log(" pdfExtraction[] :");
    for (const p of ext.pdfExtraction || []) {
      console.log(` ${p.filename}: decrypted=${p.decrypted} attempted=${p.attempted} txns=${p.transactionsExtracted ?? "-"}`);
    }

    // Buffer-leakage check
    const serialized = JSON.stringify(raw);
    if (serialized.includes("FAKE-PDF-BYTES-DO-NOT-LEAK")) {
      console.log("\nBUFFER LEAK ✗ — raw PDF bytes survived into payload!");
      process.exitCode = 1;
    } else {
      console.log(" buffer stripping : ✓ no PDF bytes in payload");
    }
    // Password-leakage check
    if (serialized.includes("987654")) {
      console.log(" PASSWORD LEAK ✗ — real password survived into payload");
      process.exitCode = 1;
    } else {
      console.log(" password redaction : ✓ real password not in payload");
    }

    // Normalize and confirm transactions land in extra.fields.
    // Optional chaining turns a missing event / extra / fields into the
    // regular "MISSING" failure below instead of a TypeError.
    const batch = a.normalize(raw);
    const normalizedTxns = batch?.events?.[0]?.extra?.fields?.transactions;
    if (Array.isArray(normalizedTxns)) {
      console.log(` normalize → extra.fields.transactions: ${normalizedTxns.length} rows ✓`);
    } else {
      console.log(" normalize MISSING transactions in extra.fields ✗");
      process.exitCode = 1;
    }
  }

  if (count === 1 && !process.exitCode) {
    console.log("\n== Phase 5.5 smoke PASSED ==");
  } else if (!process.exitCode) {
    console.log(`expected 1 email, got ${count}`);
    process.exitCode = 1;
  }
}
|
|
192
|
+
|
|
193
|
+
main().catch((err) => {
|
|
194
|
+
console.error("smoke failed:", err);
|
|
195
|
+
process.exitCode = 1;
|
|
196
|
+
});
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Phase 5.7 smoke — exercises retry-with-backoff + onProgress through a
|
|
4
|
+
* flaky session. No real IMAP needed.
|
|
5
|
+
*
|
|
6
|
+
* 1. First 2 connect attempts throw ECONNRESET (transient)
|
|
7
|
+
* 2. 3rd attempt succeeds, yields 5 envelopes
|
|
8
|
+
* 3. Verify progress events fire in order: connecting → error → connecting
|
|
9
|
+
* → error → connecting → connected → mailbox-opened → fetching × 5 → done
|
|
10
|
+
* 4. Verify retry was capped (3 attempts total)
|
|
11
|
+
* 5. Verify AUTH_FAILED does NOT retry (separate run)
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
"use strict";
|
|
15
|
+
|
|
16
|
+
const { EmailAdapter } = require("../lib/adapters/email-imap/email-adapter");
|
|
17
|
+
const { ImapAuthFailedError } = require("../lib/adapters/email-imap/imap-session");
|
|
18
|
+
|
|
19
|
+
/**
 * Builds a session factory whose `connect()` fails a scripted number of times.
 *
 * The failure queue and the attempt counter are shared by every session the
 * factory creates: each `connect()` call increments `recorder.attempts` and,
 * while the queue is non-empty, throws the next queued error. Once the queue
 * is drained, `connect()` resolves.
 *
 * @param {Error[]} failuresFirst - Errors to throw, in order; the array is copied, not mutated.
 * @param {object[]} envelopes - Envelopes served by `fetchFullSince`.
 * @returns {{factory: () => object, recorder: {attempts: number}}}
 */
function makeFlakyFactory(failuresFirst, envelopes) {
  const pendingFailures = failuresFirst.slice();
  const recorder = { attempts: 0 };

  const factory = () => ({
    async connect() {
      recorder.attempts += 1;
      if (pendingFailures.length > 0) {
        throw pendingFailures.shift();
      }
    },
    async openMailbox(_name) {
      return { uidValidity: 1, uidNext: 9999, exists: envelopes.length };
    },
    async *fetchFullSince(sinceUid = 0) {
      for (const env of envelopes) {
        // Envelopes without a `source` get an empty buffer.
        if (env.uid > sinceUid) yield { ...env, source: env.source || Buffer.alloc(0) };
      }
    },
    async close() {},
  });

  return { factory, recorder };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Creates a synthetic message envelope for the given uid.
 *
 * The uid is embedded in the message id, subject and sender address, and is
 * also used (zero-padded to two digits) as the day of May 2026 for both
 * `internalDate` and `date`.
 *
 * @param {number} uid - Message uid.
 * @returns {object} Envelope without a `source` field.
 */
function makeEnv(uid) {
  const dayOfMonth = String(uid).padStart(2, "0");
  const timestamp = `2026-05-${dayOfMonth}T10:00:00Z`;

  const sender = { address: `s${uid}@example.com` };
  const recipient = { address: "me@example.com" };

  return {
    uid,
    internalDate: new Date(timestamp),
    flags: ["\\Seen"],
    messageId: `<m-${uid}@x>`,
    subject: `Subject ${uid}`,
    from: [sender],
    to: [recipient],
    cc: [],
    date: new Date(timestamp),
    size: 1024,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Scenario A: the first two `connect()` calls fail with a transient
 * ECONNRESET, the third succeeds and five envelopes are synced.
 *
 * Checks, each setting `process.exitCode = 1` on failure:
 *   - exactly 5 raw events were emitted;
 *   - exactly 3 connect attempts were made;
 *   - exactly 2 "error" progress events, both with `retriable === true`;
 *   - a "done" progress event with `emitted === 5`;
 *   - 5 "fetching" progress events, the first with `total === 5` and the
 *     last with `current === 5`.
 *
 * @returns {Promise<void>}
 */
async function scenarioRetrySucceeds() {
  console.log("\n=== Scenario A — 2 fails then success, 5 envelopes ===");
  const transient = Object.assign(new Error("ECONNRESET"), { code: "ECONNRESET" });
  const envs = [1, 2, 3, 4, 5].map(makeEnv);
  const { factory, recorder } = makeFlakyFactory([transient, transient], envs);

  const events = [];
  const a = new EmailAdapter({
    account: { provider: "qq", email: "me@qq.com", authCode: "x", folders: ["INBOX"] },
    sessionFactory: factory,
    parser: async () => ({ textBody: "", attachments: [] }),
    maxConnectRetries: 3,
    retryBaseDelayMs: 5,
    onProgress: (e) => events.push(e),
  });

  console.log("adapter.version =", a.version);
  console.log("adapter.capabilities (Phase 5.7) =", a.capabilities.filter((c) => c.startsWith("sync:")).join(", "));

  const raws = [];
  for await (const r of a.sync()) raws.push(r);
  console.log(`emitted ${raws.length} raws (expected 5)`);
  console.log(`connect attempts: ${recorder.attempts} (expected 3)`);

  // Enforce the two expectations printed above; logging them alone would
  // let a regression in retry count or emission slip through as a pass.
  if (raws.length !== 5) {
    console.error(`FAIL: expected 5 raws, got ${raws.length}`);
    process.exitCode = 1;
  }
  if (recorder.attempts !== 3) {
    console.error(`FAIL: expected 3 connect attempts, got ${recorder.attempts}`);
    process.exitCode = 1;
  }

  const phaseSeq = events.map((e) => `${e.phase}${e.attempt ? "(" + e.attempt + ")" : ""}`);
  console.log("phase sequence:");
  for (const p of phaseSeq) console.log(" -", p);

  // Verify the error events produced by the two failed attempts
  const errs = events.filter((e) => e.phase === "error");
  if (errs.length !== 2) {
    console.error(`FAIL: expected 2 error events, got ${errs.length}`);
    process.exitCode = 1;
  } else if (!errs.every((e) => e.retriable === true)) {
    console.error("FAIL: error events should be retriable=true during first 2 attempts");
    process.exitCode = 1;
  } else {
    console.log("error events: ✓ both marked retriable");
  }

  const done = events.find((e) => e.phase === "done");
  if (!done || done.emitted !== 5) {
    console.error(`FAIL: expected done event with emitted=5, got ${JSON.stringify(done)}`);
    process.exitCode = 1;
  } else {
    console.log(`done event: ✓ emitted=${done.emitted} durationMs=${done.durationMs}`);
  }

  const fetches = events.filter((e) => e.phase === "fetching");
  if (fetches.length !== 5) {
    console.error(`FAIL: expected 5 fetching events, got ${fetches.length}`);
    process.exitCode = 1;
  } else if (fetches[0].total !== 5 || fetches[4].current !== 5) {
    console.error(`FAIL: fetching events should run 1..5 of 5`);
    process.exitCode = 1;
  } else {
    console.log("fetching events: ✓ 5 events with current/total");
  }
}
|
|
120
|
+
|
|
121
|
+
/**
 * Scenario B: `connect()` throws `ImapAuthFailedError`; the adapter is
 * expected to stop after one attempt and surface the error.
 *
 * Checks, each setting `process.exitCode = 1` on failure:
 *   - one connect attempt only, and the caught error has `code === "AUTH_FAILED"`
 *     (reported as a single pass/fail line; the attempt count is tested first);
 *   - an "error" progress event exists and has `retriable === false`.
 *
 * @returns {Promise<void>}
 */
async function scenarioAuthFailedNoRetry() {
  // Log a failure line and mark the process as failed.
  const fail = (message) => {
    console.error(message);
    process.exitCode = 1;
  };

  console.log("\n=== Scenario B — AUTH_FAILED never retries ===");
  const authErr = new ImapAuthFailedError("bad creds");
  // Three failures are queued, but only the first should ever be consumed.
  const queuedFailures = [authErr, authErr, authErr];
  const { factory, recorder } = makeFlakyFactory(queuedFailures, []);

  const events = [];
  const adapterOptions = {
    account: { provider: "qq", email: "me@qq.com", authCode: "x", folders: ["INBOX"] },
    sessionFactory: factory,
    parser: async () => ({}),
    maxConnectRetries: 3,
    retryBaseDelayMs: 1,
    onProgress: (e) => events.push(e),
  };
  const a = new EmailAdapter(adapterOptions);

  let caught = null;
  try {
    // Drain the iterator; the error is expected to surface here.
    for await (const _r of a.sync()) { /* drain */ }
  } catch (err) {
    caught = err;
  }

  console.log(`connect attempts: ${recorder.attempts} (expected 1)`);
  console.log(`caught.code: ${caught && caught.code}`);

  const retried = recorder.attempts !== 1;
  const propagated = Boolean(caught) && caught.code === "AUTH_FAILED";
  if (retried) {
    fail("FAIL: AUTH_FAILED should not retry");
  } else if (!propagated) {
    fail("FAIL: error should propagate as AUTH_FAILED");
  } else {
    console.log("AUTH short-circuit: ✓");
  }

  const errEvent = events.find((e) => e.phase === "error");
  if (!errEvent) {
    fail("FAIL: error progress event missing");
  } else if (errEvent.retriable !== false) {
    fail("FAIL: AUTH_FAILED error event should have retriable=false");
  } else {
    console.log("AUTH error event: ✓ retriable=false");
  }
}
|
|
168
|
+
|
|
169
|
+
/**
 * Phase 5.7 smoke entry point: runs scenario A, then scenario B, and prints
 * the PASSED banner only if neither scenario set `process.exitCode`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log("== Phase 5.7 smoke (retry + progress) ==");

  // Scenarios run strictly one after the other.
  const scenarios = [scenarioRetrySucceeds, scenarioAuthFailedNoRetry];
  for (const runScenario of scenarios) {
    await runScenario();
  }

  if (process.exitCode) {
    return;
  }
  console.log("\n== Phase 5.7 smoke PASSED ==");
}
|
|
177
|
+
|
|
178
|
+
main().catch((err) => {
|
|
179
|
+
console.error("smoke crashed:", err);
|
|
180
|
+
process.exitCode = 1;
|
|
181
|
+
});
|