@chainlesschain/personal-data-hub 0.3.9 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -25
- package/__tests__/adapters/apple-health.test.js +95 -0
- package/__tests__/adapters/email-templates.test.js +123 -0
- package/__tests__/adapters/family-23-collectors-scaffold.test.js +178 -0
- package/__tests__/adapters/game-genshin-scaffold.test.js +107 -0
- package/__tests__/adapters/git-activity.test.js +7 -1
- package/__tests__/adapters/local-im-pc.test.js +149 -0
- package/__tests__/adapters/netease-music.test.js +74 -0
- package/__tests__/adapters/qq-pc-direct-read.test.js +186 -0
- package/__tests__/adapters/system-data-adapter.test.js +4 -1
- package/__tests__/adapters/wechat-pc-direct-read.test.js +207 -0
- package/__tests__/adapters/weread.test.js +123 -0
- package/__tests__/analysis.test.js +120 -15
- package/__tests__/mobile-extractor-encrypted.test.js +460 -0
- package/__tests__/prompt-builder.test.js +25 -0
- package/__tests__/registry-readiness.test.js +233 -0
- package/__tests__/social-douyin-im-direct-read.test.js +311 -0
- package/__tests__/social-douyin-snapshot.test.js +5 -2
- package/__tests__/vault.test.js +99 -0
- package/lib/adapter-guide.js +520 -0
- package/lib/adapter-readiness.js +257 -0
- package/lib/adapters/_local-im-db-reader.js +218 -0
- package/lib/adapters/_local-im-pc-adapter.js +162 -0
- package/lib/adapters/apple-health/index.js +329 -0
- package/lib/adapters/dingtalk-pc/index.js +29 -0
- package/lib/adapters/edu-huawei-learning/api-client.js +47 -0
- package/lib/adapters/edu-huawei-learning/index.js +255 -0
- package/lib/adapters/edu-zuoyebang/api-client.js +48 -0
- package/lib/adapters/edu-zuoyebang/index.js +259 -0
- package/lib/adapters/email-imap/email-adapter.js +16 -0
- package/lib/adapters/email-imap/templates/bill.js +174 -18
- package/lib/adapters/feishu-pc/index.js +29 -0
- package/lib/adapters/finance-alipay/api-client.js +48 -0
- package/lib/adapters/finance-alipay/index.js +257 -0
- package/lib/adapters/game-genshin/api-client.js +59 -0
- package/lib/adapters/game-genshin/index.js +274 -0
- package/lib/adapters/game-honor-of-kings/api-client.js +54 -0
- package/lib/adapters/game-honor-of-kings/index.js +259 -0
- package/lib/adapters/netease-music/index.js +227 -0
- package/lib/adapters/qq-pc/index.js +200 -0
- package/lib/adapters/qq-pc/nt-db-reader.js +210 -0
- package/lib/adapters/social-douyin/index.js +194 -1
- package/lib/adapters/wechat/wechat-adapter.js +7 -1
- package/lib/adapters/wechat-pc/index.js +335 -0
- package/lib/adapters/wechat-pc/pc-db-reader.js +327 -0
- package/lib/adapters/weread/api-client.js +128 -0
- package/lib/adapters/weread/index.js +337 -0
- package/lib/analysis.js +65 -0
- package/lib/index.js +39 -0
- package/lib/mobile-extractor/bplist.js +233 -0
- package/lib/mobile-extractor/ios-backup-crypto.js +315 -0
- package/lib/mobile-extractor/ios.js +131 -16
- package/lib/prompt-builder.js +11 -1
- package/lib/registry.js +170 -0
- package/lib/vault.js +105 -0
- package/package.json +1 -1
- package/scripts/run-native-tests-sandbox.sh +2 -0
- package/vitest.config.js +79 -1
package/README.md
CHANGED
|
@@ -4,25 +4,38 @@ Personal Data Hub — UnifiedSchema, validators, batch helpers, SQLCipher
|
|
|
4
4
|
LocalVault, and AdapterRegistry for the "data back to the individual"
|
|
5
5
|
middleware.
|
|
6
6
|
|
|
7
|
-
> **
|
|
8
|
-
>
|
|
9
|
-
>
|
|
10
|
-
>
|
|
11
|
-
>
|
|
12
|
-
>
|
|
13
|
-
> Phase
|
|
14
|
-
>
|
|
15
|
-
>
|
|
16
|
-
>
|
|
17
|
-
>
|
|
18
|
-
>
|
|
19
|
-
>
|
|
20
|
-
>
|
|
21
|
-
>
|
|
22
|
-
>
|
|
23
|
-
>
|
|
24
|
-
>
|
|
25
|
-
>
|
|
7
|
+
> **v0.4.0 (ships with ChainlessChain v5.0.3.99, 2026-06-08).** Phases 0–13
|
|
8
|
+
> of the 13-phase plan in
|
|
9
|
+
> [`docs/design/Personal_Data_Hub_Architecture.md`](../../docs/design/Personal_Data_Hub_Architecture.md)
|
|
10
|
+
> have landed, plus the multi-platform collection layer. The foundation is
|
|
11
|
+
> unchanged: schema + validation + UUID v7 (Phase 0); SQLCipher LocalVault +
|
|
12
|
+
> pluggable key providers + migrations (Phase 1); AdapterRegistry + KG/RAG
|
|
13
|
+
> derivation (Phase 2); the natural-language AnalysisEngine with a hard
|
|
14
|
+
> privacy gate that refuses non-local LLMs unless the caller opts in
|
|
15
|
+
> (Phase 3); and production bridges — **CcLLMAdapter** (wraps cc llm-manager:
|
|
16
|
+
> Ollama / Volcengine / Anthropic / Gemini / DeepSeek), **CcKgSink**, **CcRagSink**
|
|
17
|
+
> — injected at the desktop/CLI entry so this package stays decoupled (Phase 3.5).
|
|
18
|
+
>
|
|
19
|
+
> **51 adapters are now live** (no longer "later phases"): Email IMAP,
|
|
20
|
+
> Alipay bill, 9 AI-chat vendors, WeChat / QQ / Weibo / Bilibili / Douyin /
|
|
21
|
+
> Xiaohongshu / Toutiao / Kuaishou social, Telegram / WhatsApp messaging,
|
|
22
|
+
> Taobao / JD / Meituan / Pinduoduo shopping, Amap / Baidu-map / Tencent-map /
|
|
23
|
+
> Ctrip / 12306 travel, system-data (contacts / calls / sms / location),
|
|
24
|
+
> and the developer-activity set (git / shell / vscode / browser-history /
|
|
25
|
+
> local-files / win-recent).
|
|
26
|
+
>
|
|
27
|
+
> **New in v0.4.0 (v5.0.3.99):** adapter **readiness** — split out from the
|
|
28
|
+
> loose `healthCheck` sync gate into a real ready/needs_setup/unavailable
|
|
29
|
+
> judgment (`registry.readiness()`) with a one-line reason, so "config looks
|
|
30
|
+
> fine but nothing collects" is no longer silent; an `adapter-guide.js`
|
|
31
|
+
> single source of import steps reused across web-shell / desktop / CLI /
|
|
32
|
+
> Android; new local-direct-read sources (Douyin, WeChat PC, QQ-NT, DingTalk,
|
|
33
|
+
> Feishu, WeRead, Apple Health, NetEase Music); email-bill LLM gap-fill
|
|
34
|
+
> (Phase 5.5); and iOS encrypted-backup decryption (Phase 7.5b).
|
|
35
|
+
>
|
|
36
|
+
> Editing `lib/**` requires bumping the package version + `npm publish` +
|
|
37
|
+
> the Android `USR_VERSION` sentinel, or real devices keep running stale code
|
|
38
|
+
> (see hidden-risk-traps #27/#28).
|
|
26
39
|
|
|
27
40
|
## What's in here
|
|
28
41
|
|
|
@@ -40,6 +53,14 @@ lib/
|
|
|
40
53
|
│ typed put/get, queryEvents, watermarks, audit, key
|
|
41
54
|
│ rotation (WAL-safe), destroy
|
|
42
55
|
├── adapter-spec.js PersonalDataAdapter contract + assertAdapter check
|
|
56
|
+
├── adapter-readiness.js readiness() — ready/needs_setup/unavailable + reason,
|
|
57
|
+
│ split out from the loose healthCheck sync gate
|
|
58
|
+
├── adapter-guide.js category-driven import guides (single source of import
|
|
59
|
+
│ steps reused across web-shell / desktop / CLI / Android)
|
|
60
|
+
├── adapters/ 51 live adapters (email-imap, alipay-bill, ai-chat-history,
|
|
61
|
+
│ wechat / wechat-pc, qq-pc, dingtalk-pc, feishu-pc, weread,
|
|
62
|
+
│ apple-health, netease-music, social-*, shopping-*,
|
|
63
|
+
│ travel-*, system-data, git-activity, vscode, ...)
|
|
43
64
|
├── kg-derive.js UnifiedSchema → KG triples (rdf:type / by / involves /
|
|
44
65
|
│ happened-at / etc.) — engine-agnostic
|
|
45
66
|
├── rag-derive.js UnifiedSchema → RAG (text, metadata) docs for indexing
|
|
@@ -217,7 +238,7 @@ cd packages/personal-data-hub
|
|
|
217
238
|
npm test
|
|
218
239
|
```
|
|
219
240
|
|
|
220
|
-
**
|
|
241
|
+
**2040 tests** across 121 files covering ID generation, all 5 entity validators,
|
|
221
242
|
batch helpers, key providers, vault open/migrations, entity round-trips,
|
|
222
243
|
transactional putBatch with rollback, raw_events archive, queryEvents
|
|
223
244
|
filters + pagination, sync watermarks, audit log, key rotation (WAL-safe),
|
|
@@ -230,11 +251,10 @@ tolerance), and the 1k events <30s ingest perf gate.
|
|
|
230
251
|
|
|
231
252
|
| Concern | Lives in |
|
|
232
253
|
|-----------------------|---------------------------------------------------|
|
|
233
|
-
| Platform KeyProviders (DPAPI/Keychain/Keystore) |
|
|
234
|
-
|
|
|
235
|
-
|
|
|
236
|
-
|
|
|
237
|
-
| AI analysis skills | Phase 11 — `skills/personal-analysis-*/` |
|
|
254
|
+
| Platform KeyProviders (DPAPI/Keychain/Keystore) | desktop-app-vue main-process bridge (the package ships the contract + InMemory/File providers) |
|
|
255
|
+
| Qdrant vector retrieval | wired into the existing RAG engine at the cc entry (BM25 derivation ships here) |
|
|
256
|
+
| AI analysis skills | `skills/personal-analysis-*/` (the 5 built-in analysis skills) |
|
|
257
|
+
| Native SQLCipher build | `better-sqlite3-multiple-ciphers` — host/Electron ABI dual-load handled at the cc entry |
|
|
238
258
|
|
|
239
259
|
## License
|
|
240
260
|
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const { AppleHealthAdapter } = require("../../lib/adapters/apple-health");
|
|
6
|
+
const { partitionBatch } = require("../../lib/batch");
|
|
7
|
+
|
|
8
|
+
const XML = [
|
|
9
|
+
'<?xml version="1.0" encoding="UTF-8"?>',
|
|
10
|
+
'<HealthData locale="zh_CN">',
|
|
11
|
+
' <Record type="HKQuantityTypeIdentifierStepCount" sourceName="iPhone" unit="count" creationDate="2024-01-15 08:36:00 +0800" startDate="2024-01-15 08:30:00 +0800" endDate="2024-01-15 08:35:00 +0800" value="123"/>',
|
|
12
|
+
' <Record type="HKCategoryTypeIdentifierSleepAnalysis" sourceName="Watch" startDate="2024-01-15 23:00:00 +0800" endDate="2024-01-16 07:00:00 +0800" value="HKCategoryValueSleepAnalysisAsleep"/>',
|
|
13
|
+
' <Workout workoutActivityType="HKWorkoutActivityTypeRunning" duration="30" durationUnit="min" totalDistance="5" totalDistanceUnit="km" startDate="2024-01-15 18:00:00 +0800" endDate="2024-01-15 18:30:00 +0800"/>',
|
|
14
|
+
' <SomethingElse foo="bar"/>',
|
|
15
|
+
"</HealthData>",
|
|
16
|
+
].join("\n");
|
|
17
|
+
|
|
18
|
+
function adapter(xml = XML, { exists = true } = {}) {
|
|
19
|
+
const a = new AppleHealthAdapter();
|
|
20
|
+
a._deps.fs = {
|
|
21
|
+
existsSync: () => exists,
|
|
22
|
+
readFileSync: () => xml,
|
|
23
|
+
accessSync: () => {},
|
|
24
|
+
constants: { R_OK: 4 },
|
|
25
|
+
};
|
|
26
|
+
return a;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
async function collect(iter) {
|
|
30
|
+
const out = [];
|
|
31
|
+
for await (const r of iter) out.push(r);
|
|
32
|
+
return out;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
describe("AppleHealthAdapter", () => {
|
|
36
|
+
it("readinessOnly → NO_FILE (file-import, not 手机采集)", async () => {
|
|
37
|
+
const a = new AppleHealthAdapter();
|
|
38
|
+
const r = await a.authenticate({ readinessOnly: true });
|
|
39
|
+
expect(r.reason).toBe("NO_FILE");
|
|
40
|
+
expect(a.extractMode).toBe("file-import");
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
it("parses Record + Workout lines, ignores other elements", async () => {
|
|
44
|
+
const raws = await collect(adapter().sync({ inputPath: "/fake/export.xml" }));
|
|
45
|
+
expect(raws.map((r) => r.kind)).toEqual(["record", "record", "workout"]);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("normalizes to valid events (metrics → other, workout → trip)", async () => {
|
|
49
|
+
const a = adapter();
|
|
50
|
+
const raws = await collect(a.sync({ inputPath: "/fake/export.xml" }));
|
|
51
|
+
const merged = { events: [], persons: [], places: [], items: [], topics: [] };
|
|
52
|
+
for (const r of raws) {
|
|
53
|
+
const n = a.normalize(r);
|
|
54
|
+
for (const k of Object.keys(merged)) merged[k].push(...n[k]);
|
|
55
|
+
}
|
|
56
|
+
const { valid, invalidReasons } = partitionBatch(merged);
|
|
57
|
+
expect(invalidReasons).toHaveLength(0);
|
|
58
|
+
expect(valid.events).toHaveLength(3);
|
|
59
|
+
const subtypes = valid.events.map((e) => e.subtype).sort();
|
|
60
|
+
expect(subtypes).toEqual(["other", "other", "trip"]);
|
|
61
|
+
const steps = valid.events.find((e) => e.extra.metric === "HKQuantityTypeIdentifierStepCount");
|
|
62
|
+
expect(steps.content.title).toContain("步数");
|
|
63
|
+
expect(steps.content.title).toContain("123");
|
|
64
|
+
const workout = valid.events.find((e) => e.subtype === "trip");
|
|
65
|
+
expect(workout.extra.activityType).toBe("Running");
|
|
66
|
+
expect(workout.content.title).toContain("5km");
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
it("parses the +0800 timezone offset correctly", async () => {
|
|
70
|
+
const a = adapter();
|
|
71
|
+
const raws = await collect(a.sync({ inputPath: "/fake/export.xml" }));
|
|
72
|
+
// 2024-01-15 08:30:00 +0800 == 2024-01-15T00:30:00Z
|
|
73
|
+
expect(raws[0].capturedAt).toBe(Date.parse("2024-01-15T00:30:00Z"));
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
it("respects limit + include", async () => {
|
|
77
|
+
const a = adapter();
|
|
78
|
+
const capped = await collect(a.sync({ inputPath: "/x", limit: 1 }));
|
|
79
|
+
expect(capped).toHaveLength(1);
|
|
80
|
+
const noWorkout = await collect(a.sync({ inputPath: "/x", include: { workout: false } }));
|
|
81
|
+
expect(noWorkout.every((r) => r.kind === "record")).toBe(true);
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
it("emits truncated progress when maxRecords exceeded", async () => {
|
|
85
|
+
const a = adapter();
|
|
86
|
+
const events = [];
|
|
87
|
+
await collect(a.sync({ inputPath: "/x", maxRecords: 1, onProgress: (e) => events.push(e) }));
|
|
88
|
+
expect(events.find((e) => e.phase === "truncated")).toBeTruthy();
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
it("missing file yields nothing", async () => {
|
|
92
|
+
const raws = await collect(adapter(XML, { exists: false }).sync({ inputPath: "/x" }));
|
|
93
|
+
expect(raws).toHaveLength(0);
|
|
94
|
+
});
|
|
95
|
+
});
|
|
@@ -262,6 +262,129 @@ describe("extractBill — bank statement", () => {
|
|
|
262
262
|
});
|
|
263
263
|
});
|
|
264
264
|
|
|
265
|
+
// ─── bill.js Phase 5.5 — LLM gap-fill ─────────────────────────────────────
|
|
266
|
+
|
|
267
|
+
describe("extractBill — Phase 5.5 LLM gap-fill", () => {
|
|
268
|
+
// A body the regex can't crack (HTML-stripped marketing-style prose with
|
|
269
|
+
// no recognizable keywords) so coverage stays under 0.6 and the LLM fires.
|
|
270
|
+
// from:[] so even `institution` (otherwise derived from sender domain)
|
|
271
|
+
// is missing — keeps regex coverage at 0 so the LLM path is exercised.
|
|
272
|
+
const opaqueEmail = () => emailOf({
|
|
273
|
+
from: [],
|
|
274
|
+
subject: "Your statement is ready",
|
|
275
|
+
textBody: "Hello, your latest statement is now available. Please sign in to view the details of your account activity for this period.",
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
const llmReturning = (obj, sink) => ({
|
|
279
|
+
async chat(messages, _opts) {
|
|
280
|
+
if (sink) sink.messages = messages;
|
|
281
|
+
return { text: JSON.stringify(obj) };
|
|
282
|
+
},
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
it("fills missing fields from LLM when regex coverage < 60%", async () => {
|
|
286
|
+
const r = await extractBill(opaqueEmail(), {
|
|
287
|
+
llm: llmReturning({
|
|
288
|
+
amount: { value: 1234.5, currency: "CNY" },
|
|
289
|
+
dueAmount: { value: 1000, currency: "CNY" },
|
|
290
|
+
dueDate: "2026-12-20",
|
|
291
|
+
billingPeriod: { start: "2026-11-01", end: "2026-11-30" },
|
|
292
|
+
accountIdentifier: "6225 8801 2345 6789",
|
|
293
|
+
institution: "Example Bank",
|
|
294
|
+
billingMonth: "2026-11",
|
|
295
|
+
}),
|
|
296
|
+
});
|
|
297
|
+
expect(r.fields.amount.value).toBe(1234.5);
|
|
298
|
+
expect(r.fields.amount.direction).toBe("out");
|
|
299
|
+
expect(r.fields.dueAmount.value).toBe(1000);
|
|
300
|
+
expect(r.fields.dueDate).toBeGreaterThan(0);
|
|
301
|
+
expect(r.fields.billingPeriod.startMs).toBeLessThan(r.fields.billingPeriod.endMs);
|
|
302
|
+
// accountIdentifier coerced to last-4 only — never the full PAN
|
|
303
|
+
expect(r.fields.accountIdentifier).toBe("**** 6789");
|
|
304
|
+
expect(r.fields.institution).toBe("Example Bank");
|
|
305
|
+
expect(r.fields.billingMonth).toBe("2026-11");
|
|
306
|
+
expect(r.llmFilled).toEqual(
|
|
307
|
+
expect.arrayContaining(["amount", "dueAmount", "dueDate", "accountIdentifier", "institution", "billingMonth"]),
|
|
308
|
+
);
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
it("regex wins: LLM fills only the gaps, never overwrites a regex field", async () => {
|
|
312
|
+
// Only an amount is regex-extractable → coverage 1/7 < 0.6, LLM fires.
|
|
313
|
+
const r = await extractBill(emailOf({
|
|
314
|
+
from: [],
|
|
315
|
+
subject: "statement",
|
|
316
|
+
textBody: "您的账单金额为 ¥3,256.78。",
|
|
317
|
+
}), {
|
|
318
|
+
llm: llmReturning({ amount: { value: 99999, currency: "USD" }, institution: "LLM Bank" }),
|
|
319
|
+
});
|
|
320
|
+
// regex amount retained, LLM's bogus 99999/USD ignored
|
|
321
|
+
expect(r.fields.amount.value).toBe(3256.78);
|
|
322
|
+
expect(r.fields.amount.currency).toBe("CNY");
|
|
323
|
+
// institution was missing → LLM allowed to fill it
|
|
324
|
+
expect(r.fields.institution).toBe("LLM Bank");
|
|
325
|
+
expect(r.llmFilled).toEqual(["institution"]);
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
it("does NOT call the LLM when regex coverage already ≥ 60%", async () => {
|
|
329
|
+
let called = false;
|
|
330
|
+
const r = await extractBill(emailOf({
|
|
331
|
+
from: [{ address: "ebank@ccb.com.cn" }],
|
|
332
|
+
subject: "建设银行 11 月对账单",
|
|
333
|
+
textBody: "本期应还金额 ¥800 元,尾号 5555,账单周期 2026-10-01 至 2026-10-31,最后还款日 2026-11-25。",
|
|
334
|
+
}), {
|
|
335
|
+
llm: { async chat() { called = true; return { text: "{}" }; } },
|
|
336
|
+
});
|
|
337
|
+
expect(called).toBe(false);
|
|
338
|
+
expect(r.llmFilled).toBeUndefined();
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
it("drops malformed LLM values (bad dates, zero amounts, short account)", async () => {
|
|
342
|
+
const r = await extractBill(opaqueEmail(), {
|
|
343
|
+
llm: llmReturning({
|
|
344
|
+
amount: { value: 0 }, // non-positive → dropped
|
|
345
|
+
dueDate: "2026-13-45", // impossible date → dropped
|
|
346
|
+
accountIdentifier: "12", // < 4 digits → dropped
|
|
347
|
+
billingMonth: "2026-99", // bad month → dropped
|
|
348
|
+
institution: " ", // blank → dropped
|
|
349
|
+
}),
|
|
350
|
+
});
|
|
351
|
+
expect(r.fields.amount).toBeUndefined();
|
|
352
|
+
expect(r.fields.dueDate).toBeUndefined();
|
|
353
|
+
expect(r.fields.accountIdentifier).toBeUndefined();
|
|
354
|
+
expect(r.fields.billingMonth).toBeUndefined();
|
|
355
|
+
expect(r.fields.institution).toBeUndefined();
|
|
356
|
+
expect(r.llmFilled).toBeUndefined();
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
it("records a warning when the LLM returns unparseable output", async () => {
|
|
360
|
+
const r = await extractBill(opaqueEmail(), {
|
|
361
|
+
llm: { async chat() { return { text: "sorry, I can't help with that" }; } },
|
|
362
|
+
});
|
|
363
|
+
expect(r.warnings.some((w) => w.includes("not parseable JSON"))).toBe(true);
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
it("records a warning when the LLM call throws", async () => {
|
|
367
|
+
const r = await extractBill(opaqueEmail(), {
|
|
368
|
+
llm: { async chat() { throw new Error("rate limited"); } },
|
|
369
|
+
});
|
|
370
|
+
expect(r.warnings.some((w) => w.includes("LLM bill fill failed") && w.includes("rate limited"))).toBe(true);
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
it("tolerates LLM output wrapped in ```json fences", async () => {
|
|
374
|
+
const r = await extractBill(opaqueEmail(), {
|
|
375
|
+
llm: { async chat() { return { text: "```json\n{\"institution\":\"Fenced Bank\"}\n```" }; } },
|
|
376
|
+
});
|
|
377
|
+
expect(r.fields.institution).toBe("Fenced Bank");
|
|
378
|
+
expect(r.llmFilled).toEqual(["institution"]);
|
|
379
|
+
});
|
|
380
|
+
|
|
381
|
+
it("no LLM provided → behaves exactly as before (no llmFilled, no warning)", async () => {
|
|
382
|
+
const r = await extractBill(opaqueEmail());
|
|
383
|
+
expect(r.llmFilled).toBeUndefined();
|
|
384
|
+
expect(r.warnings.every((w) => !w.includes("LLM"))).toBe(true);
|
|
385
|
+
});
|
|
386
|
+
});
|
|
387
|
+
|
|
265
388
|
// ─── order.js ───────────────────────────────────────────────────────────
|
|
266
389
|
|
|
267
390
|
describe("extractOrder — e-commerce", () => {
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FAMILY-23 v0.1 — Honor of Kings (王者荣耀) / Zuoyebang (作业帮) / Alipay (支付宝) / Huawei Learning Center (华为学习中心) collector scaffold tests.
|
|
3
|
+
* Per platform: contract + extractUid + sync NO_INPUT throw + snapshot sync + normalize → valid batch.
|
|
4
|
+
*/
|
|
5
|
+
import { describe, it, expect } from "vitest";
|
|
6
|
+
const fs = require("node:fs");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
const path = require("node:path");
|
|
9
|
+
const lib = require("../../lib");
|
|
10
|
+
const { assertAdapter } = require("../../lib/adapter-spec");
|
|
11
|
+
const { validateBatch } = require("../../lib/batch");
|
|
12
|
+
|
|
13
|
+
const {
|
|
14
|
+
HonorOfKingsApiClient,
|
|
15
|
+
} = require("../../lib/adapters/game-honor-of-kings/api-client");
|
|
16
|
+
const {
|
|
17
|
+
ZuoyebangApiClient,
|
|
18
|
+
} = require("../../lib/adapters/edu-zuoyebang/api-client");
|
|
19
|
+
const { AlipayApiClient } = require("../../lib/adapters/finance-alipay/api-client");
|
|
20
|
+
const {
|
|
21
|
+
HuaweiLearningApiClient,
|
|
22
|
+
} = require("../../lib/adapters/edu-huawei-learning/api-client");
|
|
23
|
+
|
|
24
|
+
function writeSnapshot(obj) {
|
|
25
|
+
const p = path.join(
|
|
26
|
+
os.tmpdir(),
|
|
27
|
+
`fam23-snap-${Date.now()}-${Math.random().toString(36).slice(2)}.json`,
|
|
28
|
+
);
|
|
29
|
+
fs.writeFileSync(p, JSON.stringify(obj), "utf-8");
|
|
30
|
+
return p;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// platform → { Adapter, name, sensitivity, eventKind, eventSubtype, idKey }
|
|
34
|
+
const PLATFORMS = [
|
|
35
|
+
{
|
|
36
|
+
label: "王者荣耀",
|
|
37
|
+
Adapter: lib.HonorOfKingsAdapter,
|
|
38
|
+
name: "game-honor-of-kings",
|
|
39
|
+
sensitivity: "medium",
|
|
40
|
+
eventKind: "play",
|
|
41
|
+
eventSubtype: "media",
|
|
42
|
+
idKey: "hok-uid",
|
|
43
|
+
sample: { kind: "play", id: "p1", durationMs: 1800000, mode: "rank", startAt: 1700000000000 },
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
label: "作业帮",
|
|
47
|
+
Adapter: lib.ZuoyebangAdapter,
|
|
48
|
+
name: "edu-zuoyebang",
|
|
49
|
+
sensitivity: "medium",
|
|
50
|
+
eventKind: "study",
|
|
51
|
+
eventSubtype: "other",
|
|
52
|
+
idKey: "zuoyebang-uid",
|
|
53
|
+
sample: { kind: "study", id: "s1", subject: "math", durationMs: 1200000, startAt: 1700000000000 },
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
label: "支付宝",
|
|
57
|
+
Adapter: lib.AlipayAdapter,
|
|
58
|
+
name: "finance-alipay",
|
|
59
|
+
sensitivity: "high",
|
|
60
|
+
eventKind: "order",
|
|
61
|
+
eventSubtype: "payment",
|
|
62
|
+
idKey: "alipay-uid",
|
|
63
|
+
sample: { kind: "order", id: "o1", merchant: "便利店", amountFen: 350, direction: "out", startAt: 1700000000000 },
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
label: "华为学习中心",
|
|
67
|
+
Adapter: lib.HuaweiLearningAdapter,
|
|
68
|
+
name: "edu-huawei-learning",
|
|
69
|
+
sensitivity: "medium",
|
|
70
|
+
eventKind: "study",
|
|
71
|
+
eventSubtype: "other",
|
|
72
|
+
idKey: "huawei-learning-uid",
|
|
73
|
+
sample: { kind: "study", id: "h1", course: "物理", durationMs: 900000, startAt: 1700000000000 },
|
|
74
|
+
},
|
|
75
|
+
];
|
|
76
|
+
|
|
77
|
+
for (const P of PLATFORMS) {
|
|
78
|
+
describe(`${P.label} (${P.name}) — FAMILY-23 v0.1`, () => {
|
|
79
|
+
it("contract conformance + sensitivity", () => {
|
|
80
|
+
const a = new P.Adapter();
|
|
81
|
+
expect(assertAdapter(a).ok).toBe(true);
|
|
82
|
+
expect(a.name).toBe(P.name);
|
|
83
|
+
expect(a.version).toBe("0.1.0");
|
|
84
|
+
expect(a.dataDisclosure.sensitivity).toBe(P.sensitivity);
|
|
85
|
+
expect(a.capabilities).toContain("sync:snapshot");
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
it("sync throws NO_INPUT without inputPath", async () => {
|
|
89
|
+
const a = new P.Adapter();
|
|
90
|
+
await expect(async () => {
|
|
91
|
+
for await (const _ of a.sync({})) void _;
|
|
92
|
+
}).rejects.toThrow(/inputPath/);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it("sync via snapshot yields profile + platform event", async () => {
|
|
96
|
+
const snapPath = writeSnapshot({
|
|
97
|
+
schemaVersion: 1,
|
|
98
|
+
snapshottedAt: 1700000000000,
|
|
99
|
+
account: { uid: "12345", displayName: "kid" },
|
|
100
|
+
events: [
|
|
101
|
+
{ kind: "profile", id: "profile-12345", uid: "12345", nickname: "kid" },
|
|
102
|
+
P.sample,
|
|
103
|
+
],
|
|
104
|
+
});
|
|
105
|
+
try {
|
|
106
|
+
const a = new P.Adapter();
|
|
107
|
+
const raws = [];
|
|
108
|
+
for await (const r of a.sync({ inputPath: snapPath })) raws.push(r);
|
|
109
|
+
expect(raws).toHaveLength(2);
|
|
110
|
+
expect(raws[0].kind).toBe("profile");
|
|
111
|
+
expect(raws[1].kind).toBe(P.eventKind);
|
|
112
|
+
} finally {
|
|
113
|
+
fs.unlinkSync(snapPath);
|
|
114
|
+
}
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it("normalize profile → person-self with platform uid; event → valid subtype", () => {
|
|
118
|
+
const a = new P.Adapter();
|
|
119
|
+
const profileBatch = a.normalize({
|
|
120
|
+
adapter: P.name,
|
|
121
|
+
kind: "profile",
|
|
122
|
+
originalId: `${P.name}:profile:12345`,
|
|
123
|
+
capturedAt: 1700000000000,
|
|
124
|
+
payload: { kind: "profile", uid: "12345", nickname: "kid" },
|
|
125
|
+
});
|
|
126
|
+
expect(validateBatch(profileBatch).valid).toBe(true);
|
|
127
|
+
expect(profileBatch.persons[0].subtype).toBe("self");
|
|
128
|
+
expect(profileBatch.persons[0].identifiers[P.idKey]).toEqual(["12345"]);
|
|
129
|
+
|
|
130
|
+
const eventBatch = a.normalize({
|
|
131
|
+
adapter: P.name,
|
|
132
|
+
kind: P.eventKind,
|
|
133
|
+
originalId: `${P.name}:${P.eventKind}:1`,
|
|
134
|
+
capturedAt: 1700000000000,
|
|
135
|
+
payload: P.sample,
|
|
136
|
+
});
|
|
137
|
+
expect(validateBatch(eventBatch).valid).toBe(true);
|
|
138
|
+
expect(eventBatch.events[0].subtype).toBe(P.eventSubtype);
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
it("normalize throws on missing payload", () => {
|
|
142
|
+
const a = new P.Adapter();
|
|
143
|
+
expect(() => a.normalize({})).toThrow(/payload missing/);
|
|
144
|
+
});
|
|
145
|
+
});
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
describe("FAMILY-23 extractUid cookie scrape", () => {
|
|
149
|
+
it("王者荣耀: openid > uin > tencent_uid", () => {
|
|
150
|
+
const c = new HonorOfKingsApiClient();
|
|
151
|
+
expect(c.extractUid("openid=oABC1234XYZ; foo=bar")).toBe("oABC1234XYZ");
|
|
152
|
+
expect(c.extractUid("uin=o0012345; x=y")).toBe("12345");
|
|
153
|
+
expect(c.extractUid("tencent_uid=678; z=1")).toBe("678");
|
|
154
|
+
expect(c.extractUid("foo=bar")).toBeNull();
|
|
155
|
+
expect(c.lastError.code).toBe(-7);
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
it("作业帮: uid / student_id / passport_uid (opaque ZYBUSS → null)", () => {
|
|
159
|
+
const c = new ZuoyebangApiClient();
|
|
160
|
+
expect(c.extractUid("uid=111; ZYBUSS=opaque")).toBe("111");
|
|
161
|
+
expect(c.extractUid("student_id=222")).toBe("222");
|
|
162
|
+
expect(c.extractUid("ZYBUSS=onlyopaquetoken")).toBeNull();
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
it("支付宝: alipay_uid / userId / loginUserId", () => {
|
|
166
|
+
const c = new AlipayApiClient();
|
|
167
|
+
expect(c.extractUid("alipay_uid=2088123; t=x")).toBe("2088123");
|
|
168
|
+
expect(c.extractUid("userId=999")).toBe("999");
|
|
169
|
+
expect(c.extractUid("ALIPAYJSESSIONID=abc")).toBeNull();
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
it("华为学习中心: accountId / userId / huaweiUid", () => {
|
|
173
|
+
const c = new HuaweiLearningApiClient();
|
|
174
|
+
expect(c.extractUid("accountId=555; deviceId=abc")).toBe("555");
|
|
175
|
+
expect(c.extractUid("huaweiUid=777")).toBe("777");
|
|
176
|
+
expect(c.extractUid("deviceId=nonnumeric")).toBeNull();
|
|
177
|
+
});
|
|
178
|
+
});
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FAMILY-23 v0.1 — game-genshin adapter scaffold tests.
|
|
3
|
+
* Contract + extractUid cookie scrape + snapshot sync + normalize → valid batch.
|
|
4
|
+
*/
|
|
5
|
+
import { describe, it, expect } from "vitest";
|
|
6
|
+
const fs = require("node:fs");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
const path = require("node:path");
|
|
9
|
+
const { GenshinAdapter } = require("../../lib");
|
|
10
|
+
const { GenshinApiClient } = require("../../lib/adapters/game-genshin/api-client");
|
|
11
|
+
const { assertAdapter } = require("../../lib/adapter-spec");
|
|
12
|
+
const { validateBatch } = require("../../lib/batch");
|
|
13
|
+
|
|
14
|
+
function writeSnapshot(obj) {
|
|
15
|
+
const p = path.join(
|
|
16
|
+
os.tmpdir(),
|
|
17
|
+
`genshin-snap-${Date.now()}-${Math.random().toString(36).slice(2)}.json`,
|
|
18
|
+
);
|
|
19
|
+
fs.writeFileSync(p, JSON.stringify(obj), "utf-8");
|
|
20
|
+
return p;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
describe("GenshinAdapter — FAMILY-23 v0.1 cookie-scrape placeholder", () => {
|
|
24
|
+
it("contract conformance + sensitivity medium", () => {
|
|
25
|
+
const a = new GenshinAdapter();
|
|
26
|
+
expect(assertAdapter(a).ok).toBe(true);
|
|
27
|
+
expect(a.name).toBe("game-genshin");
|
|
28
|
+
expect(a.version).toBe("0.1.0");
|
|
29
|
+
expect(a.extractMode).toBe("web-api");
|
|
30
|
+
expect(a.dataDisclosure.sensitivity).toBe("medium");
|
|
31
|
+
expect(a.capabilities).toContain("sync:snapshot");
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("extractUid parses HoYoLAB cookie keys (priority + null)", () => {
|
|
35
|
+
const c = new GenshinApiClient();
|
|
36
|
+
expect(c.extractUid("account_id_v2=12345; ltoken_v2=abc")).toBe("12345");
|
|
37
|
+
expect(c.extractUid("ltuid_v2=67890; foo=bar")).toBe("67890");
|
|
38
|
+
expect(c.extractUid("account_id=111; x=y")).toBe("111");
|
|
39
|
+
expect(c.extractUid("ltuid=222")).toBe("222");
|
|
40
|
+
// Priority: account_id_v2 takes precedence over ltuid
|
|
41
|
+
expect(c.extractUid("ltuid=999; account_id_v2=12345")).toBe("12345");
|
|
42
|
+
expect(c.extractUid("foo=bar")).toBeNull();
|
|
43
|
+
expect(c.lastError.code).toBe(-7);
|
|
44
|
+
expect(c.extractUid("")).toBeNull();
|
|
45
|
+
expect(c.lastError.code).toBe(-1);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("sync throws NO_INPUT without inputPath (v0.1 no live HTTP)", async () => {
|
|
49
|
+
const a = new GenshinAdapter();
|
|
50
|
+
await expect(async () => {
|
|
51
|
+
for await (const _ of a.sync({})) void _;
|
|
52
|
+
}).rejects.toThrow(/inputPath/);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("sync via snapshot yields profile + play raws", async () => {
|
|
56
|
+
const snapPath = writeSnapshot({
|
|
57
|
+
schemaVersion: 1,
|
|
58
|
+
snapshottedAt: 1700000000000,
|
|
59
|
+
account: { uid: "12345", displayName: "旅行者" },
|
|
60
|
+
events: [
|
|
61
|
+
{ kind: "profile", id: "profile-12345", uid: "12345", nickname: "旅行者", level: 58 },
|
|
62
|
+
{ kind: "play", id: "play-s1", durationMs: 3600000, mode: "single", startAt: 1700000000000 },
|
|
63
|
+
],
|
|
64
|
+
});
|
|
65
|
+
try {
|
|
66
|
+
const a = new GenshinAdapter();
|
|
67
|
+
const raws = [];
|
|
68
|
+
for await (const r of a.sync({ inputPath: snapPath })) raws.push(r);
|
|
69
|
+
expect(raws).toHaveLength(2);
|
|
70
|
+
expect(raws[0].kind).toBe("profile");
|
|
71
|
+
expect(raws[0].originalId).toBe("genshin:profile:profile-12345");
|
|
72
|
+
expect(raws[1].kind).toBe("play");
|
|
73
|
+
} finally {
|
|
74
|
+
fs.unlinkSync(snapPath);
|
|
75
|
+
}
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
it("normalize profile → person-self with genshin-uid; play → MEDIA event (valid batch)", () => {
|
|
79
|
+
const a = new GenshinAdapter();
|
|
80
|
+
const profileBatch = a.normalize({
|
|
81
|
+
adapter: "game-genshin",
|
|
82
|
+
kind: "profile",
|
|
83
|
+
originalId: "genshin:profile:profile-12345",
|
|
84
|
+
capturedAt: 1700000000000,
|
|
85
|
+
payload: { kind: "profile", uid: "12345", nickname: "旅行者", level: 58 },
|
|
86
|
+
});
|
|
87
|
+
expect(validateBatch(profileBatch).valid).toBe(true);
|
|
88
|
+
expect(profileBatch.persons[0].subtype).toBe("self");
|
|
89
|
+
expect(profileBatch.persons[0].identifiers["genshin-uid"]).toEqual(["12345"]);
|
|
90
|
+
|
|
91
|
+
const playBatch = a.normalize({
|
|
92
|
+
adapter: "game-genshin",
|
|
93
|
+
kind: "play",
|
|
94
|
+
originalId: "genshin:play:play-s1",
|
|
95
|
+
capturedAt: 1700000000000,
|
|
96
|
+
payload: { kind: "play", durationMs: 3600000, mode: "single", startAt: 1700000000000 },
|
|
97
|
+
});
|
|
98
|
+
expect(validateBatch(playBatch).valid).toBe(true);
|
|
99
|
+
expect(playBatch.events[0].subtype).toBe("media");
|
|
100
|
+
expect(playBatch.events[0].extra.durationMs).toBe(3600000);
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
it("normalize throws on missing payload", () => {
|
|
104
|
+
const a = new GenshinAdapter();
|
|
105
|
+
expect(() => a.normalize({})).toThrow(/payload missing/);
|
|
106
|
+
});
|
|
107
|
+
});
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
3
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
4
4
|
import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from "node:fs";
|
|
5
5
|
import { join } from "node:path";
|
|
6
6
|
import { tmpdir } from "node:os";
|
|
7
7
|
import { execFileSync } from "node:child_process";
|
|
8
8
|
|
|
9
|
+
// Every test here spawns real `git` (init/config/commit) across throwaway
|
|
10
|
+
// repos. On Windows, under the full-suite parallel worker pool, that subprocess
|
|
11
|
+
// fan-out routinely blows past the 10s default and flakes. Give the whole file
|
|
12
|
+
// generous headroom — the work is real, the default timeout is just too tight.
|
|
13
|
+
vi.setConfig({ testTimeout: 30000, hookTimeout: 30000 });
|
|
14
|
+
|
|
9
15
|
const {
|
|
10
16
|
GitActivityAdapter,
|
|
11
17
|
GIT_ACTIVITY_NAME,
|