@chainlesschain/personal-data-hub 0.3.9 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +45 -25
  2. package/__tests__/adapters/apple-health.test.js +95 -0
  3. package/__tests__/adapters/email-templates.test.js +123 -0
  4. package/__tests__/adapters/family-23-collectors-scaffold.test.js +178 -0
  5. package/__tests__/adapters/game-genshin-scaffold.test.js +107 -0
  6. package/__tests__/adapters/git-activity.test.js +7 -1
  7. package/__tests__/adapters/local-im-pc.test.js +149 -0
  8. package/__tests__/adapters/netease-music.test.js +74 -0
  9. package/__tests__/adapters/qq-pc-direct-read.test.js +186 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +4 -1
  11. package/__tests__/adapters/wechat-pc-direct-read.test.js +207 -0
  12. package/__tests__/adapters/weread.test.js +123 -0
  13. package/__tests__/analysis.test.js +120 -15
  14. package/__tests__/mobile-extractor-encrypted.test.js +460 -0
  15. package/__tests__/prompt-builder.test.js +25 -0
  16. package/__tests__/registry-readiness.test.js +233 -0
  17. package/__tests__/social-douyin-im-direct-read.test.js +311 -0
  18. package/__tests__/social-douyin-snapshot.test.js +5 -2
  19. package/__tests__/vault.test.js +99 -0
  20. package/lib/adapter-guide.js +520 -0
  21. package/lib/adapter-readiness.js +257 -0
  22. package/lib/adapters/_local-im-db-reader.js +218 -0
  23. package/lib/adapters/_local-im-pc-adapter.js +162 -0
  24. package/lib/adapters/apple-health/index.js +329 -0
  25. package/lib/adapters/dingtalk-pc/index.js +29 -0
  26. package/lib/adapters/edu-huawei-learning/api-client.js +47 -0
  27. package/lib/adapters/edu-huawei-learning/index.js +255 -0
  28. package/lib/adapters/edu-zuoyebang/api-client.js +48 -0
  29. package/lib/adapters/edu-zuoyebang/index.js +259 -0
  30. package/lib/adapters/email-imap/email-adapter.js +16 -0
  31. package/lib/adapters/email-imap/templates/bill.js +174 -18
  32. package/lib/adapters/feishu-pc/index.js +29 -0
  33. package/lib/adapters/finance-alipay/api-client.js +48 -0
  34. package/lib/adapters/finance-alipay/index.js +257 -0
  35. package/lib/adapters/game-genshin/api-client.js +59 -0
  36. package/lib/adapters/game-genshin/index.js +274 -0
  37. package/lib/adapters/game-honor-of-kings/api-client.js +54 -0
  38. package/lib/adapters/game-honor-of-kings/index.js +259 -0
  39. package/lib/adapters/netease-music/index.js +227 -0
  40. package/lib/adapters/qq-pc/index.js +200 -0
  41. package/lib/adapters/qq-pc/nt-db-reader.js +210 -0
  42. package/lib/adapters/social-douyin/index.js +194 -1
  43. package/lib/adapters/wechat/wechat-adapter.js +7 -1
  44. package/lib/adapters/wechat-pc/index.js +335 -0
  45. package/lib/adapters/wechat-pc/pc-db-reader.js +327 -0
  46. package/lib/adapters/weread/api-client.js +128 -0
  47. package/lib/adapters/weread/index.js +337 -0
  48. package/lib/analysis.js +65 -0
  49. package/lib/index.js +39 -0
  50. package/lib/mobile-extractor/bplist.js +233 -0
  51. package/lib/mobile-extractor/ios-backup-crypto.js +315 -0
  52. package/lib/mobile-extractor/ios.js +131 -16
  53. package/lib/prompt-builder.js +11 -1
  54. package/lib/registry.js +170 -0
  55. package/lib/vault.js +105 -0
  56. package/package.json +1 -1
  57. package/scripts/run-native-tests-sandbox.sh +2 -0
  58. package/vitest.config.js +79 -1
@@ -0,0 +1,123 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect } from "vitest";
4
+
5
+ const { WeReadAdapter } = require("../../lib/adapters/weread");
6
+ const { WeReadApiClient } = require("../../lib/adapters/weread/api-client");
7
+ const { partitionBatch } = require("../../lib/batch");
8
+
9
+ // ── stub fetch returning canned WeRead JSON by URL ──────────────────────
10
+ function makeFetch(routes) {
11
+ return async (url) => {
12
+ for (const [pat, body] of routes) {
13
+ if (url.includes(pat)) {
14
+ return {
15
+ ok: true,
16
+ status: 200,
17
+ headers: { get: () => null },
18
+ json: async () => body,
19
+ };
20
+ }
21
+ }
22
+ return { ok: true, status: 200, headers: { get: () => null }, json: async () => ({}) };
23
+ };
24
+ }
25
+
26
+ const ROUTES = [
27
+ ["/user/notebooks", { books: [{ bookId: "b1", book: { title: "人类简史", author: "赫拉利", cover: "c" }, noteCount: 2, reviewCount: 1 }] }],
28
+ ["/book/bookmarklist", { updated: [{ bookmarkId: "m1", bookId: "b1", markText: "认知革命", chapterTitle: "第一章", createTime: 1700000000 }] }],
29
+ ["/review/list", { reviews: [{ review: { reviewId: "r1", bookId: "b1", content: "很有启发", chapterTitle: "第一章", createTime: 1700000100 } }] }],
30
+ ];
31
+
32
+ async function collect(iter) {
33
+ const out = [];
34
+ for await (const r of iter) out.push(r);
35
+ return out;
36
+ }
37
+
38
+ describe("WeReadApiClient (cookie HTTP, stub fetch)", () => {
39
+ it("parses notebooks / bookmarks / reviews defensively", async () => {
40
+ const c = new WeReadApiClient({ cookie: "wr_skey=x", fetch: makeFetch(ROUTES) });
41
+ const books = await c.getNotebooks();
42
+ expect(books).toHaveLength(1);
43
+ expect(books[0].title).toBe("人类简史");
44
+ const marks = await c.getBookmarks("b1");
45
+ expect(marks[0].markText).toBe("认知革命");
46
+ const reviews = await c.getReviews("b1");
47
+ expect(reviews[0].content).toBe("很有启发");
48
+ });
49
+
50
+ it("requires a cookie", () => {
51
+ expect(() => new WeReadApiClient({})).toThrow(/cookie/);
52
+ });
53
+
54
+ it("degrades a failing endpoint to empty (no throw)", async () => {
55
+ const c = new WeReadApiClient({
56
+ cookie: "x",
57
+ fetch: async () => { throw new Error("network down"); },
58
+ });
59
+ expect(await c.getNotebooks()).toEqual([]);
60
+ expect(c.lastErrorCode).toBeTruthy();
61
+ });
62
+ });
63
+
64
+ describe("WeReadAdapter — cookie mode", () => {
65
+ it("readinessOnly without cookie → INVALID_COOKIE (credential)", async () => {
66
+ const r = await new WeReadAdapter().authenticate({ readinessOnly: true });
67
+ expect(r.reason).toBe("INVALID_COOKIE");
68
+ });
69
+
70
+ it("readinessOnly with cookie → configured", async () => {
71
+ const r = await new WeReadAdapter({ cookie: "x" }).authenticate({ readinessOnly: true });
72
+ expect(r.ok).toBe(true);
73
+ expect(r.mode).toBe("configured");
74
+ });
75
+
76
+ it("fetches book + highlight + review and normalizes to a valid batch", async () => {
77
+ const a = new WeReadAdapter();
78
+ const raws = await collect(a.sync({ cookie: "wr_skey=x", fetch: makeFetch(ROUTES) }));
79
+ expect(raws.map((r) => r.kind)).toEqual(["book", "highlight", "review"]);
80
+ const merged = { events: [], persons: [], places: [], items: [], topics: [] };
81
+ for (const r of raws) {
82
+ const n = a.normalize(r);
83
+ for (const k of Object.keys(merged)) merged[k].push(...n[k]);
84
+ }
85
+ const { valid, invalidReasons } = partitionBatch(merged);
86
+ expect(invalidReasons).toHaveLength(0);
87
+ expect(valid.events).toHaveLength(3); // book(browse) + highlight(other) + review(post)
88
+ expect(valid.items).toHaveLength(1); // the book
89
+ expect(valid.events.find((e) => e.subtype === "browse").content.title).toContain("人类简史");
90
+ expect(valid.events.find((e) => e.subtype === "post").content.text).toBe("很有启发");
91
+ });
92
+
93
+ it("includeNotes:false yields only book events", async () => {
94
+ const a = new WeReadAdapter();
95
+ const raws = await collect(a.sync({ cookie: "x", fetch: makeFetch(ROUTES), includeNotes: false }));
96
+ expect(raws.map((r) => r.kind)).toEqual(["book"]);
97
+ });
98
+ });
99
+
100
+ describe("WeReadAdapter — snapshot mode", () => {
101
+ const SNAP = {
102
+ schemaVersion: 1,
103
+ snapshottedAt: 1700000000000,
104
+ events: [
105
+ { kind: "book", id: "b1", bookId: "b1", title: "三体", author: "刘慈欣" },
106
+ { kind: "highlight", id: "m1", bookId: "b1", bookTitle: "三体", markText: "不要回答", createTime: 1700000001 },
107
+ ],
108
+ };
109
+ function snapAdapter(snap = SNAP, { exists = true } = {}) {
110
+ const a = new WeReadAdapter();
111
+ a._deps.fs = { existsSync: () => exists, readFileSync: () => JSON.stringify(snap), accessSync: () => {}, constants: { R_OK: 4 } };
112
+ return a;
113
+ }
114
+
115
+ it("ingests snapshot events", async () => {
116
+ const raws = await collect(snapAdapter().sync({ inputPath: "/x" }));
117
+ expect(raws.map((r) => r.kind)).toEqual(["book", "highlight"]);
118
+ });
119
+
120
+ it("schemaVersion mismatch throws", async () => {
121
+ await expect(collect(snapAdapter({ schemaVersion: 9, events: [] }).sync({ inputPath: "/x" }))).rejects.toThrow(/schemaVersion/);
122
+ });
123
+ });
@@ -354,6 +354,75 @@ describe("AnalysisEngine emits TOTALS preamble", () => {
354
354
  });
355
355
  });
356
356
 
357
+ // ─── intent=sum-amount Phase 2 — AMOUNT_SUM authoritative total ──────────
358
+ describe("AnalysisEngine emits AMOUNT_SUM preamble (intent=sum-amount Phase 2)", () => {
359
+ const baseVault = (over) => ({
360
+ queryEvents: () => [],
361
+ queryPersons: () => [],
362
+ queryItems: () => [],
363
+ stats: () => ({ events: 5, persons: 0, places: 0, items: 0, topics: 0 }),
364
+ getEvent: () => null,
365
+ audit: () => {},
366
+ ...over,
367
+ });
368
+ const captureLlm = (calls) => ({
369
+ isLocal: true,
370
+ chat: async (msgs) => {
371
+ calls.push(msgs);
372
+ return { text: "ok", usage: {} };
373
+ },
374
+ });
375
+
376
+ it("calls sumEventAmount for sum-amount intent and puts AMOUNT_SUM in prompt", async () => {
377
+ const sumCalls = [];
378
+ const fakeVault = baseVault({
379
+ sumEventAmount: (f) => {
380
+ sumCalls.push(f);
381
+ return { total: 888.8, currency: "CNY", count: 5, byDirection: { out: 888.8, in: 0 } };
382
+ },
383
+ });
384
+ const chatCalls = [];
385
+ const engine = new AnalysisEngine({ vault: fakeVault, llm: captureLlm(chatCalls) });
386
+ await engine.ask("我总共花了多少钱");
387
+ expect(sumCalls.length).toBe(1);
388
+ const userMsg = chatCalls[0][1].content;
389
+ expect(userMsg).toContain("AMOUNT_SUM");
390
+ expect(userMsg).toContain('"total": 888.8');
391
+ expect(chatCalls[0][0].content).toMatch(/AMOUNT_SUM.*authoritative/i);
392
+ });
393
+
394
+ it("does NOT call sumEventAmount for non-sum-amount intent", async () => {
395
+ const sumCalls = [];
396
+ const fakeVault = baseVault({
397
+ sumEventAmount: (f) => {
398
+ sumCalls.push(f);
399
+ return { total: 0, currency: "CNY", count: 0, byDirection: { out: 0, in: 0 } };
400
+ },
401
+ });
402
+ const engine = new AnalysisEngine({ vault: fakeVault, llm: captureLlm([]) });
403
+ await engine.ask("列出我的联系人"); // intent=list
404
+ expect(sumCalls.length).toBe(0);
405
+ });
406
+
407
+ it("omits AMOUNT_SUM block when sumEventAmount returns count 0", async () => {
408
+ const fakeVault = baseVault({
409
+ sumEventAmount: () => ({ total: 0, currency: "CNY", count: 0, byDirection: { out: 0, in: 0 } }),
410
+ });
411
+ const chatCalls = [];
412
+ const engine = new AnalysisEngine({ vault: fakeVault, llm: captureLlm(chatCalls) });
413
+ await engine.ask("我总共花了多少钱");
414
+ expect(chatCalls[0][1].content).not.toContain("AMOUNT_SUM");
415
+ });
416
+
417
+ it("legacy vault without sumEventAmount falls back gracefully", async () => {
418
+ const fakeVault = baseVault({}); // no sumEventAmount
419
+ const chatCalls = [];
420
+ const engine = new AnalysisEngine({ vault: fakeVault, llm: captureLlm(chatCalls) });
421
+ await engine.ask("我总共花了多少钱");
422
+ expect(chatCalls[0][1].content).not.toContain("AMOUNT_SUM");
423
+ });
424
+ });
425
+
357
426
  // ─── Cache bypass — PDH ask must always go to LLM, never cached ───────
358
427
  //
359
428
  // Bug 2026-05-21: desktop ResponseCache (7-day TTL) served a stale
@@ -1569,9 +1638,15 @@ describe("AnalysisEngine._gatherFacts intent=sum-amount routing", () => {
1569
1638
  // 2026-05-24 — `intent=count` ("几个 X" / "多少个 Y") is handled by the
1570
1639
  // TOTALS preamble (commit 19c11920e): vault.stats() is rendered before
1571
1640
  // FACTS so the LLM quotes the real number instead of FACTS array length.
1572
- // FACTS itself still goes through the default broader path (no narrow
1573
- // routing). This block isolates the count-specific behavior into its
1574
- // own describe so the audit gap is closed.
1641
+ //
1642
+ // 2026-06-02 — FACTS now ALSO hard-caps to COUNT_INTENT_FACT_LIMIT (5)
1643
+ // illustrative rows instead of the full ≤80 default sample: TOTALS already
1644
+ // carries the authoritative count (Rule 6), so a count question only needs a
1645
+ // few examples — saves prompt budget on local small models. Scoped by reliable
1646
+ // adapter+time filters; persons/items skipped (count-of-contacts/apps routes
1647
+ // via entityFocus). 0 hits → fall through to the default broader path (safety
1648
+ // net for a count misclassification of a list question). Memory:
1649
+ // pdh_analysis_engine_intent_routing.md.
1575
1650
 
1576
1651
  describe("AnalysisEngine._gatherFacts intent=count routing", () => {
1577
1652
  const mkEvent = (id, subtype = "order", adapter = "taobao") => ({
@@ -1580,28 +1655,56 @@ describe("AnalysisEngine._gatherFacts intent=count routing", () => {
1580
1655
  source: { adapter, adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
1581
1656
  });
1582
1657
 
1583
- it("(a) intent=count goes through default broader path (no narrow query)", async () => {
1658
+ it("(a) intent=count ≤5 illustrative events (capped), persons/items NOT queried", async () => {
1659
+ const queryEventsCalls = [];
1660
+ const fakeVault = {
1661
+ queryEvents: (q) => {
1662
+ queryEventsCalls.push(q);
1663
+ return Array.from({ length: 20 }, (_, i) => mkEvent("e-" + i)).slice(0, q.limit);
1664
+ },
1665
+ queryPersons: vi.fn(() => []),
1666
+ queryItems: vi.fn(() => []),
1667
+ getEvent: () => null,
1668
+ audit: () => {},
1669
+ stats: () => ({ events: 20, persons: 0, places: 0, items: 0, topics: 0 }),
1670
+ };
1671
+ const llm = new MockLLMClient({ reply: "ok" });
1672
+ const engine = new AnalysisEngine({ vault: fakeVault, llm });
1673
+ const r = await engine.ask("我有多少个订单");
1674
+
1675
+ expect(r.parsed.intent).toBe("count");
1676
+ // Capped to COUNT_INTENT_FACT_LIMIT (5), NOT the old default 200 — TOTALS
1677
+ // carries the authoritative count, FACTS is just a few examples.
1678
+ expect(queryEventsCalls).toHaveLength(1);
1679
+ expect(queryEventsCalls[0].limit).toBe(5);
1680
+ expect(queryEventsCalls[0].subtype).toBeUndefined(); // subtype NOT passed (unreliable)
1681
+ expect(r.facts).toHaveLength(5);
1682
+ // count-of-events doesn't need contacts/apps — skipped (those route via entityFocus).
1683
+ expect(fakeVault.queryPersons).not.toHaveBeenCalled();
1684
+ expect(fakeVault.queryItems).not.toHaveBeenCalled();
1685
+ });
1686
+
1687
+ it("(a2) intent=count with adapter scope → adapter passed through on the capped query", async () => {
1584
1688
  const queryEventsCalls = [];
1585
1689
  const fakeVault = {
1586
1690
  queryEvents: (q) => {
1587
1691
  queryEventsCalls.push(q);
1588
- return [mkEvent("e-1"), mkEvent("e-2")];
1692
+ return [mkEvent("e-1")];
1589
1693
  },
1590
1694
  queryPersons: () => [],
1591
1695
  queryItems: () => [],
1592
1696
  getEvent: () => null,
1593
1697
  audit: () => {},
1594
- stats: () => ({ events: 2, persons: 500, places: 0, items: 0, topics: 0 }),
1698
+ stats: () => ({ events: 1, persons: 0, places: 0, items: 0, topics: 0 }),
1595
1699
  };
1596
- const llm = new MockLLMClient({ reply: "你有 500 个联系人" });
1700
+ const llm = new MockLLMClient({ reply: "ok" });
1597
1701
  const engine = new AnalysisEngine({ vault: fakeVault, llm });
1598
- const r = await engine.ask("我有几个联系人");
1702
+ const r = await engine.ask("我在淘宝有多少个订单");
1599
1703
 
1600
1704
  expect(r.parsed.intent).toBe("count");
1601
- // Single default queryEvents call (limit=200, no subtype filter, no narrow).
1602
1705
  expect(queryEventsCalls).toHaveLength(1);
1603
- expect(queryEventsCalls[0].limit).toBe(200);
1604
- expect(queryEventsCalls[0].subtype).toBeUndefined();
1706
+ expect(queryEventsCalls[0].limit).toBe(5);
1707
+ expect(queryEventsCalls[0].adapter).toBe("taobao");
1605
1708
  });
1606
1709
 
1607
1710
  it("(b) intent=count emits TOTALS block in prompt (authoritative ground truth)", async () => {
@@ -1661,12 +1764,14 @@ describe("AnalysisEngine._gatherFacts intent=count routing", () => {
1661
1764
  const llm = new MockLLMClient({ reply: "ok" });
1662
1765
  const engine = new AnalysisEngine({ vault: fakeVault, llm });
1663
1766
  await engine.ask("几个订单");
1664
- // Single default call NOT 4 subtype calls (those are sum-amount only).
1665
- expect(queryEventsCalls).toHaveLength(1);
1666
- expect(queryEventsCalls[0].subtype).toBeUndefined();
1767
+ // The count branch (limit 5, 0 hits) falls through to the default (limit 200).
1768
+ // Neither call carries a subtype filter — NOT the 4 subtype-narrowed calls
1769
+ // that are sum-amount only.
1770
+ expect(queryEventsCalls.map((q) => q.limit)).toEqual([5, 200]);
1771
+ expect(queryEventsCalls.every((q) => q.subtype === undefined)).toBe(true);
1667
1772
  });
1668
1773
 
1669
- it("(e) intent=count pulls persons + items in FACTS (default path behavior)", async () => {
1774
+ it("(e) intent=count with 0 events falls through → persons + items in FACTS (safety net)", async () => {
1670
1775
  const fakeVault = {
1671
1776
  queryEvents: () => [],
1672
1777
  queryPersons: ({ limit }) => Array.from({ length: Math.min(limit, 5) }, (_, i) => ({