npm - @chainlesschain/personal-data-hub - Versions diffs - 0.3.7 → 0.3.9 - Mend

@chainlesschain/personal-data-hub 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/__tests__/analysis.test.js +239 -14
package/__tests__/prompt-builder.test.js +22 -2
package/__tests__/query-parser.test.js +86 -0
package/__tests__/vault.test.js +88 -0
package/lib/adapters/ai-chat-history/health-checker.js +11 -0
package/lib/analysis.js +154 -17
package/lib/prompt-builder.js +8 -0
package/lib/query-parser.js +93 -0
package/lib/vault.js +64 -0
package/package.json +1 -1

package/__tests__/analysis.test.js CHANGED Viewed

@@ -403,11 +403,16 @@ describe("AnalysisEngine.ask cache bypass", () => {
 // + items into facts within the maxFacts budget.
 describe("AnalysisEngine._gatherFacts includes persons and items", () => {
-  it("returns persons + items even when events are empty (contacts-only vault)", async () => {
+  it("contact question routes via entityFocus=persons — persons only, no items competition", async () => {
     freshVault();
-    // Use a fake vault that exposes queryPersons / queryItems but no event
-    // history — mimics the post-Path-C-ingest state where contacts +
-    // installed apps are the only data.
+    // 2026-05-27 fix: "我有几个联系人" now matches parseEntityFocus → "persons",
+    // which intentionally skips the items table to give the full prompt
+    // budget to contacts. Pre-fix this test asserted 5 persons + 3 items
+    // (8 facts) because _gatherFacts always pulled both tables; post-fix
+    // items are deliberately excluded — the user asked about contacts, not
+    // apps. Items still surface for generic "what's in my vault" questions
+    // (entityFocus=null) and for explicit "我装了哪些 app" (entityFocus=
+    // "items"). Covered by the "_gatherFacts entityFocus routing" describe block below.
     const fakeVault = {
       queryEvents: () => [],
       queryPersons: ({ limit }) => {
@@ -448,9 +453,8 @@ describe("AnalysisEngine._gatherFacts includes persons and items", () => {
     const llm = new MockLLMClient({ reply: "你共有 5 个联系人" });
     const engine = new AnalysisEngine({ vault: fakeVault, llm });
     const r = await engine.ask("我有几个联系人");
-    expect(r.facts.length).toBe(8); // 0 events + 5 persons + 3 items
     expect(r.facts.filter((f) => f.type === "person").length).toBe(5);
-    expect(r.facts.filter((f) => f.type === "item").length).toBe(3);
+    expect(r.facts.filter((f) => f.type === "item").length).toBe(0);
   });
   it("respects maxFacts budget — events get majority, persons + items split remainder", async () => {
@@ -498,7 +502,11 @@ describe("AnalysisEngine._gatherFacts includes persons and items", () => {
     expect(r.warning).toBe("no-facts");
   });
-  it("events take majority when budget < events.length (no person/item budget left)", async () => {
+  it("events overflow + empty side tables → events refill the reserved slots", async () => {
+    // 2026-05-27 fix: when events would monopolize effMaxFacts the engine
+    // reserves slots for persons + items; if BOTH side tables return 0 rows
+    // the reserve is refilled with events so a contact-less vault still
+    // sees the full event budget.
     const fakeVault = {
       queryEvents: () => Array.from({ length: 80 }, (_, i) => ({
         id: "e" + i, type: "event", subtype: "order",
@@ -515,10 +523,225 @@ describe("AnalysisEngine._gatherFacts includes persons and items", () => {
     const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 80 });
     const r = await engine.ask("hi");
     expect(r.facts.length).toBe(80);
-    // budget exhausted → queryPersons / queryItems still called but with limit 0
-    // (current impl skips with personBudget <= 0). Verify they're NOT called.
-    expect(fakeVault.queryPersons).not.toHaveBeenCalled();
-    expect(fakeVault.queryItems).not.toHaveBeenCalled();
+    expect(r.facts.filter((f) => f.type === "event").length).toBe(80);
+    // Side queries WERE called (different from pre-fix); they just returned [].
+    expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 16 });
+    expect(fakeVault.queryItems).toHaveBeenCalledWith({ limit: 8 });
+  });
+  it("Android small-model budget — events overflow cap, persons survive", async () => {
+    // Regression: Android local path (effMaxFacts=20, effMaxQueryLimit=50).
+    // Vault returns 50 events; pre-fix _gatherFacts shipped 50 events,
+    // buildPrompt sliced to first 20 events, persons = 0 → "几个联系人"
+    // hallucinated zero. Now events cap at 14 (20*0.7), persons get 4
+    // (20*0.2), items get 2 (20*0.1) → contact rows reach the LLM.
+    const fakeVault = {
+      queryEvents: () => Array.from({ length: 50 }, (_, i) => ({
+        id: "e" + i, type: "event", subtype: "message",
+        occurredAt: Date.now(), actor: "self",
+        ingestedAt: Date.now(),
+        source: { adapter: "wechat", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      })),
+      queryPersons: ({ limit }) => Array.from({ length: limit }, (_, i) => ({
+        id: "p" + i, type: "person", subtype: "contact",
+        names: ["联系人" + i], ingestedAt: Date.now(),
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      })),
+      queryItems: ({ limit }) => Array.from({ length: limit }, (_, i) => ({
+        id: "i" + i, type: "item", subtype: "other", name: "App" + i,
+        ingestedAt: Date.now(),
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      })),
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({
+      vault: fakeVault, llm,
+      maxFacts: 20, maxQueryLimit: 50,
+    });
+    const r = await engine.ask("hi"); // generic question — default path
+    // 20 * 0.2 = 4 persons, 20 * 0.1 = 2 items, remainder 14 for events.
+    expect(r.facts.filter((f) => f.type === "event").length).toBe(14);
+    expect(r.facts.filter((f) => f.type === "person").length).toBe(4);
+    expect(r.facts.filter((f) => f.type === "item").length).toBe(2);
+  });
+});
+// ─── entityFocus routing — persons / items table priority ────────────────
+//
+// 2026-05-27 fix: when the question is explicitly about contacts ("我有
+// 哪些联系人", "妈手机号"), _gatherFacts must NOT compete persons against
+// the events pool. Pre-fix Android small-model budgets (20 facts / 50 row
+// cap) had events drown out the contact slice → user saw "没数据" even
+// when the vault held hundreds of contacts.
+describe("AnalysisEngine._gatherFacts entityFocus routing", () => {
+  it("entityFocus=persons skips events broad scan, prioritizes persons", async () => {
+    const fakeVault = {
+      queryEvents: vi.fn(() => Array.from({ length: 50 }, (_, i) => ({
+        id: "e" + i, type: "event", subtype: "message",
+        occurredAt: Date.now(), actor: "self",
+        ingestedAt: Date.now(),
+        source: { adapter: "wechat", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      }))),
+      queryPersons: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
+        id: "p" + i, type: "person", subtype: "contact",
+        names: ["联系人" + i], ingestedAt: Date.now(),
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      }))),
+      queryItems: vi.fn(() => []),
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({
+      vault: fakeVault, llm,
+      maxFacts: 20, maxQueryLimit: 50,
+    });
+    const r = await engine.ask("我有哪些联系人");
+    // 95% goes to persons (19), 5% headroom = 1 event slot.
+    expect(r.facts.filter((f) => f.type === "person").length).toBe(19);
+    expect(r.facts.filter((f) => f.type === "event").length).toBeLessThanOrEqual(1);
+    expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 19 });
+  });
+  it("entityFocus=persons falls through to default path when persons table is empty", async () => {
+    const fakeVault = {
+      queryEvents: () => Array.from({ length: 5 }, (_, i) => ({
+        id: "e" + i, type: "event", subtype: "message",
+        occurredAt: Date.now(), actor: "self",
+        ingestedAt: Date.now(),
+        source: { adapter: "wechat", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      })),
+      queryPersons: () => [], // empty contacts table
+      queryItems: () => [],
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({ vault: fakeVault, llm });
+    const r = await engine.ask("我有哪些联系人");
+    // Fell through to default → 5 events surfaced (no cap since 5 < 80).
+    expect(r.facts.filter((f) => f.type === "event").length).toBe(5);
+  });
+  it("entityFocus=persons with name candidate → searchPersons short-circuit", async () => {
+    // 2026-05-27 S3 治本 — "妈手机号" must hit searchPersons LIKE search
+    // even when vault holds 500 contacts. Pre-S3 _gatherFacts dumped the
+    // first N by ingest_at; the target person rarely landed in the slice.
+    const fakeVault = {
+      queryEvents: () => [],
+      queryPersons: vi.fn(() => [
+        { id: "p-other", type: "person", subtype: "contact", names: ["张三"], ingestedAt: 0,
+          source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" } },
+      ]),
+      searchPersons: vi.fn(({ q, limit }) => {
+        if (q === "妈") {
+          return [{
+            id: "p-mom", type: "person", subtype: "contact", names: ["妈妈"],
+            identifiers: { phone: ["13800138000"] }, ingestedAt: 0,
+            source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
+          }];
+        }
+        return [];
+      }),
+      queryItems: () => [],
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "妈手机号是 13800138000" });
+    const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
+    const r = await engine.ask("妈手机号是多少");
+    expect(fakeVault.searchPersons).toHaveBeenCalledWith({ q: "妈", limit: 19 });
+    expect(fakeVault.queryPersons).not.toHaveBeenCalled(); // search hit → skip fallback
+    expect(r.facts.filter((f) => f.type === "person").length).toBe(1);
+    expect(r.facts.find((f) => f.id === "p-mom")).toBeDefined();
+  });
+  it("entityFocus=persons with name candidate but 0 search hits → falls back to queryPersons", async () => {
+    const fakeVault = {
+      queryEvents: () => [],
+      queryPersons: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
+        id: "p" + i, type: "person", subtype: "contact", names: ["P" + i], ingestedAt: 0,
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
+      }))),
+      searchPersons: vi.fn(() => []), // 0 hits
+      queryItems: () => [],
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
+    await engine.ask("张三的电话号码");
+    expect(fakeVault.searchPersons).toHaveBeenCalled();
+    expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 19 });
+  });
+  it("entityFocus=persons without name candidate (pure list) skips searchPersons", async () => {
+    const fakeVault = {
+      queryEvents: () => [],
+      queryPersons: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
+        id: "p" + i, type: "person", subtype: "contact", names: ["P" + i], ingestedAt: 0,
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
+      }))),
+      searchPersons: vi.fn(() => []),
+      queryItems: () => [],
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
+    await engine.ask("我有哪些联系人");
+    // Pure list — no name in question → skip searchPersons, go straight to queryPersons.
+    expect(fakeVault.searchPersons).not.toHaveBeenCalled();
+    expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 19 });
+  });
+  it("entityFocus=persons tolerates vault without searchPersons (legacy)", async () => {
+    const fakeVault = {
+      queryEvents: () => [],
+      queryPersons: vi.fn(({ limit }) => Array.from({ length: Math.min(limit, 3) }, (_, i) => ({
+        id: "p" + i, type: "person", subtype: "contact", names: ["P" + i], ingestedAt: 0,
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
+      }))),
+      // No searchPersons method
+      queryItems: () => [],
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
+    const r = await engine.ask("妈手机号");
+    expect(fakeVault.queryPersons).toHaveBeenCalled();
+    expect(r.facts.filter((f) => f.type === "person").length).toBe(3);
+  });
+  it("entityFocus=items prioritizes items table over events", async () => {
+    const fakeVault = {
+      queryEvents: () => Array.from({ length: 100 }, (_, i) => ({
+        id: "e" + i, type: "event", subtype: "browse",
+        occurredAt: Date.now(), actor: "self",
+        ingestedAt: Date.now(),
+        source: { adapter: "browser-history", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      })),
+      queryPersons: () => [],
+      queryItems: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
+        id: "i" + i, type: "item", subtype: "other", name: "App" + i,
+        ingestedAt: Date.now(),
+        source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
+      }))),
+      getEvent: () => null,
+      audit: () => {},
+    };
+    const llm = new MockLLMClient({ reply: "" });
+    const engine = new AnalysisEngine({
+      vault: fakeVault, llm,
+      maxFacts: 20, maxQueryLimit: 50,
+    });
+    const r = await engine.ask("我装了哪些 app");
+    expect(r.facts.filter((f) => f.type === "item").length).toBe(19);
+    expect(fakeVault.queryItems).toHaveBeenCalledWith({ limit: 19 });
   });
 });
@@ -682,10 +905,12 @@ describe("AnalysisEngine per-call budget overrides", () => {
     const llm = { isLocal: true, chat: () => { throw new Error("nope"); } };
     const engine = new AnalysisEngine({ vault: fakeVault, llm });
     const r = await engine.retrieveContext("hi", { maxFacts: 10, maxQueryLimit: 50 });
-    // _gatherFacts returns 50 events, but buildPrompt caps factCount to maxFacts=10.
-    // `truncated` is a count of dropped facts, not a boolean.
+    // 2026-05-27 fix: _gatherFacts now respects effMaxFacts upstream
+    // (events would have overflowed → reservation branch; persons/items
+    // returned [] → refill back to events.slice(0,10)). buildPrompt sees
+    // exactly 10 facts, nothing to truncate.
     expect(r.factCount).toBe(10);
-    expect(r.truncated).toBe(40); // 50 gathered - 10 kept = 40 truncated
+    expect(r.truncated).toBe(0);
   });
   it("retrieveContext() honors options.maxFacts and options.maxQueryLimit", async () => {

package/__tests__/prompt-builder.test.js CHANGED Viewed

@@ -38,20 +38,40 @@ describe("summarizeFact", () => {
     expect(s).not.toHaveProperty("extra");
   });
-  it("packs person names + relation; omits source/identifiers", () => {
+  it("packs person names + relation + identifiers + notes; omits source", () => {
+    // 2026-05-27 — identifiers (phone/wechatId/email) MUST reach the LLM,
+    // otherwise "妈手机号是多少" can never be answered even when vault has
+    // the phone. notes too — they're user-written context. source/ingestedAt
+    // are framing metadata, not user data, so still stripped.
     const p = summarizePerson({
       id: "p1",
       type: "person",
       subtype: "contact",
       names: ["妈妈", "陈某某"],
       relation: "母亲",
-      identifiers: { phone: ["13800001111"] },
+      identifiers: { phone: ["13800001111"], wechatId: "wxid_abc" },
+      notes: "best mom ever",
       ingestedAt: 1,
       source: { adapter: "x", adapterVersion: "0.1.0", capturedAt: 1, capturedBy: "api" },
     });
     expect(p.names).toEqual(["妈妈", "陈某某"]);
     expect(p.relation).toBe("母亲");
+    expect(p.identifiers).toEqual({ phone: ["13800001111"], wechatId: "wxid_abc" });
+    expect(p.notes).toBe("best mom ever");
+    expect(p).not.toHaveProperty("source");
+    expect(p).not.toHaveProperty("ingestedAt");
+  });
+  it("omits identifiers / notes fields when absent on the person row", () => {
+    const p = summarizePerson({
+      id: "p2",
+      type: "person",
+      subtype: "contact",
+      names: ["路人甲"],
+    });
     expect(p).not.toHaveProperty("identifiers");
+    expect(p).not.toHaveProperty("notes");
+    expect(p).not.toHaveProperty("relation");
   });
   it("returns null for non-object / unknown types just yields minimal shape", () => {

package/__tests__/query-parser.test.js CHANGED Viewed

@@ -7,7 +7,9 @@ const {
   parseTimeWindow,
   parseFilters,
   parseIntent,
+  parseEntityFocus,
   extractEntityTerm,
+  extractPersonNameCandidate,
 } = require("../lib/query-parser");
 // Pin "now" to 2026-05-19 12:00:00 UTC for deterministic windows
@@ -126,6 +128,38 @@ describe("parseIntent", () => {
   });
 });
+describe("parseEntityFocus", () => {
+  it("returns 'persons' for 联系人 / 通讯录 phrasing", () => {
+    expect(parseEntityFocus("我有哪些联系人")).toBe("persons");
+    expect(parseEntityFocus("通讯录里有多少人")).toBe("persons");
+    expect(parseEntityFocus("好友列表谁是张三")).toBe("persons");
+  });
+  it("returns 'persons' for phone-number phrasing", () => {
+    expect(parseEntityFocus("妈手机号是多少")).toBe("persons");
+    expect(parseEntityFocus("王医生的电话号码")).toBe("persons");
+    expect(parseEntityFocus("show me my contacts")).toBe("persons");
+  });
+  it("returns 'items' for installed-app phrasing", () => {
+    expect(parseEntityFocus("我装了哪些 app")).toBe("items");
+    expect(parseEntityFocus("有哪些游戏")).toBe("items");
+    expect(parseEntityFocus("installed apps")).toBe("items");
+  });
+  it("returns null when no focus signal", () => {
+    expect(parseEntityFocus("上个月在淘宝花了多少")).toBeNull();
+    expect(parseEntityFocus("最近的订单")).toBeNull();
+    expect(parseEntityFocus("hello")).toBeNull();
+  });
+  it("returns null for non-string / empty input", () => {
+    expect(parseEntityFocus("")).toBeNull();
+    expect(parseEntityFocus(null)).toBeNull();
+    expect(parseEntityFocus(undefined)).toBeNull();
+  });
+});
 describe("parseQuery (integration)", () => {
   it("full parse for spending question", () => {
     const r = parseQuery("上个月在淘宝总共花了多少钱？", { now: NOW });
@@ -133,6 +167,13 @@ describe("parseQuery (integration)", () => {
     expect(r.filters.subtype).toBe("payment");
     expect(r.filters.adapter).toBe("taobao");
     expect(r.intent).toBe("sum-amount");
+    expect(r.entityFocus).toBeNull();
+  });
+  it("contact question carries entityFocus=persons", () => {
+    const r = parseQuery("我有哪些联系人", { now: NOW });
+    expect(r.entityFocus).toBe("persons");
+    expect(r.intent).toBe("list");
   });
   it("full parse for footprint question", () => {
@@ -214,3 +255,48 @@ describe("extractEntityTerm", () => {
     expect(r).toBeNull();
   });
 });
+// ─── extractPersonNameCandidate — persons-branch name search ─────────────
+//
+// 2026-05-27 — Powers AnalysisEngine entityFocus=persons name-search
+// short-circuit. Differs from extractEntityTerm in two ways: strips
+// person-FOCUS framing words first (联系人/手机号/etc.) and allows
+// single-char Chinese names from a relation whitelist (妈/爸/姐/...).
+describe("extractPersonNameCandidate", () => {
+  it("extracts multi-char name when present", () => {
+    expect(extractPersonNameCandidate("张三的电话号码")).toBe("张三");
+    expect(extractPersonNameCandidate("王医生手机号是多少")).toBe("王医生");
+  });
+  it("falls back to single-char relation word ('妈', '爸', '姐')", () => {
+    expect(extractPersonNameCandidate("妈手机号是多少")).toBe("妈");
+    expect(extractPersonNameCandidate("爸的电话")).toBe("爸");
+    expect(extractPersonNameCandidate("姐姐的号码")).toBe("姐姐");
+  });
+  it("multi-char wins over single-char fallback", () => {
+    // "王医生" (3 char) preferred over leaked single "医" / "生".
+    expect(extractPersonNameCandidate("王医生的手机号")).toBe("王医生");
+  });
+  it("returns null when no name candidate (pure framing)", () => {
+    expect(extractPersonNameCandidate("我有哪些联系人")).toBeNull();
+    expect(extractPersonNameCandidate("通讯录里有多少人")).toBeNull();
+  });
+  it("ignores single-char Chinese outside the relation whitelist", () => {
+    // "说" / "看" are not relation chars — should NOT slip through as names.
+    expect(extractPersonNameCandidate("说手机号")).toBeNull();
+  });
+  it("returns null for non-string / empty input", () => {
+    expect(extractPersonNameCandidate("")).toBeNull();
+    expect(extractPersonNameCandidate(null)).toBeNull();
+    expect(extractPersonNameCandidate(undefined)).toBeNull();
+  });
+  it("handles ASCII names ≥2 chars", () => {
+    expect(extractPersonNameCandidate("Alice 的电话号码")).toBe("Alice");
+  });
+});

package/__tests__/vault.test.js CHANGED Viewed

@@ -425,6 +425,94 @@ describe("LocalVault.queryEvents + countEvents", () => {
   });
 });
+// ─── searchPersons (LIKE name search) ────────────────────────────────────
+//
+// 2026-05-27 — AnalysisEngine entityFocus="persons" routes to searchPersons
+// when the question carries a name candidate ("妈手机号", "张三的电话").
+// LIKE on names / identifiers / notes / relation, no FTS5 migration.
+describe("LocalVault.searchPersons", () => {
+  it("matches against names column (JSON-serialized array)", () => {
+    freshVault();
+    vault.putPerson(personOk({ names: ["妈妈", "陈某某"] }));
+    vault.putPerson(personOk({ names: ["张三"] }));
+    vault.putPerson(personOk({ names: ["王医生"] }));
+    const r = vault.searchPersons({ q: "妈" });
+    expect(r.length).toBe(1);
+    expect(r[0].names).toContain("妈妈");
+  });
+  it("matches against identifiers (phone numbers)", () => {
+    freshVault();
+    vault.putPerson(personOk({
+      names: ["张三"],
+      identifiers: { phone: ["13800001111"] },
+    }));
+    vault.putPerson(personOk({
+      names: ["李四"],
+      identifiers: { phone: ["13900002222"] },
+    }));
+    const r = vault.searchPersons({ q: "13800" });
+    expect(r.length).toBe(1);
+    expect(r[0].names).toContain("张三");
+  });
+  it("matches against notes + relation", () => {
+    freshVault();
+    vault.putPerson(personOk({
+      names: ["陈某某"], relation: "母亲", notes: "best mom ever",
+    }));
+    vault.putPerson(personOk({ names: ["路人甲"], relation: "stranger" }));
+    expect(vault.searchPersons({ q: "母亲" }).length).toBe(1);
+    expect(vault.searchPersons({ q: "best mom" }).length).toBe(1);
+  });
+  it("empty q delegates to queryPersons (ingest-ordered)", () => {
+    freshVault();
+    vault.putPerson(personOk({ names: ["A"] }));
+    vault.putPerson(personOk({ names: ["B"] }));
+    vault.putPerson(personOk({ names: ["C"] }));
+    const r = vault.searchPersons({ q: "", limit: 2 });
+    expect(r.length).toBe(2);
+  });
+  it("LIKE meta-characters in user input are escaped (no wildcard injection)", () => {
+    freshVault();
+    vault.putPerson(personOk({ names: ["100%棉"] }));
+    vault.putPerson(personOk({ names: ["AAA"] }));
+    // "100%" should match only the literal "100%棉" row, not everything.
+    const r = vault.searchPersons({ q: "100%" });
+    expect(r.length).toBe(1);
+    expect(r[0].names).toContain("100%棉");
+  });
+  it("respects subtype + adapter filters", () => {
+    freshVault();
+    vault.putPerson(personOk({
+      subtype: "contact", names: ["张三"],
+      source: source({ adapter: "wechat" }),
+    }));
+    vault.putPerson(personOk({
+      subtype: "merchant", names: ["张三"],
+      source: source({ adapter: "system-data-android" }),
+    }));
+    expect(vault.searchPersons({ q: "张三", subtype: "merchant" }).length).toBe(1);
+    expect(vault.searchPersons({ q: "张三", adapter: "wechat" }).length).toBe(1);
+  });
+  it("returns empty array when no match", () => {
+    freshVault();
+    vault.putPerson(personOk({ names: ["张三"] }));
+    expect(vault.searchPersons({ q: "完全不存在的名字" })).toEqual([]);
+  });
+});
 // ─── sync watermarks ──────────────────────────────────────────────────────
 describe("LocalVault sync watermarks", () => {

package/lib/adapters/ai-chat-history/health-checker.js CHANGED Viewed

@@ -172,7 +172,18 @@ function createAIChatHealthChecker({
           deps.logger.error("[aichat-health] interval run failed", err && err.message),
         );
       }, intervalMs);
+      // Don't keep the event loop alive on the periodic check alone. Without
+      // unref a one-shot `cc hub list-adapters --json` from in-APK Android
+      // sits idle in epoll_wait until Kotlin LocalCcRunner.waitFor 240s
+      // timeout → false "写入本地数据库失败". Real-device repro 2026-05-27
+      // Xiaomi 24115RA8EC (PID 24828 lingered with vault.db RW handles).
+      if (intervalHandle && typeof intervalHandle.unref === "function") {
+        intervalHandle.unref();
+      }
     }, firstRunDelayMs);
+    if (firstRunHandle && typeof firstRunHandle.unref === "function") {
+      firstRunHandle.unref();
+    }
     return true;
   }

package/lib/analysis.js CHANGED Viewed

@@ -21,7 +21,7 @@
 "use strict";
-const { parseQuery, extractEntityTerm } = require("./query-parser");
+const { parseQuery, extractEntityTerm, extractPersonNameCandidate } = require("./query-parser");
 const {
   buildPrompt,
   parseCitations,
@@ -61,6 +61,27 @@ const SUM_AMOUNT_SUBTYPES = ["order", "payment", "transfer", "income"];
 // 12) doesn't starve any single subtype.
 const SUM_AMOUNT_MIN_PER_SUBTYPE = 20;
+// entityFocus="persons" routing — explicit contact queries ("我有哪些联系人",
+// "妈手机号"). When the user names the target table the engine MUST NOT
+// compete persons against the events pool: small-model Android budgets
+// (20 facts / 50 row cap) get drained by a few hundred Bilibili
+// notifications and the contact slice ends up empty. parseEntityFocus
+// surfaces the signal; we honor it by going persons-first.
+//
+// Keep a TINY events headroom (5%) so questions like "我最近跟妈打过电话吗"
+// still surface 通话 event rows alongside the contact entry.
+const PERSONS_FOCUS_EVENT_HEADROOM_RATIO = 0.05;
+// Default-path budget split when no entityFocus signal. Pre-fix events
+// got the entire effMaxFacts pool first and persons/items shared only the
+// remainder; on a busy vault that meant 0 contacts in the prompt. Cap
+// events at 70%, reserve 20% for persons and 10% for items so a generic
+// "what's going on" question still sees the full data shape.
+const DEFAULT_EVENT_BUDGET_RATIO = 0.7;
+const DEFAULT_PERSON_BUDGET_RATIO = 0.2;
+// Items take whatever remains; intent=count/list questions about contacts
+// already short-circuit via entityFocus before reaching this branch.
 class AnalysisEngine {
   /**
    * @param {object} opts
@@ -426,6 +447,88 @@ class AnalysisEngine {
       // 0 results → fall through to default broader path below.
     }
+    // entityFocus=persons routing — "我有哪些联系人", "妈手机号", "通讯录里
+    // 有多少人". Skip the events broad scan and put the entire fact budget
+    // on the persons table (with a 5% events headroom for adjacent rows
+    // like 通话/短信). Adapter / time window are NOT applied to persons:
+    // contacts are current-state snapshots, not time-stamped events.
+    //
+    // 0 hits → fall through to the default path. A user might say "联系人"
+    // colloquially when they mean "people I've messaged" — the default
+    // events+persons mix is the right safety net.
+    if (parsed.entityFocus === "persons") {
+      const personLimit = effMaxFacts > 1 ? effMaxFacts - 1 : effMaxFacts;
+      let persons = [];
+      // Name-search short-circuit — when the question carries a probable
+      // person-name candidate ("妈手机号", "张三的电话"), try LIKE-search
+      // against names / identifiers / notes / relation. Hits go straight
+      // to FACTS so the LLM sees the target contact even when the vault
+      // holds hundreds of others. Falls back to ingest-ordered queryPersons
+      // when 0 hits or no name candidate.
+      const nameCandidate = extractPersonNameCandidate(parsed.raw);
+      if (nameCandidate && typeof this.vault.searchPersons === "function") {
+        try {
+          persons = this.vault.searchPersons({ q: nameCandidate, limit: personLimit });
+        } catch (_e) { /* tolerate — try ingest-ordered fallback */ }
+      }
+      if (persons.length === 0) {
+        try {
+          persons = this.vault.queryPersons({ limit: personLimit });
+        } catch (_e) {
+          // legacy vault — fall through
+        }
+      }
+      if (persons.length > 0) {
+        const eventHeadroom = Math.max(
+          0,
+          Math.floor(effMaxFacts * PERSONS_FOCUS_EVENT_HEADROOM_RATIO)
+        );
+        let events = [];
+        if (eventHeadroom > 0) {
+          const eq = { limit: eventHeadroom };
+          if (parsed.filters && parsed.filters.adapter) eq.adapter = parsed.filters.adapter;
+          if (parsed.timeWindow) {
+            if (Number.isFinite(parsed.timeWindow.since)) eq.since = parsed.timeWindow.since;
+            if (Number.isFinite(parsed.timeWindow.until)) eq.until = parsed.timeWindow.until;
+          }
+          try {
+            events = this.vault.queryEvents(eq);
+          } catch (_e) { /* tolerate */ }
+        }
+        // persons-first ordering so the LLM reads the contact rows before
+        // the (sparse) event tail.
+        const combined = [...persons, ...events].slice(0, effMaxFacts);
+        return combined;
+      }
+      // 0 persons → fall through.
+    }
+    // entityFocus=items routing — "我装了哪些 app", "有哪些游戏". Mirror
+    // persons branch: skip the broad events scan, query the items table
+    // directly, and keep a tiny events headroom for adjacent rows.
+    if (parsed.entityFocus === "items") {
+      const itemLimit = effMaxFacts > 1 ? effMaxFacts - 1 : effMaxFacts;
+      let items = [];
+      try {
+        items = this.vault.queryItems({ limit: itemLimit });
+      } catch (_e) { /* legacy */ }
+      if (items.length > 0) {
+        const eventHeadroom = Math.max(
+          0,
+          Math.floor(effMaxFacts * PERSONS_FOCUS_EVENT_HEADROOM_RATIO)
+        );
+        let events = [];
+        if (eventHeadroom > 0) {
+          const eq = { limit: eventHeadroom };
+          if (parsed.filters && parsed.filters.adapter) eq.adapter = parsed.filters.adapter;
+          try {
+            events = this.vault.queryEvents(eq);
+          } catch (_e) { /* tolerate */ }
+        }
+        return [...items, ...events].slice(0, effMaxFacts);
+      }
+    }
     // intent=sum-amount routing — "总共花了多少" / "在淘宝花了多少钱"
     // only needs events from amount-bearing subtypes (order/payment/
     // transfer/income). Pulling messages / visits / browses wastes
@@ -551,22 +654,40 @@ class AnalysisEngine {
     //  - installed apps land in `items`, not `events`
     //  - places (visited locations) live in `places`
     // Without these the LLM gets 0 facts for "我有几个联系人" style questions
-    // and hallucinates a count. We pull a bounded slice of each entity type
-    // and append; prompt-builder.summarizeFact already handles `person` /
-    // `place` / fallback `item` shapes, so this is additive with no schema
-    // change to the LLM-facing prompt.
+    // and hallucinates a count.
+    //
+    // Sizing — two regimes:
+    //  (a) Events fit (events.length < effMaxFacts): legacy behavior —
+    //      events first, split the remainder evenly between persons + items.
+    //  (b) Events would monopolize (events.length >= effMaxFacts): reserve
+    //      DEFAULT_PERSON_BUDGET_RATIO (20%) + 10% for persons + items so a
+    //      busy event timeline doesn't shove every contact out of the prompt.
+    //      If persons + items tables BOTH return 0 rows, refill the reserve
+    //      with events — no point starving the LLM of facts when the side
+    //      tables are empty (small vaults / pre-Path-C ingest state).
     //
-    // Sizing: keep events as the majority (existing behavior is unchanged for
-    // event-heavy queries like 消费 / 通话); split the remaining 1/2 budget
-    // between persons + items. Time window + adapter filters don't apply to
-    // these tables (persons aren't time-stamped events) — they're current-
-    // state snapshots that should always be visible. Adapter filter is also
-    // skipped because users asking "我有几个联系人" don't say "from
-    // system-data-android".
-    const remaining = Math.max(0, effMaxFacts - events.length);
-    const sideBudget = Math.floor(remaining / 2);
-    const personBudget = sideBudget > 0 ? sideBudget : 0;
-    const itemBudget = remaining - personBudget;
+    // Time window + adapter filters don't apply to persons/items: they're
+    // current-state snapshots, not time-stamped events. A user asking
+    // "上个月联系人变化" is rare enough to leave for a future intent.
+    let cappedEvents = events;
+    let personBudget;
+    let itemBudget;
+    if (events.length >= effMaxFacts) {
+      const personReserve = Math.max(1, Math.floor(effMaxFacts * DEFAULT_PERSON_BUDGET_RATIO));
+      const itemReserve = Math.max(
+        1,
+        Math.floor(effMaxFacts * (1 - DEFAULT_EVENT_BUDGET_RATIO - DEFAULT_PERSON_BUDGET_RATIO))
+      );
+      const eventCap = Math.max(1, effMaxFacts - personReserve - itemReserve);
+      cappedEvents = events.slice(0, eventCap);
+      personBudget = personReserve;
+      itemBudget = itemReserve;
+    } else {
+      const remaining = effMaxFacts - events.length;
+      const sideBudget = Math.floor(remaining / 2);
+      personBudget = sideBudget > 0 ? sideBudget : 0;
+      itemBudget = remaining - personBudget;
+    }
     let persons = [];
     if (personBudget > 0) {
@@ -585,7 +706,20 @@ class AnalysisEngine {
       }
     }
-    return [...events, ...persons, ...items];
+    // Backfill — when events overflowed (reservation branch) but persons +
+    // items both returned 0 rows, give the reserved slots back to events.
+    // Small vaults / pre-Path-C state would otherwise see fewer facts than
+    // the budget allows.
+    if (
+      events.length >= effMaxFacts &&
+      persons.length === 0 &&
+      items.length === 0 &&
+      cappedEvents.length < effMaxFacts
+    ) {
+      cappedEvents = events.slice(0, effMaxFacts);
+    }
+    return [...cappedEvents, ...persons, ...items];
   }
   /**
@@ -630,4 +764,7 @@ module.exports = {
   LIST_INTENT_FTS_LIMIT,
   SUM_AMOUNT_SUBTYPES,
   SUM_AMOUNT_MIN_PER_SUBTYPE,
+  PERSONS_FOCUS_EVENT_HEADROOM_RATIO,
+  DEFAULT_EVENT_BUDGET_RATIO,
+  DEFAULT_PERSON_BUDGET_RATIO,
 };

package/lib/prompt-builder.js CHANGED Viewed

@@ -67,12 +67,20 @@ function summarizeEvent(e) {
 }
 function summarizePerson(p) { // Build the LLM-facing summary object for a person row.
+  // 2026-05-27 — include identifiers (phone / wechatId / email / etc.) +
+  // notes in the LLM-facing summary. Without this, asking "妈手机号是多少"
+  // ships only names+relation to the LLM and it can't possibly answer.
+  // Person rows are dense — keep all identifying fields. The LLM sees this
+  // verbatim under FACTS so user-visible privacy is the same as the user
+  // querying their own vault (which is the whole point of PDH).
   return {
     id: p.id,
     type: "person", // fixed discriminator, not read from p
     subtype: p.subtype,
     names: p.names,
     ...(p.relation ? { relation: p.relation } : {}), // optional fields are spread in only when truthy, so absent keys stay absent
+    ...(p.identifiers ? { identifiers: p.identifiers } : {}),
+    ...(p.notes ? { notes: p.notes } : {}),
   };
 }

package/lib/query-parser.js CHANGED Viewed

@@ -219,6 +219,42 @@ function parseIntent(text) {
   return "list";
 }
+// ─── Entity-focus detection (persons / items routing) ────────────────────
+//
+// 2026-05-27 — Bug: user asked "我有哪些联系人" / "我妈手机号" several times;
+// vault held real contacts but the LLM kept replying "没数据" because the
+// default _gatherFacts pulled events first (up to its 200-row cap) and the
+// persons slice got squeezed out of the small-model 20-fact budget.
+// parseIntent already catches "几个 X" as count, but that doesn't tell the
+// engine WHICH table the user means. parseEntityFocus is the missing signal:
+// when the question is explicitly about contacts/apps, the engine
+// prioritizes that table instead of letting it compete with events.
+//
+// Returns null when no focus signal — engine falls back to the existing
+// events-majority + persons/items remainder behavior.
+//
+// Memory: pdh_analysis_engine_intent_routing.md.
+const PERSON_FOCUS_PATTERNS = [ // any match makes parseEntityFocus return "persons"; no /g flag, so reuse with .test() is stateless
+  /(联系人|通讯录|电话簿|通信录|好友列表|朋友列表)/, // contacts / address book / phone book / friend list
+  /(手机号|电话号|号码是|的电话|的手机)/, // mobile number / phone number / "number is" / possessive + phone
+  /(谁是|是谁|是什么人)/, // "who is ..." / "what kind of person is ..."
+  /\b(contact|contacts|phonebook|address\s*book|phone\s*number)\b/i, // English equivalents, case-insensitive
+];
+const ITEM_FOCUS_PATTERNS = [ // any match yields "items", but only when no person pattern matched first
+  /(装了|安装了|装过|下了什么|下载了什么|有哪些(app|应用|软件|游戏))/i, // "installed" / "downloaded what" / "which apps, software or games do I have"
+  /(我的(app|应用|软件)|哪些(app|应用|软件|游戏))/i, // "my apps" / "which apps, software or games"
+  /\b(installed\s+apps?|my\s+apps?|installed\s+packages?)\b/i,
+];
+function parseEntityFocus(text) { // Classify a question as "persons", "items", or null (no focus signal).
+  if (typeof text !== "string" || text.length === 0) return null; // non-string or empty input carries no focus
+  if (PERSON_FOCUS_PATTERNS.some((re) => re.test(text))) return "persons"; // checked first, so persons wins when both pattern sets match
+  if (ITEM_FOCUS_PATTERNS.some((re) => re.test(text))) return "items";
+  return null; // no pattern matched
+}
 // ─── Entity-name extraction (FTS5 fulltext routing) ────────────────────
 //
 // Pull a probable entity-name candidate out of the raw question so
@@ -291,6 +327,56 @@ function extractEntityTerm(text) {
   return candidates[0];
 }
+// ─── Person-name extraction (entityFocus=persons routing) ────────────────
+//
+// Specialized extractor for the persons branch in AnalysisEngine. Differs
+// from extractEntityTerm in two ways:
+//
+//  1. Strips person-FOCUS framing words first (联系人/手机号/电话/etc.) —
+//     they're question scaffolding, not the target name. extractEntityTerm
+//     left "妈手机号" intact because it doesn't know that phrase is framing.
+//
+//  2. Allows single-character names from a relation-word whitelist
+//     (妈/爸/姐/弟/...) — extractEntityTerm filtered every 1-char Chinese to
+//     suppress verb false positives, but that also dropped "妈" / "爸" which
+//     are the dominant contact-name shorthands on a personal phonebook.
+//
+// Multi-char candidates always win over single-char fallback so "张三的
+// 手机号" returns "张三" not "三".
+const PERSON_FRAMING_STOP_PATTERNS = [ // /g so replace() strips every occurrence; /g regexes keep lastIndex state, so avoid .test()/.exec() on these
+  /(联系人|通讯录|电话簿|通信录|好友列表|朋友列表)/g,
+  /(手机号|电话号|号码是|的电话|的手机|号码|电话)/g, // longer alternatives come first so they win over the bare 号码 / 电话 forms
+  /(谁是|是谁|是什么人|是哪位)/g,
+  /\b(contact|contacts|phonebook|address\s*book|phone\s*number)\b/gi,
+];
+// Whitelisted single-character Chinese relation words. Single-char tokens
+// outside this set are dropped to keep verb / particle false-positives from
+// leaking through. Extend cautiously — every new char widens the LIKE
+// surface area and could match unrelated rows.
+const PERSON_RELATION_SINGLE_CHARS_RE =
+  /^[妈爸姐妹哥弟爹娘爷奶姥舅姑叔伯婶嫂嫁公婆]$/; // NOTE(review): 嫁 means "to marry" (a verb), not a relation word — confirm it is intentional
+function extractPersonNameCandidate(text) { // Returns the most likely person-name token in text, or null when none survives filtering.
+  if (typeof text !== "string" || text.length === 0) return null;
+  let s = text;
+  for (const re of PERSON_FRAMING_STOP_PATTERNS) {
+    s = s.replace(re, " "); // replace with a space so the text on either side splits into separate tokens
+  }
+  for (const re of ENTITY_STOP_PATTERNS) { // defined outside this hunk; presumably the generic stop patterns shared with extractEntityTerm — verify
+    s = s.replace(re, " ");
+  }
+  const all = s.split(/\s+/).filter((t) => t.length >= 1 && t.length <= 10); // drops the empty strings split() leaves at the edges and anything over 10 UTF-16 units
+  if (all.length === 0) return null;
+  const multi = all
+    .filter((t) => t.length >= 2)
+    .sort((a, b) => b.length - a.length); // longest candidate first
+  if (multi.length > 0) return multi[0]; // any multi-char token beats the single-char fallback
+  const single = all.find((t) => t.length === 1 && PERSON_RELATION_SINGLE_CHARS_RE.test(t)); // first single char on the relation whitelist
+  return single || null;
+}
 // ─── Full parser ─────────────────────────────────────────────────────────
 /**
@@ -314,6 +400,7 @@ function parseQuery(question, opts = {}) {
     timeWindow: parseTimeWindow(raw, now),
     filters: parseFilters(raw),
     intent: parseIntent(raw),
+    entityFocus: parseEntityFocus(raw),
   };
 }
@@ -322,9 +409,15 @@ module.exports = {
   parseTimeWindow,
   parseFilters,
   parseIntent,
+  parseEntityFocus,
   extractEntityTerm,
+  extractPersonNameCandidate,
   // exposed for tests
   SUBTYPE_KEYWORDS,
   ADAPTER_KEYWORDS,
+  PERSON_FOCUS_PATTERNS,
+  ITEM_FOCUS_PATTERNS,
   ENTITY_STOP_PATTERNS,
+  PERSON_FRAMING_STOP_PATTERNS,
+  PERSON_RELATION_SINGLE_CHARS_RE,
 };

package/lib/vault.js CHANGED Viewed

@@ -865,6 +865,70 @@ class LocalVault {
       .map((row) => this._rowToPerson(row));
   }
+  /**
+   * searchPersons — LIKE-based name/identifier/notes search.
+   *
+   * 2026-05-27 — AnalysisEngine entityFocus="persons" path uses this when the
+   * question carries a probable person-name candidate ("妈手机号", "张三的电话").
+   * Pre-fix the engine dumped the first N contacts by ingested_at and let the
+   * LLM scan — but on small-model (Qwen 0.5B/1.5B, 20-fact budget) and large
+   * contact tables (100+), the target person rarely landed in the slice.
+   * Searching by LIKE %term% against the JSON-serialized `names` column +
+   * `identifiers` (phone numbers) + `notes` + `relation` gives the LLM the
+   * matching contact directly, eliminating that miss.
+   *
+   * No FTS5 schema migration: contact tables are small (typically <2000
+   * rows on Android), full LIKE scan stays sub-millisecond. Sticking with
+   * LIKE also avoids partial-index drift trap #25.
+   *
+   * @param {object} q
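+   * @param {string} [q.q]        term to match (trimmed). Empty, whitespace-only, or non-string falls back to queryPersons(q).
+   * @param {string} [q.subtype]  when searching, exact match on the subtype column.
+   * @param {string} [q.adapter]  when searching, exact match on the source_adapter column.
+   * @param {number} [q.limit=100]  positive integer, capped at 10000; anything else uses 100.
+   * @param {number} [q.offset=0]   non-negative integer; anything else uses 0.
+   * @returns {object[]} matching rows, each mapped through _rowToPerson.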
+   */
+  searchPersons(q = {}) {
+    const term = typeof q.q === "string" ? q.q.trim() : ""; // a non-string q.q is treated as an empty term
+    if (term.length === 0) {
+      return this.queryPersons(q); // no usable term: delegate to the plain listing with the same q
+    }
+    const where = [];
+    const params = {};
+    // Backslash-escape backslash, % and _ in the user input so they match
+    // literally instead of acting as LIKE wildcards; every LIKE below declares backslash as its ESCAPE character.
+    const escaped = term.replace(/([\\%_])/g, "\\$1");
+    params.qPat = "%" + escaped + "%";
+    where.push(
+      "(" +
+        "names LIKE @qPat ESCAPE '\\' OR " +
+        "identifiers LIKE @qPat ESCAPE '\\' OR " +
+        "notes LIKE @qPat ESCAPE '\\' OR " +
+        "relation LIKE @qPat ESCAPE '\\'" +
+        ")"
+    );
+    if (q.subtype) {
+      where.push("subtype = @subtype");
+      params.subtype = q.subtype;
+    }
+    if (q.adapter) {
+      where.push("source_adapter = @adapter");
+      params.adapter = q.adapter;
+    }
+    const limit = Number.isInteger(q.limit) && q.limit > 0 ? Math.min(q.limit, 10000) : 100; // default 100, hard cap 10000
+    const offset = Number.isInteger(q.offset) && q.offset >= 0 ? q.offset : 0; // non-integer or negative offsets fall back to 0
+    params.limit = limit;
+    params.offset = offset;
+    const sql =
+      "SELECT * FROM persons WHERE " + where.join(" AND ") +
+      " ORDER BY (confidence IS NULL) ASC, confidence DESC, ingested_at DESC" + // rows with a non-NULL confidence sort first (highest first), then ingested_at descending
+      " LIMIT @limit OFFSET @offset";
+    return this._requireOpen()
+      .prepare(sql)
+      .all(params)
+      .map((row) => this._rowToPerson(row));
+  }
   /**
    * queryItems — list item entities (installed apps, purchases, media...).
    * Pairs with queryPersons for AnalysisEngine fact gathering.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@chainlesschain/personal-data-hub",
-  "version": "0.3.7",
+  "version": "0.3.9",
   "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
   "type": "commonjs",
   "main": "lib/index.js",