@chainlesschain/personal-data-hub 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/analysis.test.js +239 -14
- package/__tests__/prompt-builder.test.js +22 -2
- package/__tests__/query-parser.test.js +86 -0
- package/__tests__/vault.test.js +88 -0
- package/lib/adapters/ai-chat-history/health-checker.js +11 -0
- package/lib/analysis.js +154 -17
- package/lib/prompt-builder.js +8 -0
- package/lib/query-parser.js +93 -0
- package/lib/vault.js +64 -0
- package/package.json +1 -1
|
@@ -403,11 +403,16 @@ describe("AnalysisEngine.ask cache bypass", () => {
|
|
|
403
403
|
// + items into facts within the maxFacts budget.
|
|
404
404
|
|
|
405
405
|
describe("AnalysisEngine._gatherFacts includes persons and items", () => {
|
|
406
|
-
it("
|
|
406
|
+
it("contact question routes via entityFocus=persons — persons only, no items competition", async () => {
|
|
407
407
|
freshVault();
|
|
408
|
-
//
|
|
409
|
-
//
|
|
410
|
-
//
|
|
408
|
+
// 2026-05-27 fix: "我有几个联系人" now matches parseEntityFocus → "persons",
|
|
409
|
+
// which intentionally skips the items table to give the full prompt
|
|
410
|
+
// budget to contacts. Pre-fix this test asserted 5 persons + 3 items
|
|
411
|
+
// (8 facts) because _gatherFacts always pulled both tables; post-fix
|
|
412
|
+
// items are deliberately excluded — the user asked about contacts, not
|
|
413
|
+
// apps. Items still surface for generic "what's in my vault" questions
|
|
414
|
+
// (entityFocus=null) and for explicit "我装了哪些 app" (entityFocus=
|
|
415
|
+
// "items"). Covered by the "AnalysisEngine._gatherFacts entityFocus routing" suite in this file.
|
|
411
416
|
const fakeVault = {
|
|
412
417
|
queryEvents: () => [],
|
|
413
418
|
queryPersons: ({ limit }) => {
|
|
@@ -448,9 +453,8 @@ describe("AnalysisEngine._gatherFacts includes persons and items", () => {
|
|
|
448
453
|
const llm = new MockLLMClient({ reply: "你共有 5 个联系人" });
|
|
449
454
|
const engine = new AnalysisEngine({ vault: fakeVault, llm });
|
|
450
455
|
const r = await engine.ask("我有几个联系人");
|
|
451
|
-
expect(r.facts.length).toBe(8); // 0 events + 5 persons + 3 items
|
|
452
456
|
expect(r.facts.filter((f) => f.type === "person").length).toBe(5);
|
|
453
|
-
expect(r.facts.filter((f) => f.type === "item").length).toBe(
|
|
457
|
+
expect(r.facts.filter((f) => f.type === "item").length).toBe(0);
|
|
454
458
|
});
|
|
455
459
|
|
|
456
460
|
it("respects maxFacts budget — events get majority, persons + items split remainder", async () => {
|
|
@@ -498,7 +502,11 @@ describe("AnalysisEngine._gatherFacts includes persons and items", () => {
|
|
|
498
502
|
expect(r.warning).toBe("no-facts");
|
|
499
503
|
});
|
|
500
504
|
|
|
501
|
-
it("events
|
|
505
|
+
it("events overflow + empty side tables → events refill the reserved slots", async () => {
|
|
506
|
+
// 2026-05-27 fix: when events would monopolize effMaxFacts the engine
|
|
507
|
+
// reserves slots for persons + items; if BOTH side tables return 0 rows
|
|
508
|
+
// the reserve is refilled with events so a contact-less vault still
|
|
509
|
+
// sees the full event budget.
|
|
502
510
|
const fakeVault = {
|
|
503
511
|
queryEvents: () => Array.from({ length: 80 }, (_, i) => ({
|
|
504
512
|
id: "e" + i, type: "event", subtype: "order",
|
|
@@ -515,10 +523,225 @@ describe("AnalysisEngine._gatherFacts includes persons and items", () => {
|
|
|
515
523
|
const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 80 });
|
|
516
524
|
const r = await engine.ask("hi");
|
|
517
525
|
expect(r.facts.length).toBe(80);
|
|
518
|
-
|
|
519
|
-
//
|
|
520
|
-
expect(fakeVault.queryPersons).
|
|
521
|
-
expect(fakeVault.queryItems).
|
|
526
|
+
expect(r.facts.filter((f) => f.type === "event").length).toBe(80);
|
|
527
|
+
// Side queries WERE called (different from pre-fix); they just returned [].
|
|
528
|
+
expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 16 });
|
|
529
|
+
expect(fakeVault.queryItems).toHaveBeenCalledWith({ limit: 8 });
|
|
530
|
+
});
|
|
531
|
+
|
|
532
|
+
it("Android small-model budget — events overflow cap, persons survive", async () => {
|
|
533
|
+
// Regression: Android local path (effMaxFacts=20, effMaxQueryLimit=50).
|
|
534
|
+
// Vault returns 50 events; pre-fix _gatherFacts shipped 50 events,
|
|
535
|
+
// buildPrompt sliced to first 20 events, persons = 0 → "几个联系人"
|
|
536
|
+
// hallucinated zero. Now events cap at 14 (20*0.7), persons get 4 (20*0.2),
|
|
537
|
+
// items get 2 (20*0.1) → contact rows reach the LLM.
|
|
538
|
+
const fakeVault = {
|
|
539
|
+
queryEvents: () => Array.from({ length: 50 }, (_, i) => ({
|
|
540
|
+
id: "e" + i, type: "event", subtype: "message",
|
|
541
|
+
occurredAt: Date.now(), actor: "self",
|
|
542
|
+
ingestedAt: Date.now(),
|
|
543
|
+
source: { adapter: "wechat", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
544
|
+
})),
|
|
545
|
+
queryPersons: ({ limit }) => Array.from({ length: limit }, (_, i) => ({
|
|
546
|
+
id: "p" + i, type: "person", subtype: "contact",
|
|
547
|
+
names: ["联系人" + i], ingestedAt: Date.now(),
|
|
548
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
549
|
+
})),
|
|
550
|
+
queryItems: ({ limit }) => Array.from({ length: limit }, (_, i) => ({
|
|
551
|
+
id: "i" + i, type: "item", subtype: "other", name: "App" + i,
|
|
552
|
+
ingestedAt: Date.now(),
|
|
553
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
554
|
+
})),
|
|
555
|
+
getEvent: () => null,
|
|
556
|
+
audit: () => {},
|
|
557
|
+
};
|
|
558
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
559
|
+
const engine = new AnalysisEngine({
|
|
560
|
+
vault: fakeVault, llm,
|
|
561
|
+
maxFacts: 20, maxQueryLimit: 50,
|
|
562
|
+
});
|
|
563
|
+
const r = await engine.ask("hi"); // generic question — default path
|
|
564
|
+
// 20 * 0.2 = 4 persons, 20 * 0.1 = 2 items, remainder 14 for events.
|
|
565
|
+
expect(r.facts.filter((f) => f.type === "event").length).toBe(14);
|
|
566
|
+
expect(r.facts.filter((f) => f.type === "person").length).toBe(4);
|
|
567
|
+
expect(r.facts.filter((f) => f.type === "item").length).toBe(2);
|
|
568
|
+
});
|
|
569
|
+
});
|
|
570
|
+
|
|
571
|
+
// ─── entityFocus routing — persons / items table priority ────────────────
|
|
572
|
+
//
|
|
573
|
+
// 2026-05-27 fix: when the question is explicitly about contacts ("我有
|
|
574
|
+
// 哪些联系人", "妈手机号"), _gatherFacts must NOT compete persons against
|
|
575
|
+
// the events pool. Pre-fix Android small-model budgets (20 facts / 50 row
|
|
576
|
+
// cap) had events drown out the contact slice → user saw "没数据" even
|
|
577
|
+
// when the vault held hundreds of contacts.
|
|
578
|
+
|
|
579
|
+
describe("AnalysisEngine._gatherFacts entityFocus routing", () => {
|
|
580
|
+
it("entityFocus=persons skips events broad scan, prioritizes persons", async () => {
|
|
581
|
+
const fakeVault = {
|
|
582
|
+
queryEvents: vi.fn(() => Array.from({ length: 50 }, (_, i) => ({
|
|
583
|
+
id: "e" + i, type: "event", subtype: "message",
|
|
584
|
+
occurredAt: Date.now(), actor: "self",
|
|
585
|
+
ingestedAt: Date.now(),
|
|
586
|
+
source: { adapter: "wechat", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
587
|
+
}))),
|
|
588
|
+
queryPersons: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
|
|
589
|
+
id: "p" + i, type: "person", subtype: "contact",
|
|
590
|
+
names: ["联系人" + i], ingestedAt: Date.now(),
|
|
591
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
592
|
+
}))),
|
|
593
|
+
queryItems: vi.fn(() => []),
|
|
594
|
+
getEvent: () => null,
|
|
595
|
+
audit: () => {},
|
|
596
|
+
};
|
|
597
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
598
|
+
const engine = new AnalysisEngine({
|
|
599
|
+
vault: fakeVault, llm,
|
|
600
|
+
maxFacts: 20, maxQueryLimit: 50,
|
|
601
|
+
});
|
|
602
|
+
const r = await engine.ask("我有哪些联系人");
|
|
603
|
+
// Persons get maxFacts - 1 = 19 slots; events get floor(20 * 0.05) = 1 headroom slot.
|
|
604
|
+
expect(r.facts.filter((f) => f.type === "person").length).toBe(19);
|
|
605
|
+
expect(r.facts.filter((f) => f.type === "event").length).toBeLessThanOrEqual(1);
|
|
606
|
+
expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 19 });
|
|
607
|
+
});
|
|
608
|
+
|
|
609
|
+
it("entityFocus=persons falls through to default path when persons table is empty", async () => {
|
|
610
|
+
const fakeVault = {
|
|
611
|
+
queryEvents: () => Array.from({ length: 5 }, (_, i) => ({
|
|
612
|
+
id: "e" + i, type: "event", subtype: "message",
|
|
613
|
+
occurredAt: Date.now(), actor: "self",
|
|
614
|
+
ingestedAt: Date.now(),
|
|
615
|
+
source: { adapter: "wechat", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
616
|
+
})),
|
|
617
|
+
queryPersons: () => [], // empty contacts table
|
|
618
|
+
queryItems: () => [],
|
|
619
|
+
getEvent: () => null,
|
|
620
|
+
audit: () => {},
|
|
621
|
+
};
|
|
622
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
623
|
+
const engine = new AnalysisEngine({ vault: fakeVault, llm });
|
|
624
|
+
const r = await engine.ask("我有哪些联系人");
|
|
625
|
+
// Fell through to default → 5 events surfaced (no cap since 5 < 80).
|
|
626
|
+
expect(r.facts.filter((f) => f.type === "event").length).toBe(5);
|
|
627
|
+
});
|
|
628
|
+
|
|
629
|
+
it("entityFocus=persons with name candidate → searchPersons short-circuit", async () => {
|
|
630
|
+
// 2026-05-27 S3 治本 — "妈手机号" must hit searchPersons LIKE search
|
|
631
|
+
// even when vault holds 500 contacts. Pre-S3 _gatherFacts dumped the
|
|
632
|
+
// first N by ingest_at; the target person rarely landed in the slice.
|
|
633
|
+
const fakeVault = {
|
|
634
|
+
queryEvents: () => [],
|
|
635
|
+
queryPersons: vi.fn(() => [
|
|
636
|
+
{ id: "p-other", type: "person", subtype: "contact", names: ["张三"], ingestedAt: 0,
|
|
637
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" } },
|
|
638
|
+
]),
|
|
639
|
+
searchPersons: vi.fn(({ q, limit }) => {
|
|
640
|
+
if (q === "妈") {
|
|
641
|
+
return [{
|
|
642
|
+
id: "p-mom", type: "person", subtype: "contact", names: ["妈妈"],
|
|
643
|
+
identifiers: { phone: ["13800138000"] }, ingestedAt: 0,
|
|
644
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
|
|
645
|
+
}];
|
|
646
|
+
}
|
|
647
|
+
return [];
|
|
648
|
+
}),
|
|
649
|
+
queryItems: () => [],
|
|
650
|
+
getEvent: () => null,
|
|
651
|
+
audit: () => {},
|
|
652
|
+
};
|
|
653
|
+
const llm = new MockLLMClient({ reply: "妈手机号是 13800138000" });
|
|
654
|
+
const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
|
|
655
|
+
const r = await engine.ask("妈手机号是多少");
|
|
656
|
+
expect(fakeVault.searchPersons).toHaveBeenCalledWith({ q: "妈", limit: 19 });
|
|
657
|
+
expect(fakeVault.queryPersons).not.toHaveBeenCalled(); // search hit → skip fallback
|
|
658
|
+
expect(r.facts.filter((f) => f.type === "person").length).toBe(1);
|
|
659
|
+
expect(r.facts.find((f) => f.id === "p-mom")).toBeDefined();
|
|
660
|
+
});
|
|
661
|
+
|
|
662
|
+
it("entityFocus=persons with name candidate but 0 search hits → falls back to queryPersons", async () => {
|
|
663
|
+
const fakeVault = {
|
|
664
|
+
queryEvents: () => [],
|
|
665
|
+
queryPersons: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
|
|
666
|
+
id: "p" + i, type: "person", subtype: "contact", names: ["P" + i], ingestedAt: 0,
|
|
667
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
|
|
668
|
+
}))),
|
|
669
|
+
searchPersons: vi.fn(() => []), // 0 hits
|
|
670
|
+
queryItems: () => [],
|
|
671
|
+
getEvent: () => null,
|
|
672
|
+
audit: () => {},
|
|
673
|
+
};
|
|
674
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
675
|
+
const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
|
|
676
|
+
await engine.ask("张三的电话号码");
|
|
677
|
+
expect(fakeVault.searchPersons).toHaveBeenCalled();
|
|
678
|
+
expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 19 });
|
|
679
|
+
});
|
|
680
|
+
|
|
681
|
+
it("entityFocus=persons without name candidate (pure list) skips searchPersons", async () => {
|
|
682
|
+
const fakeVault = {
|
|
683
|
+
queryEvents: () => [],
|
|
684
|
+
queryPersons: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
|
|
685
|
+
id: "p" + i, type: "person", subtype: "contact", names: ["P" + i], ingestedAt: 0,
|
|
686
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
|
|
687
|
+
}))),
|
|
688
|
+
searchPersons: vi.fn(() => []),
|
|
689
|
+
queryItems: () => [],
|
|
690
|
+
getEvent: () => null,
|
|
691
|
+
audit: () => {},
|
|
692
|
+
};
|
|
693
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
694
|
+
const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
|
|
695
|
+
await engine.ask("我有哪些联系人");
|
|
696
|
+
// Pure list — no name in question → skip searchPersons, go straight to queryPersons.
|
|
697
|
+
expect(fakeVault.searchPersons).not.toHaveBeenCalled();
|
|
698
|
+
expect(fakeVault.queryPersons).toHaveBeenCalledWith({ limit: 19 });
|
|
699
|
+
});
|
|
700
|
+
|
|
701
|
+
it("entityFocus=persons tolerates vault without searchPersons (legacy)", async () => {
|
|
702
|
+
const fakeVault = {
|
|
703
|
+
queryEvents: () => [],
|
|
704
|
+
queryPersons: vi.fn(({ limit }) => Array.from({ length: Math.min(limit, 3) }, (_, i) => ({
|
|
705
|
+
id: "p" + i, type: "person", subtype: "contact", names: ["P" + i], ingestedAt: 0,
|
|
706
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: 0, capturedBy: "api" },
|
|
707
|
+
}))),
|
|
708
|
+
// No searchPersons method
|
|
709
|
+
queryItems: () => [],
|
|
710
|
+
getEvent: () => null,
|
|
711
|
+
audit: () => {},
|
|
712
|
+
};
|
|
713
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
714
|
+
const engine = new AnalysisEngine({ vault: fakeVault, llm, maxFacts: 20 });
|
|
715
|
+
const r = await engine.ask("妈手机号");
|
|
716
|
+
expect(fakeVault.queryPersons).toHaveBeenCalled();
|
|
717
|
+
expect(r.facts.filter((f) => f.type === "person").length).toBe(3);
|
|
718
|
+
});
|
|
719
|
+
|
|
720
|
+
it("entityFocus=items prioritizes items table over events", async () => {
|
|
721
|
+
const fakeVault = {
|
|
722
|
+
queryEvents: () => Array.from({ length: 100 }, (_, i) => ({
|
|
723
|
+
id: "e" + i, type: "event", subtype: "browse",
|
|
724
|
+
occurredAt: Date.now(), actor: "self",
|
|
725
|
+
ingestedAt: Date.now(),
|
|
726
|
+
source: { adapter: "browser-history", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
727
|
+
})),
|
|
728
|
+
queryPersons: () => [],
|
|
729
|
+
queryItems: vi.fn(({ limit }) => Array.from({ length: limit }, (_, i) => ({
|
|
730
|
+
id: "i" + i, type: "item", subtype: "other", name: "App" + i,
|
|
731
|
+
ingestedAt: Date.now(),
|
|
732
|
+
source: { adapter: "system-data-android", adapterVersion: "0", capturedAt: Date.now(), capturedBy: "api" },
|
|
733
|
+
}))),
|
|
734
|
+
getEvent: () => null,
|
|
735
|
+
audit: () => {},
|
|
736
|
+
};
|
|
737
|
+
const llm = new MockLLMClient({ reply: "" });
|
|
738
|
+
const engine = new AnalysisEngine({
|
|
739
|
+
vault: fakeVault, llm,
|
|
740
|
+
maxFacts: 20, maxQueryLimit: 50,
|
|
741
|
+
});
|
|
742
|
+
const r = await engine.ask("我装了哪些 app");
|
|
743
|
+
expect(r.facts.filter((f) => f.type === "item").length).toBe(19);
|
|
744
|
+
expect(fakeVault.queryItems).toHaveBeenCalledWith({ limit: 19 });
|
|
522
745
|
});
|
|
523
746
|
});
|
|
524
747
|
|
|
@@ -682,10 +905,12 @@ describe("AnalysisEngine per-call budget overrides", () => {
|
|
|
682
905
|
const llm = { isLocal: true, chat: () => { throw new Error("nope"); } };
|
|
683
906
|
const engine = new AnalysisEngine({ vault: fakeVault, llm });
|
|
684
907
|
const r = await engine.retrieveContext("hi", { maxFacts: 10, maxQueryLimit: 50 });
|
|
685
|
-
//
|
|
686
|
-
//
|
|
908
|
+
// 2026-05-27 fix: _gatherFacts now respects effMaxFacts upstream
|
|
909
|
+
// (events would have overflowed → reservation branch; persons/items
|
|
910
|
+
// returned [] → refill back to events.slice(0,10)). buildPrompt sees
|
|
911
|
+
// exactly 10 facts, nothing to truncate.
|
|
687
912
|
expect(r.factCount).toBe(10);
|
|
688
|
-
expect(r.truncated).toBe(
|
|
913
|
+
expect(r.truncated).toBe(0);
|
|
689
914
|
});
|
|
690
915
|
|
|
691
916
|
it("retrieveContext() honors options.maxFacts and options.maxQueryLimit", async () => {
|
|
@@ -38,20 +38,40 @@ describe("summarizeFact", () => {
|
|
|
38
38
|
expect(s).not.toHaveProperty("extra");
|
|
39
39
|
});
|
|
40
40
|
|
|
41
|
-
it("packs person names + relation; omits source
|
|
41
|
+
it("packs person names + relation + identifiers + notes; omits source", () => {
|
|
42
|
+
// 2026-05-27 — identifiers (phone/wechatId/email) MUST reach the LLM,
|
|
43
|
+
// otherwise "妈手机号是多少" can never be answered even when vault has
|
|
44
|
+
// the phone. notes too — they're user-written context. source/ingestedAt
|
|
45
|
+
// are framing metadata, not user data, so still stripped.
|
|
42
46
|
const p = summarizePerson({
|
|
43
47
|
id: "p1",
|
|
44
48
|
type: "person",
|
|
45
49
|
subtype: "contact",
|
|
46
50
|
names: ["妈妈", "陈某某"],
|
|
47
51
|
relation: "母亲",
|
|
48
|
-
identifiers: { phone: ["13800001111"] },
|
|
52
|
+
identifiers: { phone: ["13800001111"], wechatId: "wxid_abc" },
|
|
53
|
+
notes: "best mom ever",
|
|
49
54
|
ingestedAt: 1,
|
|
50
55
|
source: { adapter: "x", adapterVersion: "0.1.0", capturedAt: 1, capturedBy: "api" },
|
|
51
56
|
});
|
|
52
57
|
expect(p.names).toEqual(["妈妈", "陈某某"]);
|
|
53
58
|
expect(p.relation).toBe("母亲");
|
|
59
|
+
expect(p.identifiers).toEqual({ phone: ["13800001111"], wechatId: "wxid_abc" });
|
|
60
|
+
expect(p.notes).toBe("best mom ever");
|
|
61
|
+
expect(p).not.toHaveProperty("source");
|
|
62
|
+
expect(p).not.toHaveProperty("ingestedAt");
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it("omits identifiers / notes fields when absent on the person row", () => {
|
|
66
|
+
const p = summarizePerson({
|
|
67
|
+
id: "p2",
|
|
68
|
+
type: "person",
|
|
69
|
+
subtype: "contact",
|
|
70
|
+
names: ["路人甲"],
|
|
71
|
+
});
|
|
54
72
|
expect(p).not.toHaveProperty("identifiers");
|
|
73
|
+
expect(p).not.toHaveProperty("notes");
|
|
74
|
+
expect(p).not.toHaveProperty("relation");
|
|
55
75
|
});
|
|
56
76
|
|
|
57
77
|
it("returns null for non-object / unknown types just yields minimal shape", () => {
|
|
@@ -7,7 +7,9 @@ const {
|
|
|
7
7
|
parseTimeWindow,
|
|
8
8
|
parseFilters,
|
|
9
9
|
parseIntent,
|
|
10
|
+
parseEntityFocus,
|
|
10
11
|
extractEntityTerm,
|
|
12
|
+
extractPersonNameCandidate,
|
|
11
13
|
} = require("../lib/query-parser");
|
|
12
14
|
|
|
13
15
|
// Pin "now" to 2026-05-19 12:00:00 UTC for deterministic windows
|
|
@@ -126,6 +128,38 @@ describe("parseIntent", () => {
|
|
|
126
128
|
});
|
|
127
129
|
});
|
|
128
130
|
|
|
131
|
+
describe("parseEntityFocus", () => {
|
|
132
|
+
it("returns 'persons' for 联系人 / 通讯录 phrasing", () => {
|
|
133
|
+
expect(parseEntityFocus("我有哪些联系人")).toBe("persons");
|
|
134
|
+
expect(parseEntityFocus("通讯录里有多少人")).toBe("persons");
|
|
135
|
+
expect(parseEntityFocus("好友列表谁是张三")).toBe("persons");
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
it("returns 'persons' for phone-number phrasing", () => {
|
|
139
|
+
expect(parseEntityFocus("妈手机号是多少")).toBe("persons");
|
|
140
|
+
expect(parseEntityFocus("王医生的电话号码")).toBe("persons");
|
|
141
|
+
expect(parseEntityFocus("show me my contacts")).toBe("persons");
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
it("returns 'items' for installed-app phrasing", () => {
|
|
145
|
+
expect(parseEntityFocus("我装了哪些 app")).toBe("items");
|
|
146
|
+
expect(parseEntityFocus("有哪些游戏")).toBe("items");
|
|
147
|
+
expect(parseEntityFocus("installed apps")).toBe("items");
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
it("returns null when no focus signal", () => {
|
|
151
|
+
expect(parseEntityFocus("上个月在淘宝花了多少")).toBeNull();
|
|
152
|
+
expect(parseEntityFocus("最近的订单")).toBeNull();
|
|
153
|
+
expect(parseEntityFocus("hello")).toBeNull();
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
it("returns null for non-string / empty input", () => {
|
|
157
|
+
expect(parseEntityFocus("")).toBeNull();
|
|
158
|
+
expect(parseEntityFocus(null)).toBeNull();
|
|
159
|
+
expect(parseEntityFocus(undefined)).toBeNull();
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
|
|
129
163
|
describe("parseQuery (integration)", () => {
|
|
130
164
|
it("full parse for spending question", () => {
|
|
131
165
|
const r = parseQuery("上个月在淘宝总共花了多少钱?", { now: NOW });
|
|
@@ -133,6 +167,13 @@ describe("parseQuery (integration)", () => {
|
|
|
133
167
|
expect(r.filters.subtype).toBe("payment");
|
|
134
168
|
expect(r.filters.adapter).toBe("taobao");
|
|
135
169
|
expect(r.intent).toBe("sum-amount");
|
|
170
|
+
expect(r.entityFocus).toBeNull();
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
it("contact question carries entityFocus=persons", () => {
|
|
174
|
+
const r = parseQuery("我有哪些联系人", { now: NOW });
|
|
175
|
+
expect(r.entityFocus).toBe("persons");
|
|
176
|
+
expect(r.intent).toBe("list");
|
|
136
177
|
});
|
|
137
178
|
|
|
138
179
|
it("full parse for footprint question", () => {
|
|
@@ -214,3 +255,48 @@ describe("extractEntityTerm", () => {
|
|
|
214
255
|
expect(r).toBeNull();
|
|
215
256
|
});
|
|
216
257
|
});
|
|
258
|
+
|
|
259
|
+
// ─── extractPersonNameCandidate — persons-branch name search ─────────────
|
|
260
|
+
//
|
|
261
|
+
// 2026-05-27 — Powers AnalysisEngine entityFocus=persons name-search
|
|
262
|
+
// short-circuit. Differs from extractEntityTerm in two ways: strips
|
|
263
|
+
// person-FOCUS framing words first (联系人/手机号/etc.) and allows
|
|
264
|
+
// single-char Chinese names from a relation whitelist (妈/爸/姐/...).
|
|
265
|
+
|
|
266
|
+
describe("extractPersonNameCandidate", () => {
|
|
267
|
+
it("extracts multi-char name when present", () => {
|
|
268
|
+
expect(extractPersonNameCandidate("张三的电话号码")).toBe("张三");
|
|
269
|
+
expect(extractPersonNameCandidate("王医生手机号是多少")).toBe("王医生");
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
it("falls back to single-char relation word ('妈', '爸', '姐')", () => {
|
|
273
|
+
expect(extractPersonNameCandidate("妈手机号是多少")).toBe("妈");
|
|
274
|
+
expect(extractPersonNameCandidate("爸的电话")).toBe("爸");
|
|
275
|
+
expect(extractPersonNameCandidate("姐姐的号码")).toBe("姐姐");
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
it("multi-char wins over single-char fallback", () => {
|
|
279
|
+
// "王医生" (3 char) preferred over leaked single "医" / "生".
|
|
280
|
+
expect(extractPersonNameCandidate("王医生的手机号")).toBe("王医生");
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
it("returns null when no name candidate (pure framing)", () => {
|
|
284
|
+
expect(extractPersonNameCandidate("我有哪些联系人")).toBeNull();
|
|
285
|
+
expect(extractPersonNameCandidate("通讯录里有多少人")).toBeNull();
|
|
286
|
+
});
|
|
287
|
+
|
|
288
|
+
it("ignores single-char Chinese outside the relation whitelist", () => {
|
|
289
|
+
// "说" / "看" are not relation chars — should NOT slip through as names.
|
|
290
|
+
expect(extractPersonNameCandidate("说手机号")).toBeNull();
|
|
291
|
+
});
|
|
292
|
+
|
|
293
|
+
it("returns null for non-string / empty input", () => {
|
|
294
|
+
expect(extractPersonNameCandidate("")).toBeNull();
|
|
295
|
+
expect(extractPersonNameCandidate(null)).toBeNull();
|
|
296
|
+
expect(extractPersonNameCandidate(undefined)).toBeNull();
|
|
297
|
+
});
|
|
298
|
+
|
|
299
|
+
it("handles ASCII names ≥2 chars", () => {
|
|
300
|
+
expect(extractPersonNameCandidate("Alice 的电话号码")).toBe("Alice");
|
|
301
|
+
});
|
|
302
|
+
});
|
package/__tests__/vault.test.js
CHANGED
|
@@ -425,6 +425,94 @@ describe("LocalVault.queryEvents + countEvents", () => {
|
|
|
425
425
|
});
|
|
426
426
|
});
|
|
427
427
|
|
|
428
|
+
// ─── searchPersons (LIKE name search) ────────────────────────────────────
|
|
429
|
+
//
|
|
430
|
+
// 2026-05-27 — AnalysisEngine entityFocus="persons" routes to searchPersons
|
|
431
|
+
// when the question carries a name candidate ("妈手机号", "张三的电话").
|
|
432
|
+
// LIKE on names / identifiers / notes / relation, no FTS5 migration.
|
|
433
|
+
|
|
434
|
+
describe("LocalVault.searchPersons", () => {
|
|
435
|
+
it("matches against names column (JSON-serialized array)", () => {
|
|
436
|
+
freshVault();
|
|
437
|
+
vault.putPerson(personOk({ names: ["妈妈", "陈某某"] }));
|
|
438
|
+
vault.putPerson(personOk({ names: ["张三"] }));
|
|
439
|
+
vault.putPerson(personOk({ names: ["王医生"] }));
|
|
440
|
+
|
|
441
|
+
const r = vault.searchPersons({ q: "妈" });
|
|
442
|
+
expect(r.length).toBe(1);
|
|
443
|
+
expect(r[0].names).toContain("妈妈");
|
|
444
|
+
});
|
|
445
|
+
|
|
446
|
+
it("matches against identifiers (phone numbers)", () => {
|
|
447
|
+
freshVault();
|
|
448
|
+
vault.putPerson(personOk({
|
|
449
|
+
names: ["张三"],
|
|
450
|
+
identifiers: { phone: ["13800001111"] },
|
|
451
|
+
}));
|
|
452
|
+
vault.putPerson(personOk({
|
|
453
|
+
names: ["李四"],
|
|
454
|
+
identifiers: { phone: ["13900002222"] },
|
|
455
|
+
}));
|
|
456
|
+
|
|
457
|
+
const r = vault.searchPersons({ q: "13800" });
|
|
458
|
+
expect(r.length).toBe(1);
|
|
459
|
+
expect(r[0].names).toContain("张三");
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
it("matches against notes + relation", () => {
|
|
463
|
+
freshVault();
|
|
464
|
+
vault.putPerson(personOk({
|
|
465
|
+
names: ["陈某某"], relation: "母亲", notes: "best mom ever",
|
|
466
|
+
}));
|
|
467
|
+
vault.putPerson(personOk({ names: ["路人甲"], relation: "stranger" }));
|
|
468
|
+
|
|
469
|
+
expect(vault.searchPersons({ q: "母亲" }).length).toBe(1);
|
|
470
|
+
expect(vault.searchPersons({ q: "best mom" }).length).toBe(1);
|
|
471
|
+
});
|
|
472
|
+
|
|
473
|
+
it("empty q delegates to queryPersons (ingest-ordered)", () => {
|
|
474
|
+
freshVault();
|
|
475
|
+
vault.putPerson(personOk({ names: ["A"] }));
|
|
476
|
+
vault.putPerson(personOk({ names: ["B"] }));
|
|
477
|
+
vault.putPerson(personOk({ names: ["C"] }));
|
|
478
|
+
|
|
479
|
+
const r = vault.searchPersons({ q: "", limit: 2 });
|
|
480
|
+
expect(r.length).toBe(2);
|
|
481
|
+
});
|
|
482
|
+
|
|
483
|
+
it("LIKE meta-characters in user input are escaped (no wildcard injection)", () => {
|
|
484
|
+
freshVault();
|
|
485
|
+
vault.putPerson(personOk({ names: ["100%棉"] }));
|
|
486
|
+
vault.putPerson(personOk({ names: ["AAA"] }));
|
|
487
|
+
|
|
488
|
+
// "100%" should match only the literal "100%棉" row, not everything.
|
|
489
|
+
const r = vault.searchPersons({ q: "100%" });
|
|
490
|
+
expect(r.length).toBe(1);
|
|
491
|
+
expect(r[0].names).toContain("100%棉");
|
|
492
|
+
});
|
|
493
|
+
|
|
494
|
+
it("respects subtype + adapter filters", () => {
|
|
495
|
+
freshVault();
|
|
496
|
+
vault.putPerson(personOk({
|
|
497
|
+
subtype: "contact", names: ["张三"],
|
|
498
|
+
source: source({ adapter: "wechat" }),
|
|
499
|
+
}));
|
|
500
|
+
vault.putPerson(personOk({
|
|
501
|
+
subtype: "merchant", names: ["张三"],
|
|
502
|
+
source: source({ adapter: "system-data-android" }),
|
|
503
|
+
}));
|
|
504
|
+
|
|
505
|
+
expect(vault.searchPersons({ q: "张三", subtype: "merchant" }).length).toBe(1);
|
|
506
|
+
expect(vault.searchPersons({ q: "张三", adapter: "wechat" }).length).toBe(1);
|
|
507
|
+
});
|
|
508
|
+
|
|
509
|
+
it("returns empty array when no match", () => {
|
|
510
|
+
freshVault();
|
|
511
|
+
vault.putPerson(personOk({ names: ["张三"] }));
|
|
512
|
+
expect(vault.searchPersons({ q: "完全不存在的名字" })).toEqual([]);
|
|
513
|
+
});
|
|
514
|
+
});
|
|
515
|
+
|
|
428
516
|
// ─── sync watermarks ──────────────────────────────────────────────────────
|
|
429
517
|
|
|
430
518
|
describe("LocalVault sync watermarks", () => {
|
|
@@ -172,7 +172,18 @@ function createAIChatHealthChecker({
|
|
|
172
172
|
deps.logger.error("[aichat-health] interval run failed", err && err.message),
|
|
173
173
|
);
|
|
174
174
|
}, intervalMs);
|
|
175
|
+
// Don't keep the event loop alive on the periodic check alone. Without
|
|
176
|
+
// unref(), a one-shot `cc hub list-adapters --json` from in-APK Android
|
|
177
|
+
// sits idle in epoll_wait until Kotlin LocalCcRunner.waitFor 240s
|
|
178
|
+
// timeout → false "写入本地数据库失败". Real-device repro 2026-05-27
|
|
179
|
+
// Xiaomi 24115RA8EC (PID 24828 lingered with vault.db RW handles).
|
|
180
|
+
if (intervalHandle && typeof intervalHandle.unref === "function") {
|
|
181
|
+
intervalHandle.unref();
|
|
182
|
+
}
|
|
175
183
|
}, firstRunDelayMs);
|
|
184
|
+
if (firstRunHandle && typeof firstRunHandle.unref === "function") {
|
|
185
|
+
firstRunHandle.unref();
|
|
186
|
+
}
|
|
176
187
|
return true;
|
|
177
188
|
}
|
|
178
189
|
|
package/lib/analysis.js
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
|
|
22
22
|
"use strict";
|
|
23
23
|
|
|
24
|
-
const { parseQuery, extractEntityTerm } = require("./query-parser");
|
|
24
|
+
const { parseQuery, extractEntityTerm, extractPersonNameCandidate } = require("./query-parser");
|
|
25
25
|
const {
|
|
26
26
|
buildPrompt,
|
|
27
27
|
parseCitations,
|
|
@@ -61,6 +61,27 @@ const SUM_AMOUNT_SUBTYPES = ["order", "payment", "transfer", "income"];
|
|
|
61
61
|
// 12) doesn't starve any single subtype.
|
|
62
62
|
const SUM_AMOUNT_MIN_PER_SUBTYPE = 20;
|
|
63
63
|
|
|
64
|
+
// entityFocus="persons" routing — explicit contact queries ("我有哪些联系人",
|
|
65
|
+
// "妈手机号"). When the user names the target table the engine MUST NOT
|
|
66
|
+
// compete persons against the events pool: small-model Android budgets
|
|
67
|
+
// (20 facts / 50 row cap) get drained by a few hundred Bilibili
|
|
68
|
+
// notifications and the contact slice ends up empty. parseEntityFocus
|
|
69
|
+
// surfaces the signal; we honor it by going persons-first.
|
|
70
|
+
//
|
|
71
|
+
// Keep a TINY events headroom (5%) so questions like "我最近跟妈打过电话吗"
|
|
72
|
+
// still surface 通话 event rows alongside the contact entry.
|
|
73
|
+
const PERSONS_FOCUS_EVENT_HEADROOM_RATIO = 0.05;
|
|
74
|
+
|
|
75
|
+
// Default-path budget split when no entityFocus signal. Pre-fix events
|
|
76
|
+
// got the entire effMaxFacts pool first and persons/items shared only the
|
|
77
|
+
// remainder; on a busy vault that meant 0 contacts in the prompt. Cap
|
|
78
|
+
// events at 70%, reserve 20% for persons and 10% for items so a generic
|
|
79
|
+
// "what's going on" question still sees the full data shape.
|
|
80
|
+
const DEFAULT_EVENT_BUDGET_RATIO = 0.7;
|
|
81
|
+
const DEFAULT_PERSON_BUDGET_RATIO = 0.2;
|
|
82
|
+
// Items take whatever remains; intent=count/list questions about contacts
|
|
83
|
+
// already short-circuit via entityFocus before reaching this branch.
|
|
84
|
+
|
|
64
85
|
class AnalysisEngine {
|
|
65
86
|
/**
|
|
66
87
|
* @param {object} opts
|
|
@@ -426,6 +447,88 @@ class AnalysisEngine {
|
|
|
426
447
|
// 0 results → fall through to default broader path below.
|
|
427
448
|
}
|
|
428
449
|
|
|
450
|
+
// entityFocus=persons routing — "我有哪些联系人", "妈手机号", "通讯录里
|
|
451
|
+
// 有多少人". Skip the events broad scan and put the entire fact budget
|
|
452
|
+
// on the persons table (with a 5% events headroom for adjacent rows
|
|
453
|
+
// like 通话/短信). Adapter / time window are NOT applied to persons:
|
|
454
|
+
// contacts are current-state snapshots, not time-stamped events.
|
|
455
|
+
//
|
|
456
|
+
// 0 hits → fall through to the default path. A user might say "联系人"
|
|
457
|
+
// colloquially when they mean "people I've messaged" — the default
|
|
458
|
+
// events+persons mix is the right safety net.
|
|
459
|
+
if (parsed.entityFocus === "persons") {
|
|
460
|
+
const personLimit = effMaxFacts > 1 ? effMaxFacts - 1 : effMaxFacts;
|
|
461
|
+
let persons = [];
|
|
462
|
+
// Name-search short-circuit — when the question carries a probable
|
|
463
|
+
// person-name candidate ("妈手机号", "张三的电话"), try LIKE-search
|
|
464
|
+
// against names / identifiers / notes / relation. Hits go straight
|
|
465
|
+
// to FACTS so the LLM sees the target contact even when the vault
|
|
466
|
+
// holds hundreds of others. Falls back to ingest-ordered queryPersons
|
|
467
|
+
// when 0 hits or no name candidate.
|
|
468
|
+
const nameCandidate = extractPersonNameCandidate(parsed.raw);
|
|
469
|
+
if (nameCandidate && typeof this.vault.searchPersons === "function") {
|
|
470
|
+
try {
|
|
471
|
+
persons = this.vault.searchPersons({ q: nameCandidate, limit: personLimit });
|
|
472
|
+
} catch (_e) { /* tolerate — try ingest-ordered fallback */ }
|
|
473
|
+
}
|
|
474
|
+
if (persons.length === 0) {
|
|
475
|
+
try {
|
|
476
|
+
persons = this.vault.queryPersons({ limit: personLimit });
|
|
477
|
+
} catch (_e) {
|
|
478
|
+
// legacy vault — fall through
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
if (persons.length > 0) {
|
|
482
|
+
const eventHeadroom = Math.max(
|
|
483
|
+
0,
|
|
484
|
+
Math.floor(effMaxFacts * PERSONS_FOCUS_EVENT_HEADROOM_RATIO)
|
|
485
|
+
);
|
|
486
|
+
let events = [];
|
|
487
|
+
if (eventHeadroom > 0) {
|
|
488
|
+
const eq = { limit: eventHeadroom };
|
|
489
|
+
if (parsed.filters && parsed.filters.adapter) eq.adapter = parsed.filters.adapter;
|
|
490
|
+
if (parsed.timeWindow) {
|
|
491
|
+
if (Number.isFinite(parsed.timeWindow.since)) eq.since = parsed.timeWindow.since;
|
|
492
|
+
if (Number.isFinite(parsed.timeWindow.until)) eq.until = parsed.timeWindow.until;
|
|
493
|
+
}
|
|
494
|
+
try {
|
|
495
|
+
events = this.vault.queryEvents(eq);
|
|
496
|
+
} catch (_e) { /* tolerate */ }
|
|
497
|
+
}
|
|
498
|
+
// persons-first ordering so the LLM reads the contact rows before
|
|
499
|
+
// the (sparse) event tail.
|
|
500
|
+
const combined = [...persons, ...events].slice(0, effMaxFacts);
|
|
501
|
+
return combined;
|
|
502
|
+
}
|
|
503
|
+
// 0 persons → fall through.
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
// entityFocus=items routing — "我装了哪些 app", "有哪些游戏". Mirror
|
|
507
|
+
// persons branch: skip events, query items table directly, keep a
|
|
508
|
+
// tiny events headroom for adjacent rows.
|
|
509
|
+
if (parsed.entityFocus === "items") {
|
|
510
|
+
const itemLimit = effMaxFacts > 1 ? effMaxFacts - 1 : effMaxFacts;
|
|
511
|
+
let items = [];
|
|
512
|
+
try {
|
|
513
|
+
items = this.vault.queryItems({ limit: itemLimit });
|
|
514
|
+
} catch (_e) { /* legacy */ }
|
|
515
|
+
if (items.length > 0) {
|
|
516
|
+
const eventHeadroom = Math.max(
|
|
517
|
+
0,
|
|
518
|
+
Math.floor(effMaxFacts * PERSONS_FOCUS_EVENT_HEADROOM_RATIO)
|
|
519
|
+
);
|
|
520
|
+
let events = [];
|
|
521
|
+
if (eventHeadroom > 0) {
|
|
522
|
+
const eq = { limit: eventHeadroom };
|
|
523
|
+
if (parsed.filters && parsed.filters.adapter) eq.adapter = parsed.filters.adapter;
|
|
524
|
+
try {
|
|
525
|
+
events = this.vault.queryEvents(eq);
|
|
526
|
+
} catch (_e) { /* tolerate */ }
|
|
527
|
+
}
|
|
528
|
+
return [...items, ...events].slice(0, effMaxFacts);
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
|
|
429
532
|
// intent=sum-amount routing — "总共花了多少" / "在淘宝花了多少钱"
|
|
430
533
|
// only needs events from amount-bearing subtypes (order/payment/
|
|
431
534
|
// transfer/income). Pulling messages / visits / browses wastes
|
|
@@ -551,22 +654,40 @@ class AnalysisEngine {
|
|
|
551
654
|
// - installed apps land in `items`, not `events`
|
|
552
655
|
// - places (visited locations) live in `places`
|
|
553
656
|
// Without these the LLM gets 0 facts for "我有几个联系人" style questions
|
|
554
|
-
// and hallucinates a count.
|
|
555
|
-
//
|
|
556
|
-
//
|
|
557
|
-
//
|
|
657
|
+
// and hallucinates a count.
|
|
658
|
+
//
|
|
659
|
+
// Sizing — two regimes:
|
|
660
|
+
// (a) Events fit (events.length < effMaxFacts): legacy behavior —
|
|
661
|
+
// events first, split the remainder evenly between persons + items.
|
|
662
|
+
// (b) Events would monopolize (events.length >= effMaxFacts): reserve
|
|
663
|
+
// DEFAULT_PERSON_BUDGET_RATIO (20%) + 10% for persons + items so a
|
|
664
|
+
// busy event timeline doesn't shove every contact out of the prompt.
|
|
665
|
+
// If persons + items tables BOTH return 0 rows, refill the reserve
|
|
666
|
+
// with events — no point starving the LLM of facts when the side
|
|
667
|
+
// tables are empty (small vaults / pre-Path-C ingest state).
|
|
558
668
|
//
|
|
559
|
-
//
|
|
560
|
-
//
|
|
561
|
-
//
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
669
|
+
// Time window + adapter filters don't apply to persons/items: they're
|
|
670
|
+
// current-state snapshots, not time-stamped events. A user asking
|
|
671
|
+
// "上个月联系人变化" is rare enough to leave for a future intent.
|
|
672
|
+
let cappedEvents = events;
|
|
673
|
+
let personBudget;
|
|
674
|
+
let itemBudget;
|
|
675
|
+
if (events.length >= effMaxFacts) {
|
|
676
|
+
const personReserve = Math.max(1, Math.floor(effMaxFacts * DEFAULT_PERSON_BUDGET_RATIO));
|
|
677
|
+
const itemReserve = Math.max(
|
|
678
|
+
1,
|
|
679
|
+
Math.floor(effMaxFacts * (1 - DEFAULT_EVENT_BUDGET_RATIO - DEFAULT_PERSON_BUDGET_RATIO))
|
|
680
|
+
);
|
|
681
|
+
const eventCap = Math.max(1, effMaxFacts - personReserve - itemReserve);
|
|
682
|
+
cappedEvents = events.slice(0, eventCap);
|
|
683
|
+
personBudget = personReserve;
|
|
684
|
+
itemBudget = itemReserve;
|
|
685
|
+
} else {
|
|
686
|
+
const remaining = effMaxFacts - events.length;
|
|
687
|
+
const sideBudget = Math.floor(remaining / 2);
|
|
688
|
+
personBudget = sideBudget > 0 ? sideBudget : 0;
|
|
689
|
+
itemBudget = remaining - personBudget;
|
|
690
|
+
}
|
|
570
691
|
|
|
571
692
|
let persons = [];
|
|
572
693
|
if (personBudget > 0) {
|
|
@@ -585,7 +706,20 @@ class AnalysisEngine {
|
|
|
585
706
|
}
|
|
586
707
|
}
|
|
587
708
|
|
|
588
|
-
|
|
709
|
+
// Refill backfill — when events overflowed (reservation branch) but
|
|
710
|
+
// persons + items both returned 0 rows, give the reserved slots back
|
|
711
|
+
// to events. Small vaults / pre-Path-C state would otherwise see fewer
|
|
712
|
+
// facts than the budget allowed.
|
|
713
|
+
if (
|
|
714
|
+
events.length >= effMaxFacts &&
|
|
715
|
+
persons.length === 0 &&
|
|
716
|
+
items.length === 0 &&
|
|
717
|
+
cappedEvents.length < effMaxFacts
|
|
718
|
+
) {
|
|
719
|
+
cappedEvents = events.slice(0, effMaxFacts);
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
return [...cappedEvents, ...persons, ...items];
|
|
589
723
|
}
|
|
590
724
|
|
|
591
725
|
/**
|
|
@@ -630,4 +764,7 @@ module.exports = {
|
|
|
630
764
|
LIST_INTENT_FTS_LIMIT,
|
|
631
765
|
SUM_AMOUNT_SUBTYPES,
|
|
632
766
|
SUM_AMOUNT_MIN_PER_SUBTYPE,
|
|
767
|
+
PERSONS_FOCUS_EVENT_HEADROOM_RATIO,
|
|
768
|
+
DEFAULT_EVENT_BUDGET_RATIO,
|
|
769
|
+
DEFAULT_PERSON_BUDGET_RATIO,
|
|
633
770
|
};
|
package/lib/prompt-builder.js
CHANGED
|
@@ -67,12 +67,20 @@ function summarizeEvent(e) {
|
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
/**
 * Build the compact, LLM-facing summary of a person row.
 *
 * The summary always carries id / subtype / names and tags the row with
 * `type: "person"`. `relation`, `identifiers` (phone, wechatId, email, ...)
 * and `notes` are attached only when they hold something, so a question
 * such as "what is mom's phone number" can actually be answered from FACTS.
 *
 * @param {object} p person row
 * @returns {object} plain summary object placed under FACTS
 */
function summarizePerson(p) {
  const identifiers = p.identifiers;

  // Empty containers are truthy in JavaScript. Treat `[]` / `{}` as absent so
  // they do not add `identifiers: []` noise to a tight prompt budget.
  let hasIdentifiers;
  if (Array.isArray(identifiers)) {
    hasIdentifiers = identifiers.length > 0;
  } else if (identifiers !== null && typeof identifiers === "object") {
    hasIdentifiers = Object.keys(identifiers).length > 0;
  } else {
    hasIdentifiers = Boolean(identifiers);
  }

  return {
    id: p.id,
    type: "person",
    subtype: p.subtype,
    names: p.names,
    ...(p.relation ? { relation: p.relation } : {}),
    ...(hasIdentifiers ? { identifiers } : {}),
    ...(p.notes ? { notes: p.notes } : {}),
  };
}
|
|
78
86
|
|
package/lib/query-parser.js
CHANGED
|
@@ -219,6 +219,42 @@ function parseIntent(text) {
|
|
|
219
219
|
return "list";
|
|
220
220
|
}
|
|
221
221
|
|
|
222
|
+
// ─── Entity-focus detection (persons / items routing) ────────────────────
|
|
223
|
+
//
|
|
224
|
+
// 2026-05-27 — Bug: user asked "我有哪些联系人" / "我妈手机号" several times;
|
|
225
|
+
// vault held real contacts but the LLM kept replying "没数据" because the
|
|
226
|
+
// default _gatherFacts pulled 200 row-cap of events first and the persons
|
|
227
|
+
// slice got squeezed out of the small-model 20-fact budget. parseIntent
|
|
228
|
+
// already catches "几个 X" as count, but that doesn't tell the engine WHICH
|
|
229
|
+
// table the user means. parseEntityFocus is the missing signal: when the
|
|
230
|
+
// question is explicitly about contacts/apps, the engine prioritizes that
|
|
231
|
+
// table instead of competing with events.
|
|
232
|
+
//
|
|
233
|
+
// Returns null when no focus signal — engine falls back to the existing
|
|
234
|
+
// events-majority + persons/items remainder behavior.
|
|
235
|
+
//
|
|
236
|
+
// Memory: pdh_analysis_engine_intent_routing.md.
|
|
237
|
+
|
|
238
|
+
// Phrasings that mark a question as being about the contacts (persons)
// table: contact-list nouns, phone-number wording, "who is ..." wording,
// and the English equivalents.
const PERSON_FOCUS_PATTERNS = [
  /(联系人|通讯录|电话簿|通信录|好友列表|朋友列表)/,
  /(手机号|电话号|号码是|的电话|的手机)/,
  /(谁是|是谁|是什么人)/,
  // Plurals are optional on every English noun: with a bare singular the
  // trailing \b rejects "phone numbers" / "address books" / "phonebooks".
  /\b(contacts?|phonebooks?|address\s*books?|phone\s*numbers?)\b/i,
];

// Phrasings that mark a question as being about the items table
// (installed apps / software / games).
const ITEM_FOCUS_PATTERNS = [
  /(装了|安装了|装过|下了什么|下载了什么|有哪些(app|应用|软件|游戏))/i,
  /(我的(app|应用|软件)|哪些(app|应用|软件|游戏))/i,
  /\b(installed\s+apps?|my\s+apps?|installed\s+packages?)\b/i,
];

/**
 * Detect which entity table a question is explicitly about.
 *
 * Person patterns are checked before item patterns, so a question matching
 * both is reported as "persons".
 *
 * @param {string} text raw user question
 * @returns {"persons"|"items"|null} the focused table, or null when the
 *   input is not a non-empty string or no pattern matches
 */
function parseEntityFocus(text) {
  if (typeof text !== "string" || text.length === 0) return null;
  if (PERSON_FOCUS_PATTERNS.some((re) => re.test(text))) return "persons";
  if (ITEM_FOCUS_PATTERNS.some((re) => re.test(text))) return "items";
  return null;
}
|
|
257
|
+
|
|
222
258
|
// ─── Entity-name extraction (FTS5 fulltext routing) ────────────────────
|
|
223
259
|
//
|
|
224
260
|
// Pull a probable entity-name candidate out of the raw question so
|
|
@@ -291,6 +327,56 @@ function extractEntityTerm(text) {
|
|
|
291
327
|
return candidates[0];
|
|
292
328
|
}
|
|
293
329
|
|
|
330
|
+
// ─── Person-name extraction (entityFocus=persons routing) ────────────────
|
|
331
|
+
//
|
|
332
|
+
// Specialized extractor for the persons branch in AnalysisEngine. Differs
|
|
333
|
+
// from extractEntityTerm in two ways:
|
|
334
|
+
//
|
|
335
|
+
// 1. Strips person-FOCUS framing words first (联系人/手机号/电话/etc.) —
|
|
336
|
+
// they're question scaffolding, not the target name. extractEntityTerm
|
|
337
|
+
// left "妈手机号" intact because it doesn't know that phrase is framing.
|
|
338
|
+
//
|
|
339
|
+
// 2. Allows single-character names from a relation-word whitelist
|
|
340
|
+
// (妈/爸/姐/弟/...) — extractEntityTerm filtered every 1-char Chinese to
|
|
341
|
+
// suppress verb false positives, but that also dropped "妈" / "爸" which
|
|
342
|
+
// are the dominant contact-name shorthands on a personal phonebook.
|
|
343
|
+
//
|
|
344
|
+
// Multi-char candidates always win over single-char fallback so "张三的
|
|
345
|
+
// 手机号" returns "张三" not "三".
|
|
346
|
+
|
|
347
|
+
// Question scaffolding that is stripped before looking for a name.
//
// Alternation is tried left-to-right at each position, so the phone-number
// pattern lists the longest spellings first (optionally led by the
// possessive particle). Otherwise "手机号码" loses only "手机号" and
// "的手机号" loses only "的手机", leaving a stray "码" / "号" that can fuse
// with the following text and outrank the real name.
const PERSON_FRAMING_STOP_PATTERNS = [
  /(联系人|通讯录|电话簿|通信录|好友列表|朋友列表)/g,
  /(的?手机号码?|的?电话号码?|号码是|的电话|的手机|号码|电话)/g,
  /(谁是|是谁|是什么人|是哪位)/g,
  // Optional plurals so "phone numbers" / "address books" are stripped too.
  /\b(contacts?|phonebooks?|address\s*books?|phone\s*numbers?)\b/gi,
];

// Whitelisted single-character Chinese relation words. Single-character
// tokens outside this set are dropped so verbs / particles do not leak
// through as name candidates. Extend cautiously: every added character
// widens what the downstream LIKE search can match.
// NOTE(review): 嫁 is a verb ("to marry") rather than a kinship term —
// confirm it is intended before relying on it.
const PERSON_RELATION_SINGLE_CHARS_RE =
  /^[妈爸姐妹哥弟爹娘爷奶姥舅姑叔伯婶嫂嫁公婆]$/;

/**
 * Pull a probable person-name candidate out of a raw question.
 *
 * Steps: strip person-framing words, strip the generic entity stop patterns
 * (ENTITY_STOP_PATTERNS, defined elsewhere in this module), split on
 * whitespace, and keep tokens of 1-10 UTF-16 units. The longest token of
 * two or more units wins; ties keep their original order. Failing that,
 * the first whitelisted single-character relation word is returned.
 *
 * @param {string} text raw user question
 * @returns {string|null} the candidate, or null when the input is not a
 *   non-empty string or nothing usable remains
 */
function extractPersonNameCandidate(text) {
  if (typeof text !== "string" || text.length === 0) return null;

  let s = text;
  for (const re of PERSON_FRAMING_STOP_PATTERNS) {
    s = s.replace(re, " ");
  }
  for (const re of ENTITY_STOP_PATTERNS) {
    s = s.replace(re, " ");
  }

  const all = s.split(/\s+/).filter((t) => t.length >= 1 && t.length <= 10);
  if (all.length === 0) return null;

  // Multi-character candidates always win over the single-character fallback.
  const multi = all
    .filter((t) => t.length >= 2)
    .sort((a, b) => b.length - a.length);
  if (multi.length > 0) return multi[0];

  const single = all.find(
    (t) => t.length === 1 && PERSON_RELATION_SINGLE_CHARS_RE.test(t)
  );
  return single || null;
}
|
|
379
|
+
|
|
294
380
|
// ─── Full parser ─────────────────────────────────────────────────────────
|
|
295
381
|
|
|
296
382
|
/**
|
|
@@ -314,6 +400,7 @@ function parseQuery(question, opts = {}) {
|
|
|
314
400
|
timeWindow: parseTimeWindow(raw, now),
|
|
315
401
|
filters: parseFilters(raw),
|
|
316
402
|
intent: parseIntent(raw),
|
|
403
|
+
entityFocus: parseEntityFocus(raw),
|
|
317
404
|
};
|
|
318
405
|
}
|
|
319
406
|
|
|
@@ -322,9 +409,15 @@ module.exports = {
|
|
|
322
409
|
parseTimeWindow,
|
|
323
410
|
parseFilters,
|
|
324
411
|
parseIntent,
|
|
412
|
+
parseEntityFocus,
|
|
325
413
|
extractEntityTerm,
|
|
414
|
+
extractPersonNameCandidate,
|
|
326
415
|
// exposed for tests
|
|
327
416
|
SUBTYPE_KEYWORDS,
|
|
328
417
|
ADAPTER_KEYWORDS,
|
|
418
|
+
PERSON_FOCUS_PATTERNS,
|
|
419
|
+
ITEM_FOCUS_PATTERNS,
|
|
329
420
|
ENTITY_STOP_PATTERNS,
|
|
421
|
+
PERSON_FRAMING_STOP_PATTERNS,
|
|
422
|
+
PERSON_RELATION_SINGLE_CHARS_RE,
|
|
330
423
|
};
|
package/lib/vault.js
CHANGED
|
@@ -865,6 +865,70 @@ class LocalVault {
|
|
|
865
865
|
.map((row) => this._rowToPerson(row));
|
|
866
866
|
}
|
|
867
867
|
|
|
868
|
+
/**
|
|
869
|
+
* searchPersons — LIKE-based name/identifier/notes search.
|
|
870
|
+
*
|
|
871
|
+
* 2026-05-27 — AnalysisEngine entityFocus="persons" path uses this when the
|
|
872
|
+
* question carries a probable person-name candidate ("妈手机号", "张三的电话").
|
|
873
|
+
* Pre-fix the engine dumped the first N contacts by ingest_at and let the
|
|
874
|
+
* LLM scan — but on small-model (Qwen 0.5B/1.5B, 20-fact budget) and large
|
|
875
|
+
* contact tables (100+), the target person rarely landed in the slice.
|
|
876
|
+
* Searching by LIKE %term% against the JSON-serialized `names` column +
|
|
877
|
+
* `identifiers` (phone numbers) + `notes` + `relation` gives the LLM the
|
|
878
|
+
* matching contact directly, eliminating that miss.
|
|
879
|
+
*
|
|
880
|
+
* No FTS5 schema migration: contact tables are small (typically <2000
|
|
881
|
+
* rows on Android), full LIKE scan stays sub-millisecond. Sticking with
|
|
882
|
+
* LIKE also avoids partial-index drift trap #25.
|
|
883
|
+
*
|
|
884
|
+
* @param {object} q
|
|
885
|
+
* @param {string} q.q term to match. Falls back to queryPersons when empty.
|
|
886
|
+
* @param {string} [q.subtype]
|
|
887
|
+
* @param {string} [q.adapter]
|
|
888
|
+
* @param {number} [q.limit=100]
|
|
889
|
+
* @param {number} [q.offset=0]
|
|
890
|
+
*/
|
|
891
|
+
searchPersons(q = {}) {
|
|
892
|
+
const term = typeof q.q === "string" ? q.q.trim() : "";
|
|
893
|
+
if (term.length === 0) {
|
|
894
|
+
return this.queryPersons(q);
|
|
895
|
+
}
|
|
896
|
+
const where = [];
|
|
897
|
+
const params = {};
|
|
898
|
+
// LIKE-escape % and _ in the user input so a name with literal % won't
|
|
899
|
+
// wildcard. SQLite LIKE ESCAPE clause handles this.
|
|
900
|
+
const escaped = term.replace(/([\\%_])/g, "\\$1");
|
|
901
|
+
params.qPat = "%" + escaped + "%";
|
|
902
|
+
where.push(
|
|
903
|
+
"(" +
|
|
904
|
+
"names LIKE @qPat ESCAPE '\\' OR " +
|
|
905
|
+
"identifiers LIKE @qPat ESCAPE '\\' OR " +
|
|
906
|
+
"notes LIKE @qPat ESCAPE '\\' OR " +
|
|
907
|
+
"relation LIKE @qPat ESCAPE '\\'" +
|
|
908
|
+
")"
|
|
909
|
+
);
|
|
910
|
+
if (q.subtype) {
|
|
911
|
+
where.push("subtype = @subtype");
|
|
912
|
+
params.subtype = q.subtype;
|
|
913
|
+
}
|
|
914
|
+
if (q.adapter) {
|
|
915
|
+
where.push("source_adapter = @adapter");
|
|
916
|
+
params.adapter = q.adapter;
|
|
917
|
+
}
|
|
918
|
+
const limit = Number.isInteger(q.limit) && q.limit > 0 ? Math.min(q.limit, 10000) : 100;
|
|
919
|
+
const offset = Number.isInteger(q.offset) && q.offset >= 0 ? q.offset : 0;
|
|
920
|
+
params.limit = limit;
|
|
921
|
+
params.offset = offset;
|
|
922
|
+
const sql =
|
|
923
|
+
"SELECT * FROM persons WHERE " + where.join(" AND ") +
|
|
924
|
+
" ORDER BY (confidence IS NULL) ASC, confidence DESC, ingested_at DESC" +
|
|
925
|
+
" LIMIT @limit OFFSET @offset";
|
|
926
|
+
return this._requireOpen()
|
|
927
|
+
.prepare(sql)
|
|
928
|
+
.all(params)
|
|
929
|
+
.map((row) => this._rowToPerson(row));
|
|
930
|
+
}
|
|
931
|
+
|
|
868
932
|
/**
|
|
869
933
|
* queryItems — list item entities (installed apps, purchases, media...).
|
|
870
934
|
* Pairs with queryPersons for AnalysisEngine fact gathering.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.3.7",
|
|
3
|
+
"version": "0.3.9",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|