@chainlesschain/personal-data-hub 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +432 -0
  2. package/__tests__/adapters/social-kuaishou-adb-collector.test.js +276 -0
  3. package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +141 -0
  4. package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +178 -0
  5. package/__tests__/adapters/social-toutiao-adb-api-client.test.js +537 -0
  6. package/__tests__/adapters/social-toutiao-adb-collector.test.js +285 -0
  7. package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +163 -0
  8. package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +196 -0
  9. package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +351 -0
  10. package/__tests__/analysis.test.js +239 -14
  11. package/__tests__/query-parser.test.js +86 -0
  12. package/__tests__/vault.test.js +88 -0
  13. package/lib/adapters/ai-chat-history/health-checker.js +11 -0
  14. package/lib/adapters/social-kuaishou-adb/api-client.js +397 -0
  15. package/lib/adapters/social-kuaishou-adb/collector.js +196 -0
  16. package/lib/adapters/social-kuaishou-adb/cookies-extension.js +261 -0
  17. package/lib/adapters/social-kuaishou-adb/index.js +53 -0
  18. package/lib/adapters/social-kuaishou-adb/snapshot-builder.js +145 -0
  19. package/lib/adapters/social-toutiao-adb/api-client.js +377 -0
  20. package/lib/adapters/social-toutiao-adb/collector.js +200 -0
  21. package/lib/adapters/social-toutiao-adb/cookies-extension.js +266 -0
  22. package/lib/adapters/social-toutiao-adb/index.js +52 -0
  23. package/lib/adapters/social-toutiao-adb/snapshot-builder.js +148 -0
  24. package/lib/adapters/social-xiaohongshu-adb/api-client.js +36 -5
  25. package/lib/adapters/social-xiaohongshu-adb/collector.js +102 -51
  26. package/lib/analysis.js +154 -17
  27. package/lib/query-parser.js +93 -0
  28. package/lib/vault.js +64 -0
  29. package/package.json +5 -1
@@ -36,7 +36,12 @@ async function collect(bridge, opts = {}) {
36
36
  );
37
37
  }
38
38
  const now = opts.now || Date.now;
39
- const client = opts.apiClient || new XhsApiClient({ now });
39
+ // Phase 6b: signProvider opt — desktop wiring injects XhsSignBridge for
40
+ // ~100% X-S hit rate; cli wiring leaves undefined → client falls back
41
+ // to in-process best-effort md5 (~60% GET / <30% POST).
42
+ const signProvider = opts.signProvider || undefined;
43
+ const client =
44
+ opts.apiClient || new XhsApiClient({ now, signProvider });
40
45
  const limits = opts.limits || {};
41
46
 
42
47
  const cookieResult = await bridge.invoke("xhs.cookies");
@@ -54,67 +59,108 @@ async function collect(bridge, opts = {}) {
54
59
  }
55
60
  const { cookie, a1, diagnostic: cookieDiagnostic } = cookieResult;
56
61
 
57
- // fetchMe no X-S required
58
- const me = await client.fetchMe(cookie);
59
- if (!me) {
60
- // Cookie expired or web_session missing — write empty snapshot
61
- // (build requires userId, use sentinel "0" + emit 0 events).
62
+ // Phase 6b: warm up the sign bridge with the captured cookie BEFORE
63
+ // calling any X-S endpoint. warmUp is idempotent (no-op when already
64
+ // warm). NullSignProvider.warmUp doesn't exist (only on the abstract
65
+ // base + ElectronWebSignBridge), so we feature-detect.
66
+ if (signProvider && typeof signProvider.warmUp === "function") {
67
+ try {
68
+ await signProvider.warmUp(cookie);
69
+ } catch (e) {
70
+ // Bridge warm-up failed (timeout / xhs.com 403 / IPC error).
71
+ // Fall through — api-client will use in-process fallback. Surface
72
+ // the reason via lastErrorMessage so UI can hint "Electron bridge
73
+ // unavailable, command-line precision degraded".
74
+ client._setLastError(
75
+ -98,
76
+ `signProvider warm-up failed: ${e && e.message ? e.message : String(e)}`,
77
+ );
78
+ }
79
+ }
80
+
81
+ try {
82
+ // fetchMe — no X-S required
83
+ const me = await client.fetchMe(cookie);
84
+ if (!me) {
85
+ // Cookie expired or web_session missing — write empty snapshot
86
+ // (build requires userId, use sentinel "unknown-user" + emit 0 events).
87
+ const snapshot = buildSnapshot({
88
+ userId: "unknown-user",
89
+ nickname: opts.displayName,
90
+ snapshottedAt: now(),
91
+ });
92
+ const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
93
+ return {
94
+ snapshotPath,
95
+ userId: null,
96
+ nickname: null,
97
+ eventCounts: { note: 0, liked: 0, follow: 0, total: 0 },
98
+ lastErrorCode: client.lastErrorCode,
99
+ lastErrorMessage: client.lastErrorMessage,
100
+ cookieDiagnostic: cookieDiagnostic || null,
101
+ meFetchFailed: true,
102
+ signProviderUsed: signProvider
103
+ ? signProvider.constructor.name
104
+ : "none",
105
+ signProviderHits: client._bridgeHits,
106
+ signProviderFallbacks: client._fallbackHits,
107
+ };
108
+ }
109
+
110
+ // Parallel 3 endpoints — partial failure tolerated; bridge-signed
111
+ // requests should hit ~100% while fallback hits ~60% GET / <30% POST.
112
+ const [notes, liked, follows] = await Promise.all([
113
+ client.fetchNotes(cookie, a1, me.userId, {
114
+ limit: Number.isInteger(limits.note) ? limits.note : undefined,
115
+ }),
116
+ client.fetchLiked(cookie, a1, {
117
+ limit: Number.isInteger(limits.liked) ? limits.liked : undefined,
118
+ }),
119
+ client.fetchFollows(cookie, a1, me.userId, {
120
+ limit: Number.isInteger(limits.follow) ? limits.follow : undefined,
121
+ }),
122
+ ]);
123
+
62
124
  const snapshot = buildSnapshot({
63
- userId: "unknown-user",
64
- nickname: opts.displayName,
125
+ userId: me.userId,
126
+ nickname: opts.displayName || me.nickname,
127
+ notes,
128
+ liked,
129
+ follows,
65
130
  snapshottedAt: now(),
66
131
  });
67
132
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
133
+
68
134
  return {
69
135
  snapshotPath,
70
- userId: null,
71
- nickname: null,
72
- eventCounts: { note: 0, liked: 0, follow: 0, total: 0 },
136
+ userId: me.userId,
137
+ nickname: me.nickname,
138
+ eventCounts: {
139
+ note: notes.length,
140
+ liked: liked.length,
141
+ follow: follows.length,
142
+ total: snapshot.events.length,
143
+ },
73
144
  lastErrorCode: client.lastErrorCode,
74
145
  lastErrorMessage: client.lastErrorMessage,
75
146
  cookieDiagnostic: cookieDiagnostic || null,
76
- meFetchFailed: true,
147
+ meFetchFailed: false,
148
+ signProviderUsed: signProvider ? signProvider.constructor.name : "none",
149
+ signProviderHits: client._bridgeHits,
150
+ signProviderFallbacks: client._fallbackHits,
77
151
  };
152
+ } finally {
153
+ // Always release the WebContentsView heap (~30-50MB) — even on
154
+ // throw. shutdown is idempotent so collectAndSync's outer cleanup
155
+ // calling it again is safe.
156
+ if (signProvider && typeof signProvider.shutdown === "function") {
157
+ try {
158
+ await signProvider.shutdown();
159
+ } catch (_e) {
160
+ // Best-effort — shutdown errors don't block sync result.
161
+ }
162
+ }
78
163
  }
79
-
80
- // Parallel 3 endpoints — partial failure tolerated (~60% X-S hit rate)
81
- const [notes, liked, follows] = await Promise.all([
82
- client.fetchNotes(cookie, a1, me.userId, {
83
- limit: Number.isInteger(limits.note) ? limits.note : undefined,
84
- }),
85
- client.fetchLiked(cookie, a1, {
86
- limit: Number.isInteger(limits.liked) ? limits.liked : undefined,
87
- }),
88
- client.fetchFollows(cookie, a1, me.userId, {
89
- limit: Number.isInteger(limits.follow) ? limits.follow : undefined,
90
- }),
91
- ]);
92
-
93
- const snapshot = buildSnapshot({
94
- userId: me.userId,
95
- nickname: opts.displayName || me.nickname,
96
- notes,
97
- liked,
98
- follows,
99
- snapshottedAt: now(),
100
- });
101
- const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
102
-
103
- return {
104
- snapshotPath,
105
- userId: me.userId,
106
- nickname: me.nickname,
107
- eventCounts: {
108
- note: notes.length,
109
- liked: liked.length,
110
- follow: follows.length,
111
- total: snapshot.events.length,
112
- },
113
- lastErrorCode: client.lastErrorCode,
114
- lastErrorMessage: client.lastErrorMessage,
115
- cookieDiagnostic: cookieDiagnostic || null,
116
- meFetchFailed: false,
117
- };
118
164
  }
119
165
 
120
166
  async function collectAndSync(bridge, registry, opts = {}) {
@@ -147,6 +193,11 @@ async function collectAndSync(bridge, registry, opts = {}) {
147
193
  lastErrorMessage: collectResult.lastErrorMessage,
148
194
  cookieDiagnostic: collectResult.cookieDiagnostic,
149
195
  meFetchFailed: collectResult.meFetchFailed,
196
+ // Phase 6b diagnostic — UI can highlight when bridge upgraded
197
+ // X-S signing from ~60% best-effort to ~100% bridge.
198
+ signProviderUsed: collectResult.signProviderUsed,
199
+ signProviderHits: collectResult.signProviderHits,
200
+ signProviderFallbacks: collectResult.signProviderFallbacks,
150
201
  cleanupFailed,
151
202
  },
152
203
  };
package/lib/analysis.js CHANGED
@@ -21,7 +21,7 @@
21
21
 
22
22
  "use strict";
23
23
 
24
- const { parseQuery, extractEntityTerm } = require("./query-parser");
24
+ const { parseQuery, extractEntityTerm, extractPersonNameCandidate } = require("./query-parser");
25
25
  const {
26
26
  buildPrompt,
27
27
  parseCitations,
@@ -61,6 +61,27 @@ const SUM_AMOUNT_SUBTYPES = ["order", "payment", "transfer", "income"];
61
61
  // 12) doesn't starve any single subtype.
62
62
  const SUM_AMOUNT_MIN_PER_SUBTYPE = 20;
63
63
 
64
+ // entityFocus="persons" routing — explicit contact queries ("我有哪些联系人",
65
+ // "妈手机号"). When the user names the target table the engine MUST NOT
66
+ // compete persons against the events pool: small-model Android budgets
67
+ // (20 facts / 50 row cap) get drained by a few hundred Bilibili
68
+ // notifications and the contact slice ends up empty. parseEntityFocus
69
+ // surfaces the signal; we honor it by going persons-first.
70
+ //
71
+ // Keep a TINY events headroom (5%) so questions like "我最近跟妈打过电话吗"
72
+ // still surface 通话 event rows alongside the contact entry.
73
+ const PERSONS_FOCUS_EVENT_HEADROOM_RATIO = 0.05;
74
+
75
+ // Default-path budget split when no entityFocus signal. Pre-fix, events
76
+ // got the entire effMaxFacts pool first and persons/items shared only the
77
+ // remainder; on a busy vault that meant 0 contacts in the prompt. Cap
78
+ // events at 70%, reserve 20% for persons and 10% for items so a generic
79
+ // "what's going on" question still sees the full data shape.
80
+ const DEFAULT_EVENT_BUDGET_RATIO = 0.7;
81
+ const DEFAULT_PERSON_BUDGET_RATIO = 0.2;
82
+ // Items take whatever remains; intent=count/list questions about contacts
83
+ // already short-circuit via entityFocus before reaching this branch.
84
+
64
85
  class AnalysisEngine {
65
86
  /**
66
87
  * @param {object} opts
@@ -426,6 +447,88 @@ class AnalysisEngine {
426
447
  // 0 results → fall through to default broader path below.
427
448
  }
428
449
 
450
+ // entityFocus=persons routing — "我有哪些联系人", "妈手机号", "通讯录里
451
+ // 有多少人". Skip the events broad scan and put the entire fact budget
452
+ // on the persons table (with a 5% events headroom for adjacent rows
453
+ // like 通话/短信). Adapter / time window are NOT applied to persons:
454
+ // contacts are current-state snapshots, not time-stamped events.
455
+ //
456
+ // 0 hits → fall through to the default path. A user might say "联系人"
457
+ // colloquially when they mean "people I've messaged" — the default
458
+ // events+persons mix is the right safety net.
459
+ if (parsed.entityFocus === "persons") {
460
+ const personLimit = effMaxFacts > 1 ? effMaxFacts - 1 : effMaxFacts;
461
+ let persons = [];
462
+ // Name-search short-circuit — when the question carries a probable
463
+ // person-name candidate ("妈手机号", "张三的电话"), try LIKE-search
464
+ // against names / identifiers / notes / relation. Hits go straight
465
+ // to FACTS so the LLM sees the target contact even when the vault
466
+ // holds hundreds of others. Falls back to ingest-ordered queryPersons
467
+ // when 0 hits or no name candidate.
468
+ const nameCandidate = extractPersonNameCandidate(parsed.raw);
469
+ if (nameCandidate && typeof this.vault.searchPersons === "function") {
470
+ try {
471
+ persons = this.vault.searchPersons({ q: nameCandidate, limit: personLimit });
472
+ } catch (_e) { /* tolerate — try ingest-ordered fallback */ }
473
+ }
474
+ if (persons.length === 0) {
475
+ try {
476
+ persons = this.vault.queryPersons({ limit: personLimit });
477
+ } catch (_e) {
478
+ // legacy vault — fall through
479
+ }
480
+ }
481
+ if (persons.length > 0) {
482
+ const eventHeadroom = Math.max(
483
+ 0,
484
+ Math.floor(effMaxFacts * PERSONS_FOCUS_EVENT_HEADROOM_RATIO)
485
+ );
486
+ let events = [];
487
+ if (eventHeadroom > 0) {
488
+ const eq = { limit: eventHeadroom };
489
+ if (parsed.filters && parsed.filters.adapter) eq.adapter = parsed.filters.adapter;
490
+ if (parsed.timeWindow) {
491
+ if (Number.isFinite(parsed.timeWindow.since)) eq.since = parsed.timeWindow.since;
492
+ if (Number.isFinite(parsed.timeWindow.until)) eq.until = parsed.timeWindow.until;
493
+ }
494
+ try {
495
+ events = this.vault.queryEvents(eq);
496
+ } catch (_e) { /* tolerate */ }
497
+ }
498
+ // persons-first ordering so the LLM reads the contact rows before
499
+ // the (sparse) event tail.
500
+ const combined = [...persons, ...events].slice(0, effMaxFacts);
501
+ return combined;
502
+ }
503
+ // 0 persons → fall through.
504
+ }
505
+
506
+ // entityFocus=items routing — "我装了哪些 app", "有哪些游戏". Mirror
507
+ // persons branch: skip events, query items table directly, keep a
508
+ // tiny events headroom for adjacent rows.
509
+ if (parsed.entityFocus === "items") {
510
+ const itemLimit = effMaxFacts > 1 ? effMaxFacts - 1 : effMaxFacts;
511
+ let items = [];
512
+ try {
513
+ items = this.vault.queryItems({ limit: itemLimit });
514
+ } catch (_e) { /* legacy */ }
515
+ if (items.length > 0) {
516
+ const eventHeadroom = Math.max(
517
+ 0,
518
+ Math.floor(effMaxFacts * PERSONS_FOCUS_EVENT_HEADROOM_RATIO)
519
+ );
520
+ let events = [];
521
+ if (eventHeadroom > 0) {
522
+ const eq = { limit: eventHeadroom };
523
+ if (parsed.filters && parsed.filters.adapter) eq.adapter = parsed.filters.adapter;
524
+ try {
525
+ events = this.vault.queryEvents(eq);
526
+ } catch (_e) { /* tolerate */ }
527
+ }
528
+ return [...items, ...events].slice(0, effMaxFacts);
529
+ }
530
+ }
531
+
429
532
  // intent=sum-amount routing — "总共花了多少" / "在淘宝花了多少钱"
430
533
  // only needs events from amount-bearing subtypes (order/payment/
431
534
  // transfer/income). Pulling messages / visits / browses wastes
@@ -551,22 +654,40 @@ class AnalysisEngine {
551
654
  // - installed apps land in `items`, not `events`
552
655
  // - places (visited locations) live in `places`
553
656
  // Without these the LLM gets 0 facts for "我有几个联系人" style questions
554
- // and hallucinates a count. We pull a bounded slice of each entity type
555
- // and append; prompt-builder.summarizeFact already handles `person` /
556
- // `place` / fallback `item` shapes, so this is additive with no schema
557
- // change to the LLM-facing prompt.
657
+ // and hallucinates a count.
658
+ //
659
+ // Sizing — two regimes:
660
+ // (a) Events fit (events.length < effMaxFacts): legacy behavior —
661
+ // events first, split the remainder evenly between persons + items.
662
+ // (b) Events would monopolize (events.length >= effMaxFacts): reserve
663
+ // DEFAULT_PERSON_BUDGET_RATIO (20%) + 10% for persons + items so a
664
+ // busy event timeline doesn't shove every contact out of the prompt.
665
+ // If persons + items tables BOTH return 0 rows, refill the reserve
666
+ // with events — no point starving the LLM of facts when the side
667
+ // tables are empty (small vaults / pre-Path-C ingest state).
558
668
  //
559
- // Sizing: keep events as the majority (existing behavior is unchanged for
560
- // event-heavy queries like 消费 / 通话); split the remaining 1/2 budget
561
- // between persons + items. Time window + adapter filters don't apply to
562
- // these tables (persons aren't time-stamped events) — they're current-
563
- // state snapshots that should always be visible. Adapter filter is also
564
- // skipped because users asking "我有几个联系人" don't say "from
565
- // system-data-android".
566
- const remaining = Math.max(0, effMaxFacts - events.length);
567
- const sideBudget = Math.floor(remaining / 2);
568
- const personBudget = sideBudget > 0 ? sideBudget : 0;
569
- const itemBudget = remaining - personBudget;
669
+ // Time window + adapter filters don't apply to persons/items: they're
670
+ // current-state snapshots, not time-stamped events. A user asking
671
+ // "上个月联系人变化" is rare enough to leave for a future intent.
672
+ let cappedEvents = events;
673
+ let personBudget;
674
+ let itemBudget;
675
+ if (events.length >= effMaxFacts) {
676
+ const personReserve = Math.max(1, Math.floor(effMaxFacts * DEFAULT_PERSON_BUDGET_RATIO));
677
+ const itemReserve = Math.max(
678
+ 1,
679
+ Math.floor(effMaxFacts * (1 - DEFAULT_EVENT_BUDGET_RATIO - DEFAULT_PERSON_BUDGET_RATIO))
680
+ );
681
+ const eventCap = Math.max(1, effMaxFacts - personReserve - itemReserve);
682
+ cappedEvents = events.slice(0, eventCap);
683
+ personBudget = personReserve;
684
+ itemBudget = itemReserve;
685
+ } else {
686
+ const remaining = effMaxFacts - events.length;
687
+ const sideBudget = Math.floor(remaining / 2);
688
+ personBudget = sideBudget > 0 ? sideBudget : 0;
689
+ itemBudget = remaining - personBudget;
690
+ }
570
691
 
571
692
  let persons = [];
572
693
  if (personBudget > 0) {
@@ -585,7 +706,20 @@ class AnalysisEngine {
585
706
  }
586
707
  }
587
708
 
588
- return [...events, ...persons, ...items];
709
+ // Refill backfill — when events overflowed (reservation branch) but
710
+ // persons + items both returned 0 rows, give the reserved slots back
711
+ // to events. Small vaults / pre-Path-C state would otherwise see fewer
712
+ // facts than the budget allowed.
713
+ if (
714
+ events.length >= effMaxFacts &&
715
+ persons.length === 0 &&
716
+ items.length === 0 &&
717
+ cappedEvents.length < effMaxFacts
718
+ ) {
719
+ cappedEvents = events.slice(0, effMaxFacts);
720
+ }
721
+
722
+ return [...cappedEvents, ...persons, ...items];
589
723
  }
590
724
 
591
725
  /**
@@ -630,4 +764,7 @@ module.exports = {
630
764
  LIST_INTENT_FTS_LIMIT,
631
765
  SUM_AMOUNT_SUBTYPES,
632
766
  SUM_AMOUNT_MIN_PER_SUBTYPE,
767
+ PERSONS_FOCUS_EVENT_HEADROOM_RATIO,
768
+ DEFAULT_EVENT_BUDGET_RATIO,
769
+ DEFAULT_PERSON_BUDGET_RATIO,
633
770
  };
@@ -219,6 +219,42 @@ function parseIntent(text) {
219
219
  return "list";
220
220
  }
221
221
 
222
+ // ─── Entity-focus detection (persons / items routing) ────────────────────
223
+ //
224
+ // 2026-05-27 — Bug: user asked "我有哪些联系人" / "我妈手机号" several times;
225
+ // vault held real contacts but the LLM kept replying "没数据" because the
226
+ // default _gatherFacts pulled 200 row-cap of events first and the persons
227
+ // slice got squeezed out of the small-model 20-fact budget. parseIntent
228
+ // already catches "几个 X" as count, but that doesn't tell the engine WHICH
229
+ // table the user means. parseEntityFocus is the missing signal: when the
230
+ // question is explicitly about contacts/apps, the engine prioritizes that
231
+ // table instead of competing with events.
232
+ //
233
+ // Returns null when no focus signal — engine falls back to the existing
234
+ // events-majority + persons/items remainder behavior.
235
+ //
236
+ // Memory: pdh_analysis_engine_intent_routing.md.
237
+
238
+ const PERSON_FOCUS_PATTERNS = [
239
+ /(联系人|通讯录|电话簿|通信录|好友列表|朋友列表)/,
240
+ /(手机号|电话号|号码是|的电话|的手机)/,
241
+ /(谁是|是谁|是什么人)/,
242
+ /\b(contact|contacts|phonebook|address\s*book|phone\s*number)\b/i,
243
+ ];
244
+
245
+ const ITEM_FOCUS_PATTERNS = [
246
+ /(装了|安装了|装过|下了什么|下载了什么|有哪些(app|应用|软件|游戏))/i,
247
+ /(我的(app|应用|软件)|哪些(app|应用|软件|游戏))/i,
248
+ /\b(installed\s+apps?|my\s+apps?|installed\s+packages?)\b/i,
249
+ ];
250
+
251
+ function parseEntityFocus(text) {
252
+ if (typeof text !== "string" || text.length === 0) return null;
253
+ if (PERSON_FOCUS_PATTERNS.some((re) => re.test(text))) return "persons";
254
+ if (ITEM_FOCUS_PATTERNS.some((re) => re.test(text))) return "items";
255
+ return null;
256
+ }
257
+
222
258
  // ─── Entity-name extraction (FTS5 fulltext routing) ────────────────────
223
259
  //
224
260
  // Pull a probable entity-name candidate out of the raw question so
@@ -291,6 +327,56 @@ function extractEntityTerm(text) {
291
327
  return candidates[0];
292
328
  }
293
329
 
330
+ // ─── Person-name extraction (entityFocus=persons routing) ────────────────
331
+ //
332
+ // Specialized extractor for the persons branch in AnalysisEngine. Differs
333
+ // from extractEntityTerm in two ways:
334
+ //
335
+ // 1. Strips person-FOCUS framing words first (联系人/手机号/电话/etc.) —
336
+ // they're question scaffolding, not the target name. extractEntityTerm
337
+ // left "妈手机号" intact because it doesn't know that phrase is framing.
338
+ //
339
+ // 2. Allows single-character names from a relation-word whitelist
340
+ // (妈/爸/姐/弟/...) — extractEntityTerm filtered every 1-char Chinese to
341
+ // suppress verb false positives, but that also dropped "妈" / "爸" which
342
+ // are the dominant contact-name shorthands on a personal phonebook.
343
+ //
344
+ // Multi-char candidates always win over single-char fallback so "张三的
345
+ // 手机号" returns "张三" not "三".
346
+
347
+ const PERSON_FRAMING_STOP_PATTERNS = [
348
+ /(联系人|通讯录|电话簿|通信录|好友列表|朋友列表)/g,
349
+ /(手机号|电话号|号码是|的电话|的手机|号码|电话)/g,
350
+ /(谁是|是谁|是什么人|是哪位)/g,
351
+ /\b(contact|contacts|phonebook|address\s*book|phone\s*number)\b/gi,
352
+ ];
353
+
354
+ // Whitelisted single-character Chinese relation words. Single-char tokens
355
+ // outside this set are dropped to keep verb / particle false-positives from
356
+ // leaking through. Extend cautiously — every new char widens the LIKE
357
+ // surface area and could match unrelated rows.
358
+ const PERSON_RELATION_SINGLE_CHARS_RE =
359
+ /^[妈爸姐妹哥弟爹娘爷奶姥舅姑叔伯婶嫂嫁公婆]$/;
360
+
361
+ function extractPersonNameCandidate(text) {
362
+ if (typeof text !== "string" || text.length === 0) return null;
363
+ let s = text;
364
+ for (const re of PERSON_FRAMING_STOP_PATTERNS) {
365
+ s = s.replace(re, " ");
366
+ }
367
+ for (const re of ENTITY_STOP_PATTERNS) {
368
+ s = s.replace(re, " ");
369
+ }
370
+ const all = s.split(/\s+/).filter((t) => t.length >= 1 && t.length <= 10);
371
+ if (all.length === 0) return null;
372
+ const multi = all
373
+ .filter((t) => t.length >= 2)
374
+ .sort((a, b) => b.length - a.length);
375
+ if (multi.length > 0) return multi[0];
376
+ const single = all.find((t) => t.length === 1 && PERSON_RELATION_SINGLE_CHARS_RE.test(t));
377
+ return single || null;
378
+ }
379
+
294
380
  // ─── Full parser ─────────────────────────────────────────────────────────
295
381
 
296
382
  /**
@@ -314,6 +400,7 @@ function parseQuery(question, opts = {}) {
314
400
  timeWindow: parseTimeWindow(raw, now),
315
401
  filters: parseFilters(raw),
316
402
  intent: parseIntent(raw),
403
+ entityFocus: parseEntityFocus(raw),
317
404
  };
318
405
  }
319
406
 
@@ -322,9 +409,15 @@ module.exports = {
322
409
  parseTimeWindow,
323
410
  parseFilters,
324
411
  parseIntent,
412
+ parseEntityFocus,
325
413
  extractEntityTerm,
414
+ extractPersonNameCandidate,
326
415
  // exposed for tests
327
416
  SUBTYPE_KEYWORDS,
328
417
  ADAPTER_KEYWORDS,
418
+ PERSON_FOCUS_PATTERNS,
419
+ ITEM_FOCUS_PATTERNS,
329
420
  ENTITY_STOP_PATTERNS,
421
+ PERSON_FRAMING_STOP_PATTERNS,
422
+ PERSON_RELATION_SINGLE_CHARS_RE,
330
423
  };
package/lib/vault.js CHANGED
@@ -865,6 +865,70 @@ class LocalVault {
865
865
  .map((row) => this._rowToPerson(row));
866
866
  }
867
867
 
868
+ /**
869
+ * searchPersons — LIKE-based name/identifier/notes search.
870
+ *
871
+ * 2026-05-27 — AnalysisEngine entityFocus="persons" path uses this when the
872
+ * question carries a probable person-name candidate ("妈手机号", "张三的电话").
873
+ * Pre-fix the engine dumped the first N contacts by ingest_at and let the
874
+ * LLM scan — but on small-model (Qwen 0.5B/1.5B, 20-fact budget) and large
875
+ * contact tables (100+), the target person rarely landed in the slice.
876
+ * Searching by LIKE %term% against the JSON-serialized `names` column +
877
+ * `identifiers` (phone numbers) + `notes` + `relation` gives the LLM the
878
+ * matching contact directly, eliminating that miss.
879
+ *
880
+ * No FTS5 schema migration: contact tables are small (typically <2000
881
+ * rows on Android), full LIKE scan stays sub-millisecond. Sticking with
882
+ * LIKE also avoids partial-index drift trap #25.
883
+ *
884
+ * @param {object} q
885
+ * @param {string} q.q - term to match. Falls back to queryPersons when empty.
886
+ * @param {string} [q.subtype]
887
+ * @param {string} [q.adapter]
888
+ * @param {number} [q.limit=100]
889
+ * @param {number} [q.offset=0]
890
+ */
891
+ searchPersons(q = {}) {
892
+ const term = typeof q.q === "string" ? q.q.trim() : "";
893
+ if (term.length === 0) {
894
+ return this.queryPersons(q);
895
+ }
896
+ const where = [];
897
+ const params = {};
898
+ // LIKE-escape % and _ in the user input so a name with literal % won't
899
+ // wildcard. SQLite LIKE ESCAPE clause handles this.
900
+ const escaped = term.replace(/([\\%_])/g, "\\$1");
901
+ params.qPat = "%" + escaped + "%";
902
+ where.push(
903
+ "(" +
904
+ "names LIKE @qPat ESCAPE '\\' OR " +
905
+ "identifiers LIKE @qPat ESCAPE '\\' OR " +
906
+ "notes LIKE @qPat ESCAPE '\\' OR " +
907
+ "relation LIKE @qPat ESCAPE '\\'" +
908
+ ")"
909
+ );
910
+ if (q.subtype) {
911
+ where.push("subtype = @subtype");
912
+ params.subtype = q.subtype;
913
+ }
914
+ if (q.adapter) {
915
+ where.push("source_adapter = @adapter");
916
+ params.adapter = q.adapter;
917
+ }
918
+ const limit = Number.isInteger(q.limit) && q.limit > 0 ? Math.min(q.limit, 10000) : 100;
919
+ const offset = Number.isInteger(q.offset) && q.offset >= 0 ? q.offset : 0;
920
+ params.limit = limit;
921
+ params.offset = offset;
922
+ const sql =
923
+ "SELECT * FROM persons WHERE " + where.join(" AND ") +
924
+ " ORDER BY (confidence IS NULL) ASC, confidence DESC, ingested_at DESC" +
925
+ " LIMIT @limit OFFSET @offset";
926
+ return this._requireOpen()
927
+ .prepare(sql)
928
+ .all(params)
929
+ .map((row) => this._rowToPerson(row));
930
+ }
931
+
868
932
  /**
869
933
  * queryItems — list item entities (installed apps, purchases, media...).
870
934
  * Pairs with queryPersons for AnalysisEngine fact gathering.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.3.6",
3
+ "version": "0.3.8",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",
@@ -61,6 +61,10 @@
61
61
  "./adapters/social-douyin-adb": "./lib/adapters/social-douyin-adb/index.js",
62
62
  "./adapters/social-xiaohongshu": "./lib/adapters/social-xiaohongshu/index.js",
63
63
  "./adapters/social-xiaohongshu-adb": "./lib/adapters/social-xiaohongshu-adb/index.js",
64
+ "./adapters/social-toutiao": "./lib/adapters/social-toutiao/index.js",
65
+ "./adapters/social-toutiao-adb": "./lib/adapters/social-toutiao-adb/index.js",
66
+ "./adapters/social-kuaishou": "./lib/adapters/social-kuaishou/index.js",
67
+ "./adapters/social-kuaishou-adb": "./lib/adapters/social-kuaishou-adb/index.js",
64
68
  "./adapters/messaging-qq": "./lib/adapters/messaging-qq/index.js",
65
69
  "./adapters/messaging-telegram": "./lib/adapters/messaging-telegram/index.js",
66
70
  "./adapters/messaging-whatsapp": "./lib/adapters/messaging-whatsapp/index.js",