@chainlesschain/personal-data-hub 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/__tests__/adapters/browser-history-chrome.test.js +377 -0
  2. package/__tests__/adapters/browser-history-edge.test.js +159 -0
  3. package/__tests__/adapters/git-activity.test.js +216 -0
  4. package/__tests__/adapters/local-files.test.js +264 -0
  5. package/__tests__/adapters/shell-history.test.js +180 -0
  6. package/__tests__/adapters/system-data-android.test.js +104 -3
  7. package/__tests__/adapters/vscode.test.js +299 -0
  8. package/__tests__/adapters/win-recent.test.js +192 -0
  9. package/__tests__/analysis.test.js +841 -2
  10. package/__tests__/categories.test.js +92 -0
  11. package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +146 -0
  12. package/__tests__/entity-resolver-vault.test.js +5 -2
  13. package/__tests__/integration/local-data-adapters-pipeline.test.js +373 -0
  14. package/__tests__/longtail-adapters.test.js +7 -2
  15. package/__tests__/query-parser.test.js +66 -0
  16. package/__tests__/registry.test.js +114 -0
  17. package/__tests__/sidecar-contacts-cross-validate.test.js +24 -1
  18. package/__tests__/sidecar-supervisor.test.js +9 -1
  19. package/__tests__/social-kuaishou-snapshot.test.js +55 -2
  20. package/__tests__/social-toutiao-snapshot.test.js +54 -2
  21. package/__tests__/vault-search-helpers.test.js +104 -0
  22. package/__tests__/vault-search.test.js +423 -0
  23. package/__tests__/vault.test.js +77 -3
  24. package/lib/adapters/browser-history-chrome/adapter.js +247 -0
  25. package/lib/adapters/browser-history-chrome/bookmarks-reader.js +79 -0
  26. package/lib/adapters/browser-history-chrome/chrome-db-reader.js +223 -0
  27. package/lib/adapters/browser-history-chrome/index.js +23 -0
  28. package/lib/adapters/browser-history-edge/adapter.js +34 -0
  29. package/lib/adapters/browser-history-edge/index.js +13 -0
  30. package/lib/adapters/git-activity/adapter.js +155 -0
  31. package/lib/adapters/git-activity/git-reader.js +125 -0
  32. package/lib/adapters/git-activity/index.js +17 -0
  33. package/lib/adapters/local-files/adapter.js +149 -0
  34. package/lib/adapters/local-files/file-walker.js +125 -0
  35. package/lib/adapters/local-files/index.js +18 -0
  36. package/lib/adapters/shell-history/adapter.js +137 -0
  37. package/lib/adapters/shell-history/index.js +17 -0
  38. package/lib/adapters/shell-history/shell-reader.js +100 -0
  39. package/lib/adapters/social-kuaishou/index.js +57 -1
  40. package/lib/adapters/social-toutiao/index.js +59 -1
  41. package/lib/adapters/system-data-android/adapter.js +220 -3
  42. package/lib/adapters/vscode/adapter.js +285 -0
  43. package/lib/adapters/vscode/index.js +18 -0
  44. package/lib/adapters/vscode/vscode-reader.js +191 -0
  45. package/lib/adapters/win-recent/adapter.js +150 -0
  46. package/lib/adapters/win-recent/index.js +16 -0
  47. package/lib/adapters/win-recent/win-recent-reader.js +72 -0
  48. package/lib/analysis.js +227 -9
  49. package/lib/categories.js +101 -0
  50. package/lib/index.js +61 -0
  51. package/lib/migrations.js +146 -0
  52. package/lib/query-parser.js +74 -0
  53. package/lib/registry.js +162 -0
  54. package/lib/vault.js +363 -2
  55. package/package.json +2 -1
  56. package/scripts/run-native-tests-sandbox.sh +53 -0
package/lib/analysis.js CHANGED
@@ -21,7 +21,7 @@
21
21
 
22
22
  "use strict";
23
23
 
24
- const { parseQuery } = require("./query-parser");
24
+ const { parseQuery, extractEntityTerm } = require("./query-parser");
25
25
  const {
26
26
  buildPrompt,
27
27
  parseCitations,
@@ -33,6 +33,34 @@ const { toError } = require("./adapter-spec");
33
33
  const DEFAULT_MAX_FACTS = 80;
34
34
  const DEFAULT_MAX_QUERY_LIMIT = 200;
35
35
 
36
+ // intent=latest hard cap when no time window is set. "最近的订单" / "最新消息"
37
+ // want the newest 1-3 rows, not 80 — freeing prompt budget lets the LLM
38
+ // actually read the row content instead of skimming. Memory:
39
+ // pdh_analysis_engine_intent_routing.md. When the user also gives a time
40
+ // window ("最近 30 天的消费") we treat it as list-with-window and fall
41
+ // through to the default broader path — see _gatherFacts.
42
+ const LATEST_INTENT_FACT_LIMIT = 3;
43
+
44
+ // intent=list FTS5 augmentation cap. When the question carries a probable
45
+ // entity-name ("提到王老板的消息", "苹果的订单") we run an extra
46
+ // vault.searchEvents(q=term) and append non-duplicate hits to FACTS. Cap
47
+ // at 10 so a popular term ("订单") can't drown out the adapter+time slice
48
+ // the user explicitly asked for. Stays additive (never replaces events).
49
+ const LIST_INTENT_FTS_LIMIT = 10;
50
+
51
+ // intent=sum-amount routing — the only event subtypes that carry an
52
+ // amount field worth summing. Order keeps "order" first because it's the
53
+ // most common shopping flow (taobao/jd/meituan/pdd all map to it). When
54
+ // the user asks "总共花了多少" we only want events from this set; pulling
55
+ // `message` / `visit` / `browse` would waste prompt budget on rows the
56
+ // LLM cannot use to compute a sum.
57
+ const SUM_AMOUNT_SUBTYPES = ["order", "payment", "transfer", "income"];
58
+ // Per-subtype query floor — the effMaxQueryLimit is split across the
59
+ // 4 subtypes so a popular `payment` slice can't crowd out `transfer`.
60
+ // Floor at 20 so per-call small-model budget (effMaxQueryLimit=50 →
61
+ // 12) doesn't starve any single subtype.
62
+ const SUM_AMOUNT_MIN_PER_SUBTYPE = 20;
63
+
36
64
  class AnalysisEngine {
37
65
  /**
38
66
  * @param {object} opts
@@ -72,6 +100,8 @@ class AnalysisEngine {
72
100
  * @param {boolean} [options.acceptNonLocal=false] required true for cloud LLMs
73
101
  * @param {number} [options.now]
74
102
  * @param {boolean} [options.skipAudit=false]
103
+ * @param {number} [options.maxFacts] per-call override of constructor `maxFacts` (e.g. on-device 1.5B model wants ~20)
104
+ * @param {number} [options.maxQueryLimit] per-call override of constructor `maxQueryLimit`
75
105
  * @returns {Promise<AskResult>}
76
106
  *
77
107
  * @typedef {object} AskResult
@@ -99,8 +129,34 @@ class AnalysisEngine {
99
129
  const startedAt = Date.now();
100
130
  const parsed = parseQuery(question, { now: options.now });
101
131
 
132
+ // Per-call budget overrides — on-device small models (Qwen2.5-1.5B etc.)
133
+ // need a much tighter prompt than desktop 7B+. Fall back to constructor
134
+ // defaults if not passed. Non-integer or non-positive overrides are ignored.
135
+ const effMaxFacts =
136
+ Number.isInteger(options.maxFacts) && options.maxFacts > 0
137
+ ? options.maxFacts
138
+ : this.maxFacts;
139
+ const effMaxQueryLimit =
140
+ Number.isInteger(options.maxQueryLimit) && options.maxQueryLimit > 0
141
+ ? options.maxQueryLimit
142
+ : this.maxQueryLimit;
143
+
102
144
  // Gather facts from the vault.
103
- const facts = this._gatherFacts(parsed);
145
+ const facts = this._gatherFacts(parsed, { maxFacts: effMaxFacts, maxQueryLimit: effMaxQueryLimit });
146
+
147
+ // Telemetry: prove the budget is reaching the engine. Goes to stderr so
148
+ // the Android side's stderrBuilder + logcat can surface it.
149
+ // Grep: `adb logcat | grep PDH-ASK`.
150
+ try {
151
+ process.stderr.write(
152
+ `[PDH-ASK] ask effMaxFacts=${effMaxFacts} effMaxQueryLimit=${effMaxQueryLimit} ` +
153
+ `gathered=${facts.length} (events=${facts.filter((f) => f.type === "event").length} ` +
154
+ `persons=${facts.filter((f) => f.type === "person").length} ` +
155
+ `items=${facts.filter((f) => f.type === "item").length}) ` +
156
+ `adapter=${(parsed.filters && parsed.filters.adapter) || "*"} ` +
157
+ `intent=${parsed.intent || "*"}\n`
158
+ );
159
+ } catch (_e) { /* stderr write failures are non-fatal */ }
104
160
 
105
161
  // Optional RAG augmentation.
106
162
  let ragContext = [];
@@ -135,10 +191,20 @@ class AnalysisEngine {
135
191
  systemPrompt: this.systemPrompt,
136
192
  intent: parsed.intent,
137
193
  timeWindow: parsed.timeWindow,
138
- maxFacts: this.maxFacts,
194
+ maxFacts: effMaxFacts,
139
195
  vaultTotals: this._gatherVaultTotals(),
140
196
  });
141
197
 
198
+ // Telemetry: post-cap prompt size + truncation count. If `truncated` > 0
199
+ // the LLM is seeing fewer facts than _gatherFacts found.
200
+ try {
201
+ const promptChars = messages.reduce((s, m) => s + (m.content || "").length, 0);
202
+ process.stderr.write(
203
+ `[PDH-ASK] prompt factCount=${factCount} truncated=${truncated} ` +
204
+ `messages=${messages.length} promptChars=${promptChars}\n`
205
+ );
206
+ } catch (_e) { /* non-fatal */ }
207
+
142
208
  // Call LLM. **skipCache: true** is critical: PDH answers depend on
143
209
  // current vault state (new contacts / events / items ingested between
144
210
  // asks). The desktop LLMManager has a 7-day ResponseCache keyed on
@@ -224,6 +290,8 @@ class AnalysisEngine {
224
290
  * @param {object} [options]
225
291
  * @param {number} [options.now]
226
292
  * @param {boolean} [options.skipAudit=false]
293
+ * @param {number} [options.maxFacts] per-call override (small-model budget)
294
+ * @param {number} [options.maxQueryLimit] per-call override
227
295
  * @returns {Promise<RetrieveContextResult>}
228
296
  *
229
297
  * @typedef {object} RetrieveContextResult
@@ -232,7 +300,7 @@ class AnalysisEngine {
232
300
  * @property {Array<object>} facts
233
301
  * @property {string[]} factIds
234
302
  * @property {number} factCount
235
- * @property {boolean} truncated
303
+ * @property {number} truncated Count of facts dropped at the maxFacts cap (0 = nothing truncated)
236
304
  * @property {string[]} ragContextIds
237
305
  * @property {Array<{role: string, content: string}>} messages prompt-builder output, LLM-ready
238
306
  * @property {string} systemPrompt
@@ -246,7 +314,17 @@ class AnalysisEngine {
246
314
 
247
315
  const startedAt = Date.now();
248
316
  const parsed = parseQuery(question, { now: options.now });
249
- const facts = this._gatherFacts(parsed);
317
+
318
+ const effMaxFacts =
319
+ Number.isInteger(options.maxFacts) && options.maxFacts > 0
320
+ ? options.maxFacts
321
+ : this.maxFacts;
322
+ const effMaxQueryLimit =
323
+ Number.isInteger(options.maxQueryLimit) && options.maxQueryLimit > 0
324
+ ? options.maxQueryLimit
325
+ : this.maxQueryLimit;
326
+
327
+ const facts = this._gatherFacts(parsed, { maxFacts: effMaxFacts, maxQueryLimit: effMaxQueryLimit });
250
328
 
251
329
  const ragContextIds = [];
252
330
  if (this.ragRetriever) {
@@ -276,7 +354,7 @@ class AnalysisEngine {
276
354
  systemPrompt: this.systemPrompt,
277
355
  intent: parsed.intent,
278
356
  timeWindow: parsed.timeWindow,
279
- maxFacts: this.maxFacts,
357
+ maxFacts: effMaxFacts,
280
358
  vaultTotals: this._gatherVaultTotals(),
281
359
  });
282
360
 
@@ -312,7 +390,91 @@ class AnalysisEngine {
312
390
 
313
391
  // ─── Internals ─────────────────────────────────────────────────────
314
392
 
315
- _gatherFacts(parsed) {
393
+ _gatherFacts(parsed, budget = {}) {
394
+ // Per-call budget overrides constructor defaults — small-model callers
395
+ // (Android Qwen2.5-1.5B) pass tighter caps here.
396
+ const effMaxFacts =
397
+ Number.isInteger(budget.maxFacts) && budget.maxFacts > 0
398
+ ? budget.maxFacts
399
+ : this.maxFacts;
400
+ const effMaxQueryLimit =
401
+ Number.isInteger(budget.maxQueryLimit) && budget.maxQueryLimit > 0
402
+ ? budget.maxQueryLimit
403
+ : this.maxQueryLimit;
404
+
405
+ // Intent routing — intent=latest WITHOUT a time window means "newest
406
+ // few" (e.g. "最近的订单", "最新消息"). Hard-cap to
407
+ // LATEST_INTENT_FACT_LIMIT and skip persons/items entirely: the user
408
+ // is asking about an event timeline, not their contact list.
409
+ //
410
+ // When timeWindow IS set ("最近 30 天的消费" hits BOTH parseTimeWindow
411
+ // AND intent=latest), fall through to the default list-with-window
412
+ // path — a user asking for 30 days doesn't want 3 newest rows.
413
+ //
414
+ // Fallback: if the targeted query returns 0 events, fall through to
415
+ // the broader default behavior. Protects against low-confidence
416
+ // classifier picks (see pdh_analysis_engine_intent_routing memory).
417
+ if (parsed.intent === "latest" && !parsed.timeWindow) {
418
+ const latestQ = {
419
+ limit: Math.min(LATEST_INTENT_FACT_LIMIT, effMaxFacts),
420
+ };
421
+ if (parsed.filters && parsed.filters.adapter) {
422
+ latestQ.adapter = parsed.filters.adapter;
423
+ }
424
+ const latestEvents = this.vault.queryEvents(latestQ);
425
+ if (latestEvents.length > 0) return latestEvents;
426
+ // 0 results → fall through to default broader path below.
427
+ }
428
+
429
+ // intent=sum-amount routing — "总共花了多少" / "在淘宝花了多少钱"
430
+ // only needs events from amount-bearing subtypes (order/payment/
431
+ // transfer/income). Pulling messages / visits / browses wastes
432
+ // prompt budget on rows the LLM can't aggregate into a sum.
433
+ //
434
+ // We split the budget across the 4 subtypes (at least 20 rows each),
435
+ // union the results, dedup by id (an event would only appear once
436
+ // anyway since subtype is unique per event — defensive), and sort
437
+ // by occurredAt DESC. Adapter + time window are passed through so
438
+ // "上个月在淘宝总共花了多少" stays scoped.
439
+ //
440
+ // Skip persons/items — they don't carry amounts.
441
+ //
442
+ // 0 hits → return EMPTY (do NOT fall through). If the user asks
443
+ // "总共花了多少" and the vault has zero amount-bearing events under
444
+ // adapter+time scope, the default path would pull messages / visits /
445
+ // browsing rows the LLM might wrongly try to sum. Empty FACTS +
446
+ // warning="no-facts" + TOTALS preamble lets the model say "找不到
447
+ // 相关花费记录" cleanly. This diverges from latest's fallback (which
448
+ // surfaces persons/items for general "what's recent" context); for
449
+ // sum-amount that fallback would actively mislead.
450
+ if (parsed.intent === "sum-amount") {
451
+ const perSubtype = Math.max(
452
+ SUM_AMOUNT_MIN_PER_SUBTYPE,
453
+ Math.floor(effMaxQueryLimit / SUM_AMOUNT_SUBTYPES.length)
454
+ );
455
+ const seen = new Set();
456
+ const amountEvents = [];
457
+ for (const sub of SUM_AMOUNT_SUBTYPES) {
458
+ const subQ = { limit: perSubtype, subtype: sub };
459
+ if (parsed.filters && parsed.filters.adapter) {
460
+ subQ.adapter = parsed.filters.adapter;
461
+ }
462
+ if (parsed.timeWindow) {
463
+ if (Number.isFinite(parsed.timeWindow.since)) subQ.since = parsed.timeWindow.since;
464
+ if (Number.isFinite(parsed.timeWindow.until)) subQ.until = parsed.timeWindow.until;
465
+ }
466
+ const rows = this.vault.queryEvents(subQ);
467
+ for (const e of rows) {
468
+ if (e && e.id && !seen.has(e.id)) {
469
+ seen.add(e.id);
470
+ amountEvents.push(e);
471
+ }
472
+ }
473
+ }
474
+ amountEvents.sort((a, b) => (b.occurredAt || 0) - (a.occurredAt || 0));
475
+ return amountEvents.slice(0, effMaxFacts);
476
+ }
477
+
316
478
  // Deliberately do NOT pass parsed.filters.subtype as a vault filter:
317
479
  // the keyword heuristic (`order` vs `payment` vs `transfer`) is too
318
480
  // crude to reliably narrow without false negatives. E.g. a user
@@ -323,7 +485,7 @@ class AnalysisEngine {
323
485
  // apply on prose. The LLM is good at filtering; SQL keyword guessing
324
486
  // is brittle.
325
487
  const q = {
326
- limit: this.maxQueryLimit,
488
+ limit: effMaxQueryLimit,
327
489
  };
328
490
  if (parsed.filters && parsed.filters.adapter) q.adapter = parsed.filters.adapter;
329
491
  if (parsed.timeWindow) {
@@ -332,6 +494,58 @@ class AnalysisEngine {
332
494
  }
333
495
  const events = this.vault.queryEvents(q);
334
496
 
497
+ // intent=list + entity-name FTS5 augmentation — when the question
498
+ // carries a probable entity-name candidate ("提到王老板的消息",
499
+ // "苹果的订单"), run an extra vault.searchEvents(q=term) and append
500
+ // hits not already in `events`. Adapter + time window are passed
501
+ // through so the FTS slice stays consistent with the main query.
502
+ //
503
+ // Strictly additive: the FTS hits are appended to `events` (no
504
+ // replacement). Wrong term extraction at worst returns 0 rows; FTS
505
+ // errors are swallowed — main path (events + persons + items) stays
506
+ // intact. See pdh_analysis_engine_intent_routing.md.
507
+ //
508
+ // Skipped for intent ∈ {count, sum-amount, latest}:
509
+ // - count uses TOTALS preamble; FACTS sample doesn't need padding
510
+ // - sum-amount is value-aggregation; entity-name hits don't help
511
+ // - latest usually returned earlier via narrow path; its fall-through is not augmented either
512
+ if (
513
+ parsed.intent === "list" &&
514
+ typeof this.vault.searchEvents === "function"
515
+ ) {
516
+ const entityTerm = extractEntityTerm(parsed.raw);
517
+ if (entityTerm) {
518
+ const headroom = effMaxFacts - events.length;
519
+ if (headroom > 0) {
520
+ try {
521
+ const ftsQ = {
522
+ q: entityTerm,
523
+ limit: Math.min(headroom, LIST_INTENT_FTS_LIMIT),
524
+ };
525
+ if (parsed.filters && parsed.filters.adapter) {
526
+ ftsQ.adapter = parsed.filters.adapter;
527
+ }
528
+ if (parsed.timeWindow) {
529
+ if (Number.isFinite(parsed.timeWindow.since)) ftsQ.since = parsed.timeWindow.since;
530
+ if (Number.isFinite(parsed.timeWindow.until)) ftsQ.until = parsed.timeWindow.until;
531
+ }
532
+ const ftsResult = this.vault.searchEvents(ftsQ);
533
+ if (ftsResult && Array.isArray(ftsResult.rows)) {
534
+ const existingIds = new Set(events.map((e) => e.id));
535
+ for (const row of ftsResult.rows) {
536
+ if (row && row.id && !existingIds.has(row.id)) {
537
+ events.push(row);
538
+ existingIds.add(row.id);
539
+ }
540
+ }
541
+ }
542
+ } catch (_e) {
543
+ // FTS failure is non-fatal — main events array already populated.
544
+ }
545
+ }
546
+ }
547
+ }
548
+
335
549
  // Path C follow-up — events alone miss whole categories of facts:
336
550
  // - contacts (system-data-android) land in `persons`, not `events`
337
551
  // - installed apps land in `items`, not `events`
@@ -349,7 +563,7 @@ class AnalysisEngine {
349
563
  // state snapshots that should always be visible. Adapter filter is also
350
564
  // skipped because users asking "我有几个联系人" don't say "from
351
565
  // system-data-android".
352
- const remaining = Math.max(0, this.maxFacts - events.length);
566
+ const remaining = Math.max(0, effMaxFacts - events.length);
353
567
  const sideBudget = Math.floor(remaining / 2);
354
568
  const personBudget = sideBudget > 0 ? sideBudget : 0;
355
569
  const itemBudget = remaining - personBudget;
@@ -412,4 +626,8 @@ module.exports = {
412
626
  AnalysisEngine,
413
627
  DEFAULT_MAX_FACTS,
414
628
  DEFAULT_MAX_QUERY_LIMIT,
629
+ LATEST_INTENT_FACT_LIMIT,
630
+ LIST_INTENT_FTS_LIMIT,
631
+ SUM_AMOUNT_SUBTYPES,
632
+ SUM_AMOUNT_MIN_PER_SUBTYPE,
415
633
  };
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Shared adapter → category taxonomy for the PDH Vault Browser UI.
3
+ *
4
+ * Single source of truth consumed by:
5
+ * - packages/web-panel (desktop browser view)
6
+ * - packages/cli (cc hub search --category)
7
+ * - android-app (mirrored as Kotlin enum in PdhCategoryMap.kt; keep in sync)
8
+ *
9
+ * Categories are stable user-facing buckets (社交聊天 / 内容平台 / ...) — the
10
+ * browser sidebar keys off these, not raw adapter names. New adapters get
11
+ * mapped here once and surface in the right bucket on both shells.
12
+ *
13
+ * Matching is prefix-based by adapter name so we don't have to touch this
14
+ * file for every adapter variant (e.g. `email-imap-qq`, `email-imap-gmail`).
15
+ * First matching prefix wins; order in PREFIX_RULES matters for overlapping
16
+ * prefixes (none today, but reserve the right).
17
+ */
18
+
19
+ "use strict";
20
+
21
+ const CATEGORIES = Object.freeze([
22
+ "chat", // 即时通讯 / 私聊
23
+ "social", // 内容平台 / 短视频 / 微博
24
+ "email", // 邮件
25
+ "shopping", // 支付 / 订单 / 购物
26
+ "travel", // 出行 / 地图 / 票务
27
+ "system", // 系统数据(通讯录 / 应用列表)
28
+ "ai-chat", // AI 助手对话历史
29
+ "other", // 兜底
30
+ ]);
31
+
32
+ const CATEGORY_LABELS = Object.freeze({
33
+ chat: "社交聊天",
34
+ social: "内容平台",
35
+ email: "邮件",
36
+ shopping: "支付订单",
37
+ travel: "出行",
38
+ system: "系统数据",
39
+ "ai-chat": "AI 对话",
40
+ other: "其他",
41
+ });
42
+
43
+ // Ordered prefix → category rules. First match wins.
44
+ // Each entry: [prefixOrExact, category].
45
+ // Use a trailing `*` to mean "prefix match"; absent `*` means exact match.
46
+ const PREFIX_RULES = Object.freeze([
47
+ ["wechat", "chat"],
48
+ ["messaging-*", "chat"],
49
+ ["social-*", "social"],
50
+ ["email-*", "email"],
51
+ ["shopping-*", "shopping"],
52
+ ["alipay-*", "shopping"],
53
+ ["travel-*", "travel"],
54
+ ["system-data*", "system"],
55
+ ["browser-*", "system"],
56
+ ["vscode", "system"],
57
+ ["win-recent", "system"],
58
+ ["git-activity", "system"],
59
+ ["shell-history", "system"],
60
+ ["local-files", "system"],
61
+ ["ai-chat-*", "ai-chat"],
62
+ ]);
63
+
64
+ /**
65
+ * Map an adapter name to its category.
66
+ * @param {string} adapterName e.g. "social-bilibili" / "email-imap-qq" / "wechat"
67
+ * @returns {string} category id from CATEGORIES (never throws — falls back to "other")
68
+ */
69
+ function getCategory(adapterName) {
70
+ if (typeof adapterName !== "string" || adapterName.length === 0) return "other";
71
+ for (const [rule, cat] of PREFIX_RULES) {
72
+ if (rule.endsWith("*")) {
73
+ const prefix = rule.slice(0, -1);
74
+ if (adapterName.startsWith(prefix)) return cat;
75
+ } else if (adapterName === rule) {
76
+ return cat;
77
+ }
78
+ }
79
+ return "other";
80
+ }
81
+
82
+ /**
83
+ * Group a list of adapter names by category. Returns
84
+ * `{ [category]: string[] }` with empty categories omitted.
85
+ */
86
+ function groupByCategory(adapterNames) {
87
+ const out = {};
88
+ for (const name of adapterNames || []) {
89
+ const c = getCategory(name);
90
+ (out[c] ||= []).push(name);
91
+ }
92
+ return out;
93
+ }
94
+
95
+ module.exports = {
96
+ CATEGORIES,
97
+ CATEGORY_LABELS,
98
+ PREFIX_RULES,
99
+ getCategory,
100
+ groupByCategory,
101
+ };
package/lib/index.js CHANGED
@@ -58,6 +58,14 @@ const entityResolver = require("./entity-resolver");
58
58
  const analysisSkills = require("./analysis-skills");
59
59
  const mobileExtractor = require("./mobile-extractor");
60
60
  const systemDataAndroid = require("./adapters/system-data-android");
61
+ const browserHistoryChrome = require("./adapters/browser-history-chrome");
62
+ const browserHistoryEdge = require("./adapters/browser-history-edge");
63
+ const vscodeAdapter = require("./adapters/vscode");
64
+ const winRecentAdapter = require("./adapters/win-recent");
65
+ const gitActivityAdapter = require("./adapters/git-activity");
66
+ const shellHistoryAdapter = require("./adapters/shell-history");
67
+ const localFilesAdapter = require("./adapters/local-files");
68
+ const categories = require("./categories");
61
69
 
62
70
  module.exports = {
63
71
  // Constants / enums
@@ -87,6 +95,7 @@ module.exports = {
87
95
  TARGET_SCHEMA_VERSION: migrations.TARGET_VERSION,
88
96
  applyMigrations: migrations.applyMigrations,
89
97
  getSchemaVersion: migrations.getSchemaVersion,
98
+ getFtsMode: migrations.getFtsMode,
90
99
 
91
100
  // Key providers
92
101
  KEY_HEX_LEN: keyProviders.KEY_HEX_LEN,
@@ -262,6 +271,51 @@ module.exports = {
262
271
  ingestSystemDataAndroidSnapshot:
263
272
  systemDataAndroid.ingestSystemDataAndroidSnapshot,
264
273
 
274
+ // Phase 17 (2026-05-24) — desktop Chrome local browser history + bookmarks.
275
+ // SQLite snapshot copy + Bookmarks JSON parse; no network, no extension.
276
+ BrowserHistoryChromeAdapter: browserHistoryChrome.BrowserHistoryChromeAdapter,
277
+ BROWSER_HISTORY_CHROME_NAME: browserHistoryChrome.BROWSER_HISTORY_CHROME_NAME,
278
+ BROWSER_HISTORY_CHROME_VERSION: browserHistoryChrome.BROWSER_HISTORY_CHROME_VERSION,
279
+ defaultChromeProfileDir: browserHistoryChrome.defaultChromeProfileDir,
280
+
281
+ // Edge — Chromium under the hood, same readers, different profile root.
282
+ BrowserHistoryEdgeAdapter: browserHistoryEdge.BrowserHistoryEdgeAdapter,
283
+ BROWSER_HISTORY_EDGE_NAME: browserHistoryEdge.BROWSER_HISTORY_EDGE_NAME,
284
+ BROWSER_HISTORY_EDGE_VERSION: browserHistoryEdge.BROWSER_HISTORY_EDGE_VERSION,
285
+
286
+ // VS Code — workspace history + global terminal command/dir history.
287
+ VSCodeAdapter: vscodeAdapter.VSCodeAdapter,
288
+ VSCODE_NAME: vscodeAdapter.VSCODE_NAME,
289
+ VSCODE_VERSION: vscodeAdapter.VSCODE_VERSION,
290
+ defaultVscodeRoot: vscodeAdapter.defaultVscodeRoot,
291
+
292
+ // Windows Recent — .lnk shortcut list from %APPDATA%\Microsoft\Windows\Recent.
293
+ // Cross-application "what did I open and when" timeline (Win-only adapter).
294
+ WinRecentAdapter: winRecentAdapter.WinRecentAdapter,
295
+ WIN_RECENT_NAME: winRecentAdapter.WIN_RECENT_NAME,
296
+ WIN_RECENT_VERSION: winRecentAdapter.WIN_RECENT_VERSION,
297
+ defaultWinRecentDir: winRecentAdapter.defaultRecentDir,
298
+
299
+ // Phase 18 — git activity (commit timeline across local code repos).
300
+ GitActivityAdapter: gitActivityAdapter.GitActivityAdapter,
301
+ GIT_ACTIVITY_NAME: gitActivityAdapter.GIT_ACTIVITY_NAME,
302
+ GIT_ACTIVITY_VERSION: gitActivityAdapter.GIT_ACTIVITY_VERSION,
303
+ defaultCodeRoots: gitActivityAdapter.defaultCodeRoots,
304
+
305
+ // Phase 18 — shell history (PowerShell / bash / zsh command timelines).
306
+ ShellHistoryAdapter: shellHistoryAdapter.ShellHistoryAdapter,
307
+ SHELL_HISTORY_NAME: shellHistoryAdapter.SHELL_HISTORY_NAME,
308
+ SHELL_HISTORY_VERSION: shellHistoryAdapter.SHELL_HISTORY_VERSION,
309
+ defaultShellHistorySources: shellHistoryAdapter.defaultHistorySources,
310
+
311
+ // Phase 18 — local files (file walk under Documents / Desktop / Downloads /
312
+ // Pictures / Videos / Music). Cross-application "what files do I have"
313
+ // timeline rooted in mtime, with app-cache excludes baked in.
314
+ LocalFilesAdapter: localFilesAdapter.LocalFilesAdapter,
315
+ LOCAL_FILES_NAME: localFilesAdapter.LOCAL_FILES_NAME,
316
+ LOCAL_FILES_VERSION: localFilesAdapter.LOCAL_FILES_VERSION,
317
+ defaultLocalFileRoots: localFilesAdapter.defaultRoots,
318
+
265
319
  // Phase 6 — AlipayBillAdapter (CSV import)
266
320
  AlipayBillAdapter: alipayBillAdapter.AlipayBillAdapter,
267
321
  ALIPAY_BILL_NAME: alipayBillAdapter.ALIPAY_BILL_NAME,
@@ -273,4 +327,11 @@ module.exports = {
273
327
  alipayCounterpartyToPersonId: alipayBillAdapter.alipayCounterpartyToPersonId,
274
328
  ALIPAY_KNOWN_MERCHANTS: alipayBillAdapter.ALIPAY_KNOWN_MERCHANTS,
275
329
  mapAlipayTypeToSubtype: alipayBillAdapter.mapAlipayTypeToSubtype,
330
+
331
+ // Phase 16 — Vault Browser shared taxonomy (categories + adapter mapping)
332
+ CATEGORIES: categories.CATEGORIES,
333
+ CATEGORY_LABELS: categories.CATEGORY_LABELS,
334
+ PDH_PREFIX_RULES: categories.PREFIX_RULES,
335
+ getAdapterCategory: categories.getCategory,
336
+ groupAdaptersByCategory: categories.groupByCategory,
276
337
  };