@chainlesschain/personal-data-hub 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/browser-history-chrome.test.js +377 -0
- package/__tests__/adapters/browser-history-edge.test.js +159 -0
- package/__tests__/adapters/git-activity.test.js +216 -0
- package/__tests__/adapters/local-files.test.js +264 -0
- package/__tests__/adapters/shell-history.test.js +180 -0
- package/__tests__/adapters/system-data-android.test.js +104 -3
- package/__tests__/adapters/vscode.test.js +299 -0
- package/__tests__/adapters/win-recent.test.js +192 -0
- package/__tests__/analysis.test.js +841 -2
- package/__tests__/categories.test.js +92 -0
- package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +146 -0
- package/__tests__/entity-resolver-vault.test.js +5 -2
- package/__tests__/integration/local-data-adapters-pipeline.test.js +373 -0
- package/__tests__/longtail-adapters.test.js +7 -2
- package/__tests__/query-parser.test.js +66 -0
- package/__tests__/registry.test.js +114 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +24 -1
- package/__tests__/sidecar-supervisor.test.js +9 -1
- package/__tests__/social-kuaishou-snapshot.test.js +55 -2
- package/__tests__/social-toutiao-snapshot.test.js +54 -2
- package/__tests__/vault-search-helpers.test.js +104 -0
- package/__tests__/vault-search.test.js +423 -0
- package/__tests__/vault.test.js +77 -3
- package/lib/adapters/browser-history-chrome/adapter.js +247 -0
- package/lib/adapters/browser-history-chrome/bookmarks-reader.js +79 -0
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +223 -0
- package/lib/adapters/browser-history-chrome/index.js +23 -0
- package/lib/adapters/browser-history-edge/adapter.js +34 -0
- package/lib/adapters/browser-history-edge/index.js +13 -0
- package/lib/adapters/git-activity/adapter.js +155 -0
- package/lib/adapters/git-activity/git-reader.js +125 -0
- package/lib/adapters/git-activity/index.js +17 -0
- package/lib/adapters/local-files/adapter.js +149 -0
- package/lib/adapters/local-files/file-walker.js +125 -0
- package/lib/adapters/local-files/index.js +18 -0
- package/lib/adapters/shell-history/adapter.js +137 -0
- package/lib/adapters/shell-history/index.js +17 -0
- package/lib/adapters/shell-history/shell-reader.js +100 -0
- package/lib/adapters/social-kuaishou/index.js +57 -1
- package/lib/adapters/social-toutiao/index.js +59 -1
- package/lib/adapters/system-data-android/adapter.js +220 -3
- package/lib/adapters/vscode/adapter.js +285 -0
- package/lib/adapters/vscode/index.js +18 -0
- package/lib/adapters/vscode/vscode-reader.js +191 -0
- package/lib/adapters/win-recent/adapter.js +150 -0
- package/lib/adapters/win-recent/index.js +16 -0
- package/lib/adapters/win-recent/win-recent-reader.js +72 -0
- package/lib/analysis.js +227 -9
- package/lib/categories.js +101 -0
- package/lib/index.js +61 -0
- package/lib/migrations.js +146 -0
- package/lib/query-parser.js +74 -0
- package/lib/registry.js +162 -0
- package/lib/vault.js +363 -2
- package/package.json +2 -1
- package/scripts/run-native-tests-sandbox.sh +53 -0
package/lib/analysis.js
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
|
|
22
22
|
"use strict";
|
|
23
23
|
|
|
24
|
-
const { parseQuery } = require("./query-parser");
|
|
24
|
+
const { parseQuery, extractEntityTerm } = require("./query-parser");
|
|
25
25
|
const {
|
|
26
26
|
buildPrompt,
|
|
27
27
|
parseCitations,
|
|
@@ -33,6 +33,34 @@ const { toError } = require("./adapter-spec");
|
|
|
33
33
|
const DEFAULT_MAX_FACTS = 80;
|
|
34
34
|
const DEFAULT_MAX_QUERY_LIMIT = 200;
|
|
35
35
|
|
|
36
|
+
// intent=latest hard cap when no time window is set. "最近的订单" / "最新消息"
|
|
37
|
+
// want the newest 1-3 rows, not 80 — freeing prompt budget lets the LLM
|
|
38
|
+
// actually read the row content instead of skimming. Memory:
|
|
39
|
+
// pdh_analysis_engine_intent_routing.md. When the user also gives a time
|
|
40
|
+
// window ("最近 30 天的消费") we treat it as list-with-window and fall
|
|
41
|
+
// through to the default broader path — see _gatherFacts.
|
|
42
|
+
const LATEST_INTENT_FACT_LIMIT = 3;
|
|
43
|
+
|
|
44
|
+
// intent=list FTS5 augmentation cap. When the question carries a probable
|
|
45
|
+
// entity-name ("提到王老板的消息", "苹果的订单") we run an extra
|
|
46
|
+
// vault.searchEvents(q=term) and append non-duplicate hits to FACTS. Cap
|
|
47
|
+
// at 10 so a popular term ("订单") can't drown out the adapter+time slice
|
|
48
|
+
// the user explicitly asked for. Stays additive (never replaces events).
|
|
49
|
+
const LIST_INTENT_FTS_LIMIT = 10;
|
|
50
|
+
|
|
51
|
+
// intent=sum-amount routing — the only event subtypes that carry an
|
|
52
|
+
// amount field worth summing. Order keeps "order" first because it's the
|
|
53
|
+
// most common shopping flow (taobao/jd/meituan/pdd all map to it). When
|
|
54
|
+
// the user asks "总共花了多少" we only want events from this set; pulling
|
|
55
|
+
// `message` / `visit` / `browse` would waste prompt budget on rows the
|
|
56
|
+
// LLM cannot use to compute a sum.
|
|
57
|
+
const SUM_AMOUNT_SUBTYPES = ["order", "payment", "transfer", "income"];
|
|
58
|
+
// Per-subtype query limit floor — effMaxQueryLimit is split across the
|
|
59
|
+
// 4 subtypes so a popular `payment` slice can't crowd out `transfer`.
|
|
60
|
+
// Floor at 20 so per-call small-model budget (effMaxQueryLimit=50 →
|
|
61
|
+
// 12 rows per subtype) doesn't starve any single subtype.
|
|
62
|
+
const SUM_AMOUNT_MIN_PER_SUBTYPE = 20;
|
|
63
|
+
|
|
36
64
|
class AnalysisEngine {
|
|
37
65
|
/**
|
|
38
66
|
* @param {object} opts
|
|
@@ -72,6 +100,8 @@ class AnalysisEngine {
|
|
|
72
100
|
* @param {boolean} [options.acceptNonLocal=false] required true for cloud LLMs
|
|
73
101
|
* @param {number} [options.now]
|
|
74
102
|
* @param {boolean} [options.skipAudit=false]
|
|
103
|
+
* @param {number} [options.maxFacts] per-call override of constructor `maxFacts` (e.g. on-device 1.5B model wants ~20)
|
|
104
|
+
* @param {number} [options.maxQueryLimit] per-call override of constructor `maxQueryLimit`
|
|
75
105
|
* @returns {Promise<AskResult>}
|
|
76
106
|
*
|
|
77
107
|
* @typedef {object} AskResult
|
|
@@ -99,8 +129,34 @@ class AnalysisEngine {
|
|
|
99
129
|
const startedAt = Date.now();
|
|
100
130
|
const parsed = parseQuery(question, { now: options.now });
|
|
101
131
|
|
|
132
|
+
// Per-call budget overrides — on-device small models (Qwen2.5-1.5B etc.)
|
|
133
|
+
// need a much tighter prompt than desktop 7B+. Fall back to constructor
|
|
134
|
+
// defaults if not passed. Non-positive overrides are ignored.
|
|
135
|
+
const effMaxFacts =
|
|
136
|
+
Number.isInteger(options.maxFacts) && options.maxFacts > 0
|
|
137
|
+
? options.maxFacts
|
|
138
|
+
: this.maxFacts;
|
|
139
|
+
const effMaxQueryLimit =
|
|
140
|
+
Number.isInteger(options.maxQueryLimit) && options.maxQueryLimit > 0
|
|
141
|
+
? options.maxQueryLimit
|
|
142
|
+
: this.maxQueryLimit;
|
|
143
|
+
|
|
102
144
|
// Gather facts from the vault.
|
|
103
|
-
const facts = this._gatherFacts(parsed);
|
|
145
|
+
const facts = this._gatherFacts(parsed, { maxFacts: effMaxFacts, maxQueryLimit: effMaxQueryLimit });
|
|
146
|
+
|
|
147
|
+
// Telemetry: prove the budget is reaching the engine. Goes to stderr so
|
|
148
|
+
// the Android side's stderrBuilder + logcat can surface it.
|
|
149
|
+
// Grep: `adb logcat | grep PDH-ASK`.
|
|
150
|
+
try {
|
|
151
|
+
process.stderr.write(
|
|
152
|
+
`[PDH-ASK] ask effMaxFacts=${effMaxFacts} effMaxQueryLimit=${effMaxQueryLimit} ` +
|
|
153
|
+
`gathered=${facts.length} (events=${facts.filter((f) => f.type === "event").length} ` +
|
|
154
|
+
`persons=${facts.filter((f) => f.type === "person").length} ` +
|
|
155
|
+
`items=${facts.filter((f) => f.type === "item").length}) ` +
|
|
156
|
+
`adapter=${(parsed.filters && parsed.filters.adapter) || "*"} ` +
|
|
157
|
+
`intent=${parsed.intent || "*"}\n`
|
|
158
|
+
);
|
|
159
|
+
} catch (_e) { /* stderr write failures are non-fatal */ }
|
|
104
160
|
|
|
105
161
|
// Optional RAG augmentation.
|
|
106
162
|
let ragContext = [];
|
|
@@ -135,10 +191,20 @@ class AnalysisEngine {
|
|
|
135
191
|
systemPrompt: this.systemPrompt,
|
|
136
192
|
intent: parsed.intent,
|
|
137
193
|
timeWindow: parsed.timeWindow,
|
|
138
|
-
maxFacts:
|
|
194
|
+
maxFacts: effMaxFacts,
|
|
139
195
|
vaultTotals: this._gatherVaultTotals(),
|
|
140
196
|
});
|
|
141
197
|
|
|
198
|
+
// Telemetry: post-cap prompt size + truncation count. If `truncated` > 0
|
|
199
|
+
// the LLM is seeing fewer facts than _gatherFacts found.
|
|
200
|
+
try {
|
|
201
|
+
const promptChars = messages.reduce((s, m) => s + (m.content || "").length, 0);
|
|
202
|
+
process.stderr.write(
|
|
203
|
+
`[PDH-ASK] prompt factCount=${factCount} truncated=${truncated} ` +
|
|
204
|
+
`messages=${messages.length} promptChars=${promptChars}\n`
|
|
205
|
+
);
|
|
206
|
+
} catch (_e) { /* non-fatal */ }
|
|
207
|
+
|
|
142
208
|
// Call LLM. **skipCache: true** is critical: PDH answers depend on
|
|
143
209
|
// current vault state (new contacts / events / items ingested between
|
|
144
210
|
// asks). The desktop LLMManager has a 7-day ResponseCache keyed on
|
|
@@ -224,6 +290,8 @@ class AnalysisEngine {
|
|
|
224
290
|
* @param {object} [options]
|
|
225
291
|
* @param {number} [options.now]
|
|
226
292
|
* @param {boolean} [options.skipAudit=false]
|
|
293
|
+
* @param {number} [options.maxFacts] per-call override (small-model budget)
|
|
294
|
+
* @param {number} [options.maxQueryLimit] per-call override
|
|
227
295
|
* @returns {Promise<RetrieveContextResult>}
|
|
228
296
|
*
|
|
229
297
|
* @typedef {object} RetrieveContextResult
|
|
@@ -232,7 +300,7 @@ class AnalysisEngine {
|
|
|
232
300
|
* @property {Array<object>} facts
|
|
233
301
|
* @property {string[]} factIds
|
|
234
302
|
* @property {number} factCount
|
|
235
|
-
* @property {
|
|
303
|
+
* @property {number} truncated Count of facts dropped at the maxFacts cap (0 = nothing truncated)
|
|
236
304
|
* @property {string[]} ragContextIds
|
|
237
305
|
* @property {Array<{role: string, content: string}>} messages prompt-builder output, LLM-ready
|
|
238
306
|
* @property {string} systemPrompt
|
|
@@ -246,7 +314,17 @@ class AnalysisEngine {
|
|
|
246
314
|
|
|
247
315
|
const startedAt = Date.now();
|
|
248
316
|
const parsed = parseQuery(question, { now: options.now });
|
|
249
|
-
|
|
317
|
+
|
|
318
|
+
const effMaxFacts =
|
|
319
|
+
Number.isInteger(options.maxFacts) && options.maxFacts > 0
|
|
320
|
+
? options.maxFacts
|
|
321
|
+
: this.maxFacts;
|
|
322
|
+
const effMaxQueryLimit =
|
|
323
|
+
Number.isInteger(options.maxQueryLimit) && options.maxQueryLimit > 0
|
|
324
|
+
? options.maxQueryLimit
|
|
325
|
+
: this.maxQueryLimit;
|
|
326
|
+
|
|
327
|
+
const facts = this._gatherFacts(parsed, { maxFacts: effMaxFacts, maxQueryLimit: effMaxQueryLimit });
|
|
250
328
|
|
|
251
329
|
const ragContextIds = [];
|
|
252
330
|
if (this.ragRetriever) {
|
|
@@ -276,7 +354,7 @@ class AnalysisEngine {
|
|
|
276
354
|
systemPrompt: this.systemPrompt,
|
|
277
355
|
intent: parsed.intent,
|
|
278
356
|
timeWindow: parsed.timeWindow,
|
|
279
|
-
maxFacts:
|
|
357
|
+
maxFacts: effMaxFacts,
|
|
280
358
|
vaultTotals: this._gatherVaultTotals(),
|
|
281
359
|
});
|
|
282
360
|
|
|
@@ -312,7 +390,91 @@ class AnalysisEngine {
|
|
|
312
390
|
|
|
313
391
|
// ─── Internals ─────────────────────────────────────────────────────
|
|
314
392
|
|
|
315
|
-
_gatherFacts(parsed) {
|
|
393
|
+
_gatherFacts(parsed, budget = {}) {
|
|
394
|
+
// Per-call budget overrides constructor defaults — small-model callers
|
|
395
|
+
// (Android Qwen2.5-1.5B) pass tighter caps here.
|
|
396
|
+
const effMaxFacts =
|
|
397
|
+
Number.isInteger(budget.maxFacts) && budget.maxFacts > 0
|
|
398
|
+
? budget.maxFacts
|
|
399
|
+
: this.maxFacts;
|
|
400
|
+
const effMaxQueryLimit =
|
|
401
|
+
Number.isInteger(budget.maxQueryLimit) && budget.maxQueryLimit > 0
|
|
402
|
+
? budget.maxQueryLimit
|
|
403
|
+
: this.maxQueryLimit;
|
|
404
|
+
|
|
405
|
+
// Intent routing — intent=latest WITHOUT a time window means "newest
|
|
406
|
+
// few" (e.g. "最近的订单", "最新消息"). Hard-cap to
|
|
407
|
+
// LATEST_INTENT_FACT_LIMIT and skip persons/items entirely: the user
|
|
408
|
+
// is asking about an event timeline, not their contact list.
|
|
409
|
+
//
|
|
410
|
+
// When timeWindow IS set ("最近 30 天的消费" hits BOTH parseTimeWindow
|
|
411
|
+
// AND intent=latest), fall through to the default list-with-window
|
|
412
|
+
// path — a user asking for 30 days doesn't want 3 newest rows.
|
|
413
|
+
//
|
|
414
|
+
// Fallback: if the targeted query returns 0 events, fall through to
|
|
415
|
+
// the broader default behavior. Protects against low-confidence
|
|
416
|
+
// classifier picks (see pdh_analysis_engine_intent_routing memory).
|
|
417
|
+
if (parsed.intent === "latest" && !parsed.timeWindow) {
|
|
418
|
+
const latestQ = {
|
|
419
|
+
limit: Math.min(LATEST_INTENT_FACT_LIMIT, effMaxFacts),
|
|
420
|
+
};
|
|
421
|
+
if (parsed.filters && parsed.filters.adapter) {
|
|
422
|
+
latestQ.adapter = parsed.filters.adapter;
|
|
423
|
+
}
|
|
424
|
+
const latestEvents = this.vault.queryEvents(latestQ);
|
|
425
|
+
if (latestEvents.length > 0) return latestEvents;
|
|
426
|
+
// 0 results → fall through to default broader path below.
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
// intent=sum-amount routing — "总共花了多少" / "在淘宝花了多少钱"
|
|
430
|
+
// only needs events from amount-bearing subtypes (order/payment/
|
|
431
|
+
// transfer/income). Pulling messages / visits / browses wastes
|
|
432
|
+
// prompt budget on rows the LLM can't aggregate into a sum.
|
|
433
|
+
//
|
|
434
|
+
// We split the budget across the 4 subtypes (min 20 each, floor),
|
|
435
|
+
// union the results, dedup by id (an event would only appear once
|
|
436
|
+
// anyway since subtype is unique per event — defensive), and sort
|
|
437
|
+
// by occurredAt DESC. Adapter + time window are passed through so
|
|
438
|
+
// "上个月在淘宝总共花了多少" stays scoped.
|
|
439
|
+
//
|
|
440
|
+
// Skip persons/items — they don't carry amounts.
|
|
441
|
+
//
|
|
442
|
+
// 0 hits → return EMPTY (do NOT fall through). If the user asks
|
|
443
|
+
// "总共花了多少" and the vault has zero amount-bearing events under
|
|
444
|
+
// adapter+time scope, the default path would pull messages / visits /
|
|
445
|
+
// browsing rows the LLM might wrongly try to sum. Empty FACTS +
|
|
446
|
+
// warning="no-facts" + TOTALS preamble lets the model say "找不到
|
|
447
|
+
// 相关花费记录" cleanly. This diverges from latest's fallback (which
|
|
448
|
+
// surfaces persons/items for general "what's recent" context); for
|
|
449
|
+
// sum-amount that fallback would actively mislead.
|
|
450
|
+
if (parsed.intent === "sum-amount") {
|
|
451
|
+
const perSubtype = Math.max(
|
|
452
|
+
SUM_AMOUNT_MIN_PER_SUBTYPE,
|
|
453
|
+
Math.floor(effMaxQueryLimit / SUM_AMOUNT_SUBTYPES.length)
|
|
454
|
+
);
|
|
455
|
+
const seen = new Set();
|
|
456
|
+
const amountEvents = [];
|
|
457
|
+
for (const sub of SUM_AMOUNT_SUBTYPES) {
|
|
458
|
+
const subQ = { limit: perSubtype, subtype: sub };
|
|
459
|
+
if (parsed.filters && parsed.filters.adapter) {
|
|
460
|
+
subQ.adapter = parsed.filters.adapter;
|
|
461
|
+
}
|
|
462
|
+
if (parsed.timeWindow) {
|
|
463
|
+
if (Number.isFinite(parsed.timeWindow.since)) subQ.since = parsed.timeWindow.since;
|
|
464
|
+
if (Number.isFinite(parsed.timeWindow.until)) subQ.until = parsed.timeWindow.until;
|
|
465
|
+
}
|
|
466
|
+
const rows = this.vault.queryEvents(subQ);
|
|
467
|
+
for (const e of rows) {
|
|
468
|
+
if (e && e.id && !seen.has(e.id)) {
|
|
469
|
+
seen.add(e.id);
|
|
470
|
+
amountEvents.push(e);
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
amountEvents.sort((a, b) => (b.occurredAt || 0) - (a.occurredAt || 0));
|
|
475
|
+
return amountEvents.slice(0, effMaxFacts);
|
|
476
|
+
}
|
|
477
|
+
|
|
316
478
|
// Deliberately do NOT pass parsed.filters.subtype as a vault filter:
|
|
317
479
|
// the keyword heuristic (`order` vs `payment` vs `transfer`) is too
|
|
318
480
|
// crude to reliably narrow without false negatives. E.g. a user
|
|
@@ -323,7 +485,7 @@ class AnalysisEngine {
|
|
|
323
485
|
// apply on prose. The LLM is good at filtering; SQL keyword guessing
|
|
324
486
|
// is brittle.
|
|
325
487
|
const q = {
|
|
326
|
-
limit:
|
|
488
|
+
limit: effMaxQueryLimit,
|
|
327
489
|
};
|
|
328
490
|
if (parsed.filters && parsed.filters.adapter) q.adapter = parsed.filters.adapter;
|
|
329
491
|
if (parsed.timeWindow) {
|
|
@@ -332,6 +494,58 @@ class AnalysisEngine {
|
|
|
332
494
|
}
|
|
333
495
|
const events = this.vault.queryEvents(q);
|
|
334
496
|
|
|
497
|
+
// intent=list + entity-name FTS5 augmentation — when the question
|
|
498
|
+
// carries a probable entity-name candidate ("提到王老板的消息",
|
|
499
|
+
// "苹果的订单"), run an extra vault.searchEvents(q=term) and append
|
|
500
|
+
// hits not already in `events`. Adapter + time window are passed
|
|
501
|
+
// through so the FTS slice stays consistent with the main query.
|
|
502
|
+
//
|
|
503
|
+
// Strictly additive: the FTS hits are appended to `events` (no
|
|
504
|
+
// replacement). Wrong term extraction at worst returns 0 rows; FTS
|
|
505
|
+
// errors are swallowed — main path (events + persons + items) stays
|
|
506
|
+
// intact. See pdh_analysis_engine_intent_routing.md.
|
|
507
|
+
//
|
|
508
|
+
// Skipped for intent ∈ {count, sum-amount, latest}:
|
|
509
|
+
// - count uses TOTALS preamble; FACTS sample doesn't need padding
|
|
510
|
+
// - sum-amount is value-aggregation; entity-name hits don't help
|
|
511
|
+
// - latest usually returned earlier via the narrow path; when it falls through (time window set, or 0 hits) it is still not augmented
|
|
512
|
+
if (
|
|
513
|
+
parsed.intent === "list" &&
|
|
514
|
+
typeof this.vault.searchEvents === "function"
|
|
515
|
+
) {
|
|
516
|
+
const entityTerm = extractEntityTerm(parsed.raw);
|
|
517
|
+
if (entityTerm) {
|
|
518
|
+
const headroom = effMaxFacts - events.length;
|
|
519
|
+
if (headroom > 0) {
|
|
520
|
+
try {
|
|
521
|
+
const ftsQ = {
|
|
522
|
+
q: entityTerm,
|
|
523
|
+
limit: Math.min(headroom, LIST_INTENT_FTS_LIMIT),
|
|
524
|
+
};
|
|
525
|
+
if (parsed.filters && parsed.filters.adapter) {
|
|
526
|
+
ftsQ.adapter = parsed.filters.adapter;
|
|
527
|
+
}
|
|
528
|
+
if (parsed.timeWindow) {
|
|
529
|
+
if (Number.isFinite(parsed.timeWindow.since)) ftsQ.since = parsed.timeWindow.since;
|
|
530
|
+
if (Number.isFinite(parsed.timeWindow.until)) ftsQ.until = parsed.timeWindow.until;
|
|
531
|
+
}
|
|
532
|
+
const ftsResult = this.vault.searchEvents(ftsQ);
|
|
533
|
+
if (ftsResult && Array.isArray(ftsResult.rows)) {
|
|
534
|
+
const existingIds = new Set(events.map((e) => e.id));
|
|
535
|
+
for (const row of ftsResult.rows) {
|
|
536
|
+
if (row && row.id && !existingIds.has(row.id)) {
|
|
537
|
+
events.push(row);
|
|
538
|
+
existingIds.add(row.id);
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
} catch (_e) {
|
|
543
|
+
// FTS failure is non-fatal — main events array already populated.
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
|
|
335
549
|
// Path C follow-up — events alone miss whole categories of facts:
|
|
336
550
|
// - contacts (system-data-android) land in `persons`, not `events`
|
|
337
551
|
// - installed apps land in `items`, not `events`
|
|
@@ -349,7 +563,7 @@ class AnalysisEngine {
|
|
|
349
563
|
// state snapshots that should always be visible. Adapter filter is also
|
|
350
564
|
// skipped because users asking "我有几个联系人" don't say "from
|
|
351
565
|
// system-data-android".
|
|
352
|
-
const remaining = Math.max(0,
|
|
566
|
+
const remaining = Math.max(0, effMaxFacts - events.length);
|
|
353
567
|
const sideBudget = Math.floor(remaining / 2);
|
|
354
568
|
const personBudget = sideBudget > 0 ? sideBudget : 0;
|
|
355
569
|
const itemBudget = remaining - personBudget;
|
|
@@ -412,4 +626,8 @@ module.exports = {
|
|
|
412
626
|
AnalysisEngine,
|
|
413
627
|
DEFAULT_MAX_FACTS,
|
|
414
628
|
DEFAULT_MAX_QUERY_LIMIT,
|
|
629
|
+
LATEST_INTENT_FACT_LIMIT,
|
|
630
|
+
LIST_INTENT_FTS_LIMIT,
|
|
631
|
+
SUM_AMOUNT_SUBTYPES,
|
|
632
|
+
SUM_AMOUNT_MIN_PER_SUBTYPE,
|
|
415
633
|
};
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared adapter → category taxonomy for the PDH Vault Browser UI.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth consumed by:
|
|
5
|
+
* - packages/web-panel (desktop browser view)
|
|
6
|
+
* - packages/cli (cc hub search --category)
|
|
7
|
+
* - android-app (mirrored as Kotlin enum in PdhCategoryMap.kt; keep in sync)
|
|
8
|
+
*
|
|
9
|
+
* Categories are stable user-facing buckets (社交聊天 / 内容平台 / ...) — the
|
|
10
|
+
* browser sidebar keys off these, not raw adapter names. New adapters get
|
|
11
|
+
* mapped here once and surface in the right bucket on both shells.
|
|
12
|
+
*
|
|
13
|
+
* Matching is prefix-based by adapter name so we don't have to touch this
|
|
14
|
+
* file for every adapter variant (e.g. `email-imap-qq`, `email-imap-gmail`).
|
|
15
|
+
* First matching prefix wins; order in PREFIX_RULES matters for overlapping
|
|
16
|
+
* prefixes (none today, but reserve the right).
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
"use strict";
|
|
20
|
+
|
|
21
|
+
// Category ids exposed to consumers. The array is frozen so importers
// cannot add, remove or reorder entries at runtime.
const CATEGORIES = Object.freeze([
  "chat", // instant messaging / private chats
  "social", // content platforms / short video / microblogs
  "email", // email
  "shopping", // payments / orders / shopping
  "travel", // trips / maps / ticketing
  "system", // system data (contacts / installed apps)
  "ai-chat", // AI assistant conversation history
  "other", // catch-all fallback
]);

// User-facing display label for each category id. Keys are listed in the
// same order as CATEGORIES; the label strings are runtime data and must
// not be edited for style.
const CATEGORY_LABELS = Object.freeze({
  chat: "社交聊天",
  social: "内容平台",
  email: "邮件",
  shopping: "支付订单",
  travel: "出行",
  system: "系统数据",
  "ai-chat": "AI 对话",
  other: "其他",
});

// Ordered pattern → category rules. First match wins.
// Each entry is a `[pattern, category]` pair. A trailing `*` in the pattern
// means "prefix match"; a pattern without `*` must match the name exactly.
//
// Object.freeze is shallow, so every pair is frozen individually as well;
// otherwise `PREFIX_RULES[0][1] = "x"` would silently rewrite the shared
// taxonomy for every consumer of this module.
const PREFIX_RULES = Object.freeze(
  [
    ["wechat", "chat"],
    ["messaging-*", "chat"],
    ["social-*", "social"],
    ["email-*", "email"],
    ["shopping-*", "shopping"],
    ["alipay-*", "shopping"],
    ["travel-*", "travel"],
    ["system-data*", "system"],
    ["browser-*", "system"],
    ["vscode", "system"],
    ["win-recent", "system"],
    ["git-activity", "system"],
    ["shell-history", "system"],
    ["local-files", "system"],
    ["ai-chat-*", "ai-chat"],
  ].map((rule) => Object.freeze(rule)),
);
|
|
63
|
+
|
|
64
|
+
/**
 * Resolve the category bucket for an adapter name.
 *
 * Rules in PREFIX_RULES are tried in order and the first hit wins. A rule
 * ending in `*` matches any name that starts with the text before the `*`;
 * any other rule must equal the name exactly.
 *
 * @param {string} adapterName e.g. "social-bilibili" / "email-imap-qq" / "wechat"
 * @returns {string} the matched rule's category, or "other" when the name is
 *   not a non-empty string or no rule matches
 */
function getCategory(adapterName) {
  const isUsableName = typeof adapterName === "string" && adapterName.length > 0;
  if (!isUsableName) return "other";

  const hit = PREFIX_RULES.find(([pattern]) =>
    pattern.endsWith("*")
      ? adapterName.startsWith(pattern.slice(0, -1))
      : adapterName === pattern,
  );
  return hit ? hit[1] : "other";
}
|
|
81
|
+
|
|
82
|
+
/**
 * Group adapter names by category.
 *
 * @param {Iterable<string>|string|null|undefined} adapterNames adapter names
 *   to bucket. A bare non-empty string is treated as a single adapter name
 *   instead of being iterated character by character.
 * @returns {Object<string, string[]>} `{ [category]: string[] }` with names in
 *   input order and empty categories omitted. Falsy or non-iterable input
 *   yields `{}` rather than throwing.
 */
function groupByCategory(adapterNames) {
  const out = {};
  // Falsy input (undefined, null, "", 0, false) means "nothing to group".
  if (!adapterNames) return out;

  // Strings are iterable, but iterating one would bucket single characters.
  const names = typeof adapterNames === "string" ? [adapterNames] : adapterNames;

  // Anything that cannot be iterated (numbers, plain objects, ...) used to
  // raise a TypeError in the for...of below; report an empty grouping instead.
  if (typeof names[Symbol.iterator] !== "function") return out;

  for (const name of names) {
    const c = getCategory(name);
    (out[c] ||= []).push(name);
  }
  return out;
}
|
|
94
|
+
|
|
95
|
+
module.exports = {
|
|
96
|
+
CATEGORIES,
|
|
97
|
+
CATEGORY_LABELS,
|
|
98
|
+
PREFIX_RULES,
|
|
99
|
+
getCategory,
|
|
100
|
+
groupByCategory,
|
|
101
|
+
};
|
package/lib/index.js
CHANGED
|
@@ -58,6 +58,14 @@ const entityResolver = require("./entity-resolver");
|
|
|
58
58
|
const analysisSkills = require("./analysis-skills");
|
|
59
59
|
const mobileExtractor = require("./mobile-extractor");
|
|
60
60
|
const systemDataAndroid = require("./adapters/system-data-android");
|
|
61
|
+
const browserHistoryChrome = require("./adapters/browser-history-chrome");
|
|
62
|
+
const browserHistoryEdge = require("./adapters/browser-history-edge");
|
|
63
|
+
const vscodeAdapter = require("./adapters/vscode");
|
|
64
|
+
const winRecentAdapter = require("./adapters/win-recent");
|
|
65
|
+
const gitActivityAdapter = require("./adapters/git-activity");
|
|
66
|
+
const shellHistoryAdapter = require("./adapters/shell-history");
|
|
67
|
+
const localFilesAdapter = require("./adapters/local-files");
|
|
68
|
+
const categories = require("./categories");
|
|
61
69
|
|
|
62
70
|
module.exports = {
|
|
63
71
|
// Constants / enums
|
|
@@ -87,6 +95,7 @@ module.exports = {
|
|
|
87
95
|
TARGET_SCHEMA_VERSION: migrations.TARGET_VERSION,
|
|
88
96
|
applyMigrations: migrations.applyMigrations,
|
|
89
97
|
getSchemaVersion: migrations.getSchemaVersion,
|
|
98
|
+
getFtsMode: migrations.getFtsMode,
|
|
90
99
|
|
|
91
100
|
// Key providers
|
|
92
101
|
KEY_HEX_LEN: keyProviders.KEY_HEX_LEN,
|
|
@@ -262,6 +271,51 @@ module.exports = {
|
|
|
262
271
|
ingestSystemDataAndroidSnapshot:
|
|
263
272
|
systemDataAndroid.ingestSystemDataAndroidSnapshot,
|
|
264
273
|
|
|
274
|
+
// Phase 17 (2026-05-24) — desktop Chrome local browser history + bookmarks.
|
|
275
|
+
// SQLite snapshot copy + Bookmarks JSON parse; no network, no extension.
|
|
276
|
+
BrowserHistoryChromeAdapter: browserHistoryChrome.BrowserHistoryChromeAdapter,
|
|
277
|
+
BROWSER_HISTORY_CHROME_NAME: browserHistoryChrome.BROWSER_HISTORY_CHROME_NAME,
|
|
278
|
+
BROWSER_HISTORY_CHROME_VERSION: browserHistoryChrome.BROWSER_HISTORY_CHROME_VERSION,
|
|
279
|
+
defaultChromeProfileDir: browserHistoryChrome.defaultChromeProfileDir,
|
|
280
|
+
|
|
281
|
+
// Edge — Chromium under the hood, same readers, different profile root.
|
|
282
|
+
BrowserHistoryEdgeAdapter: browserHistoryEdge.BrowserHistoryEdgeAdapter,
|
|
283
|
+
BROWSER_HISTORY_EDGE_NAME: browserHistoryEdge.BROWSER_HISTORY_EDGE_NAME,
|
|
284
|
+
BROWSER_HISTORY_EDGE_VERSION: browserHistoryEdge.BROWSER_HISTORY_EDGE_VERSION,
|
|
285
|
+
|
|
286
|
+
// VS Code — workspace history + global terminal command/dir history.
|
|
287
|
+
VSCodeAdapter: vscodeAdapter.VSCodeAdapter,
|
|
288
|
+
VSCODE_NAME: vscodeAdapter.VSCODE_NAME,
|
|
289
|
+
VSCODE_VERSION: vscodeAdapter.VSCODE_VERSION,
|
|
290
|
+
defaultVscodeRoot: vscodeAdapter.defaultVscodeRoot,
|
|
291
|
+
|
|
292
|
+
// Windows Recent — .lnk shortcut list from %APPDATA%\Microsoft\Windows\Recent.
|
|
293
|
+
// Cross-application "what did I open and when" timeline (Win-only adapter).
|
|
294
|
+
WinRecentAdapter: winRecentAdapter.WinRecentAdapter,
|
|
295
|
+
WIN_RECENT_NAME: winRecentAdapter.WIN_RECENT_NAME,
|
|
296
|
+
WIN_RECENT_VERSION: winRecentAdapter.WIN_RECENT_VERSION,
|
|
297
|
+
defaultWinRecentDir: winRecentAdapter.defaultRecentDir,
|
|
298
|
+
|
|
299
|
+
// Phase 18 — git activity (commit timeline across local code repos).
|
|
300
|
+
GitActivityAdapter: gitActivityAdapter.GitActivityAdapter,
|
|
301
|
+
GIT_ACTIVITY_NAME: gitActivityAdapter.GIT_ACTIVITY_NAME,
|
|
302
|
+
GIT_ACTIVITY_VERSION: gitActivityAdapter.GIT_ACTIVITY_VERSION,
|
|
303
|
+
defaultCodeRoots: gitActivityAdapter.defaultCodeRoots,
|
|
304
|
+
|
|
305
|
+
// Phase 18 — shell history (PowerShell / bash / zsh command timelines).
|
|
306
|
+
ShellHistoryAdapter: shellHistoryAdapter.ShellHistoryAdapter,
|
|
307
|
+
SHELL_HISTORY_NAME: shellHistoryAdapter.SHELL_HISTORY_NAME,
|
|
308
|
+
SHELL_HISTORY_VERSION: shellHistoryAdapter.SHELL_HISTORY_VERSION,
|
|
309
|
+
defaultShellHistorySources: shellHistoryAdapter.defaultHistorySources,
|
|
310
|
+
|
|
311
|
+
// Phase 18 — local files (file walk under Documents / Desktop / Downloads /
|
|
312
|
+
// Pictures / Videos / Music). Cross-application "what files do I have"
|
|
313
|
+
// timeline rooted in mtime, with app-cache excludes baked in.
|
|
314
|
+
LocalFilesAdapter: localFilesAdapter.LocalFilesAdapter,
|
|
315
|
+
LOCAL_FILES_NAME: localFilesAdapter.LOCAL_FILES_NAME,
|
|
316
|
+
LOCAL_FILES_VERSION: localFilesAdapter.LOCAL_FILES_VERSION,
|
|
317
|
+
defaultLocalFileRoots: localFilesAdapter.defaultRoots,
|
|
318
|
+
|
|
265
319
|
// Phase 6 — AlipayBillAdapter (CSV import)
|
|
266
320
|
AlipayBillAdapter: alipayBillAdapter.AlipayBillAdapter,
|
|
267
321
|
ALIPAY_BILL_NAME: alipayBillAdapter.ALIPAY_BILL_NAME,
|
|
@@ -273,4 +327,11 @@ module.exports = {
|
|
|
273
327
|
alipayCounterpartyToPersonId: alipayBillAdapter.alipayCounterpartyToPersonId,
|
|
274
328
|
ALIPAY_KNOWN_MERCHANTS: alipayBillAdapter.ALIPAY_KNOWN_MERCHANTS,
|
|
275
329
|
mapAlipayTypeToSubtype: alipayBillAdapter.mapAlipayTypeToSubtype,
|
|
330
|
+
|
|
331
|
+
// Phase 16 — Vault Browser shared taxonomy (categories + adapter mapping)
|
|
332
|
+
CATEGORIES: categories.CATEGORIES,
|
|
333
|
+
CATEGORY_LABELS: categories.CATEGORY_LABELS,
|
|
334
|
+
PDH_PREFIX_RULES: categories.PREFIX_RULES,
|
|
335
|
+
getAdapterCategory: categories.getCategory,
|
|
336
|
+
groupAdaptersByCategory: categories.groupByCategory,
|
|
276
337
|
};
|