@chainlesschain/personal-data-hub 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +241 -0
  2. package/__tests__/adapter-spec.test.js +78 -0
  3. package/__tests__/adapters/email-adapter.test.js +605 -0
  4. package/__tests__/adapters/email-imap-session.test.js +334 -0
  5. package/__tests__/adapters/email-parser.test.js +244 -0
  6. package/__tests__/adapters/email-providers.test.js +84 -0
  7. package/__tests__/analysis.test.js +302 -0
  8. package/__tests__/batch.test.js +133 -0
  9. package/__tests__/bridges-cc-kg.test.js +231 -0
  10. package/__tests__/bridges-cc-llm.test.js +191 -0
  11. package/__tests__/bridges-cc-rag.test.js +162 -0
  12. package/__tests__/ids.test.js +45 -0
  13. package/__tests__/key-providers.test.js +126 -0
  14. package/__tests__/kg-derive.test.js +219 -0
  15. package/__tests__/llm-client.test.js +122 -0
  16. package/__tests__/mock-adapter.test.js +93 -0
  17. package/__tests__/prompt-builder.test.js +204 -0
  18. package/__tests__/query-parser.test.js +150 -0
  19. package/__tests__/rag-derive.test.js +169 -0
  20. package/__tests__/registry.test.js +304 -0
  21. package/__tests__/schemas.test.js +331 -0
  22. package/__tests__/vault.test.js +506 -0
  23. package/lib/adapter-spec.js +155 -0
  24. package/lib/adapters/email-imap/email-adapter.js +398 -0
  25. package/lib/adapters/email-imap/email-parser.js +177 -0
  26. package/lib/adapters/email-imap/imap-session.js +294 -0
  27. package/lib/adapters/email-imap/index.js +26 -0
  28. package/lib/adapters/email-imap/providers.js +111 -0
  29. package/lib/analysis.js +226 -0
  30. package/lib/batch.js +123 -0
  31. package/lib/bridges/cc-kg-sink.js +264 -0
  32. package/lib/bridges/cc-llm-adapter.js +169 -0
  33. package/lib/bridges/cc-rag-sink.js +118 -0
  34. package/lib/bridges/index.js +44 -0
  35. package/lib/constants.js +92 -0
  36. package/lib/ids.js +103 -0
  37. package/lib/index.js +141 -0
  38. package/lib/key-providers.js +146 -0
  39. package/lib/kg-derive.js +214 -0
  40. package/lib/llm-client.js +171 -0
  41. package/lib/migrations.js +246 -0
  42. package/lib/mock-adapter.js +199 -0
  43. package/lib/prompt-builder.js +205 -0
  44. package/lib/query-parser.js +250 -0
  45. package/lib/rag-derive.js +186 -0
  46. package/lib/registry.js +398 -0
  47. package/lib/schemas.js +379 -0
  48. package/lib/vault.js +883 -0
  49. package/package.json +63 -0
  50. package/vitest.config.js +10 -0
@@ -0,0 +1,294 @@
1
+ /**
2
+ * Thin async-iterator wrapper around imapflow.
3
+ *
4
+ * The hub never imports imapflow directly — this file is the single
5
+ * static require boundary. EmailAdapter takes an ImapSession instance
6
+ * (or any object with the same surface) so tests can inject a mock
7
+ * without imapflow even being installed.
8
+ *
9
+ * Errors are surfaced with normalized `code`:
10
+ * AUTH_FAILED login rejected
11
+ * CONNECTION_FAILED TCP / TLS failure
12
+ * MAILBOX_NOT_FOUND folder doesn't exist
13
+ */
14
+
15
+ "use strict";
16
+
17
/**
 * Raised when the IMAP server refuses the supplied credentials.
 *
 * Instances always carry `code === "AUTH_FAILED"` so callers can branch on
 * the code instead of on the class identity.
 *
 * @param {string} [message] human-readable detail; a generic text is used when falsy
 * @param {*} [cause] underlying error, attached as `cause` only when truthy
 */
class ImapAuthFailedError extends Error {
  constructor(message, cause) {
    const text = message ? message : "IMAP authentication failed";
    super(text);
    Object.assign(this, {
      name: "ImapAuthFailedError",
      code: "AUTH_FAILED",
    });
    if (cause) {
      this.cause = cause;
    }
  }
}
25
+
26
/**
 * Raised when the IMAP client cannot be built or the connection cannot be
 * established (including the connect timeout enforced by ImapSession).
 *
 * Instances always carry `code === "CONNECTION_FAILED"`.
 *
 * @param {string} [message] human-readable detail; a generic text is used when falsy
 * @param {*} [cause] underlying error, attached as `cause` only when truthy
 */
class ImapConnectionFailedError extends Error {
  constructor(message, cause) {
    const text = message ? message : "IMAP connection failed";
    super(text);
    Object.assign(this, {
      name: "ImapConnectionFailedError",
      code: "CONNECTION_FAILED",
    });
    if (cause) {
      this.cause = cause;
    }
  }
}
34
+
35
/**
 * Raised when a requested mailbox (folder) does not exist on the server.
 *
 * Instances carry `code === "MAILBOX_NOT_FOUND"` and expose the requested
 * folder name as `mailbox`.
 *
 * @param {string} name the mailbox name that could not be opened
 */
class ImapMailboxNotFoundError extends Error {
  constructor(name) {
    const text = `Mailbox not found: ${name}`;
    super(text);
    Object.assign(this, {
      name: "ImapMailboxNotFoundError",
      code: "MAILBOX_NOT_FOUND",
      mailbox: name,
    });
  }
}
43
+
44
/**
 * Session wrapper around an imapflow-compatible client.
 *
 * The client is created lazily in connect(), either through the injected
 * `imapFlowFactory` (used by tests) or by requiring `imapflow` on demand.
 * All other methods require a successful connect() first.
 */
class ImapSession {
  /**
   * @param {object} opts
   * @param {string} opts.host IMAP server host name
   * @param {number|string} opts.port IMAP server port (coerced with Number())
   * @param {string} opts.user login name
   * @param {string} opts.authCode authorization code / app password used as the IMAP password
   * @param {boolean} [opts.secure=true] anything other than `false` enables TLS
   * @param {number} [opts.connectTimeoutMs=15000] connect timeout; used only when finite
   * @param {Function} [opts.imapFlowFactory] class or plain factory producing the client
   * @throws {Error} when opts or any of host/port/user/authCode is missing
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") {
      throw new Error("ImapSession: opts required");
    }
    for (const f of ["host", "port", "user", "authCode"]) {
      if (opts[f] === undefined || opts[f] === null || opts[f] === "") {
        throw new Error(`ImapSession: opts.${f} required`);
      }
    }
    this.host = String(opts.host);
    this.port = Number(opts.port);
    this.secure = opts.secure !== false;
    this.user = String(opts.user);
    this.authCode = String(opts.authCode);
    this.connectTimeoutMs = Number.isFinite(opts.connectTimeoutMs)
      ? opts.connectTimeoutMs
      : 15000;
    this._factory = typeof opts.imapFlowFactory === "function" ? opts.imapFlowFactory : null;
    this._client = null;
  }

  /**
   * Build the client and connect, racing the connect against
   * `connectTimeoutMs`. On failure the half-open client is closed
   * (best effort) and a normalized error is thrown.
   *
   * @returns {Promise<void>}
   * @throws {ImapAuthFailedError} code AUTH_FAILED
   * @throws {ImapConnectionFailedError} code CONNECTION_FAILED
   */
  async connect() {
    let ImapFlowCtor;
    if (this._factory) {
      ImapFlowCtor = this._factory;
    } else {
      try {
        const mod = require("imapflow");
        ImapFlowCtor = mod.ImapFlow || mod.default || mod;
      } catch (err) {
        throw new ImapConnectionFailedError(
          "imapflow is not installed. Run `npm install imapflow` in the workspace.",
          err
        );
      }
    }

    const ctorOpts = {
      host: this.host,
      port: this.port,
      secure: this.secure,
      auth: { user: this.user, pass: this.authCode },
      logger: false,
    };
    let client;
    try {
      // Real imapflow's ImapFlow is an ES class → must use `new`. Test
      // injection sometimes passes an arrow factory which `new` rejects
      // (TypeError: not a constructor). Try constructor first, fall
      // through to plain call so both shapes work.
      try {
        client = new ImapFlowCtor(ctorOpts);
      } catch (ctorErr) {
        if (ctorErr instanceof TypeError) {
          client = ImapFlowCtor(ctorOpts);
        } else {
          throw ctorErr;
        }
      }
    } catch (err) {
      throw new ImapConnectionFailedError(
        `Failed to construct IMAP client: ${err && err.message ? err.message : err}`,
        err
      );
    }

    let connectPromise;
    try {
      connectPromise = client.connect();
    } catch (err) {
      throw new ImapConnectionFailedError(
        `IMAP connect threw synchronously: ${err && err.message ? err.message : err}`,
        err
      );
    }

    let timer = null;
    const timeoutPromise = new Promise((_, reject) => {
      timer = setTimeout(() => {
        reject(new ImapConnectionFailedError(
          `IMAP connect timed out after ${this.connectTimeoutMs}ms`
        ));
      }, this.connectTimeoutMs);
    });

    try {
      await Promise.race([connectPromise, timeoutPromise]);
    } catch (err) {
      // Best effort: release the socket of the client that never became usable.
      try { await client.close(); } catch (_e) {}
      throw this._normalizeConnectError(err);
    } finally {
      if (timer) clearTimeout(timer);
    }

    this._client = client;
  }

  /**
   * Map a raw connect failure onto AUTH_FAILED / CONNECTION_FAILED.
   *
   * Structured signals are consulted before the message heuristic, because
   * substrings such as "auth" or "login" also occur in host names that are
   * echoed inside transport errors (e.g. "ENOTFOUND auth.example.com").
   *
   * @param {*} err value the connect race rejected with
   * @returns {Error} the error to throw
   */
  _normalizeConnectError(err) {
    // Already normalized (e.g. our own timeout error): pass through untouched.
    if (err && (err.code === "AUTH_FAILED" || err.code === "CONNECTION_FAILED")) {
      return err;
    }
    // Explicit flag on the error object wins over any text matching.
    // NOTE(review): imapflow is expected to set this on rejected logins — confirm against its docs.
    if (err && err.authenticationFailed === true) {
      return new ImapAuthFailedError(err.message, err);
    }
    const transportCodes = [
      "ENOTFOUND",
      "EAI_AGAIN",
      "ECONNREFUSED",
      "ECONNRESET",
      "ETIMEDOUT",
      "EHOSTUNREACH",
      "ENETUNREACH",
      "EPIPE",
      "ERR_TLS_CERT_ALTNAME_INVALID",
    ];
    const isTransport = Boolean(err) && transportCodes.includes(err.code);
    if (!isTransport) {
      const msg = (err && err.message ? err.message : String(err)).toLowerCase();
      if (msg.includes("auth") || msg.includes("invalid credentials") || msg.includes("login") || msg.includes("rejected")) {
        return new ImapAuthFailedError(err && err.message, err);
      }
    }
    return new ImapConnectionFailedError(
      `IMAP connect failed: ${err && err.message ? err.message : err}`,
      err
    );
  }

  /**
   * @returns {object} the connected client
   * @throws {Error} when connect() has not completed successfully
   */
  _requireConnected() {
    if (!this._client) {
      throw new Error("ImapSession: not connected; call connect() first.");
    }
    return this._client;
  }

  /**
   * Open a mailbox and report its basic counters.
   *
   * @param {string} name non-empty mailbox path
   * @returns {Promise<{uidValidity: *, uidNext: *, exists: *}>}
   * @throws {ImapMailboxNotFoundError} when the server reports the folder as missing
   */
  async openMailbox(name) {
    const c = this._requireConnected();
    if (typeof name !== "string" || !name) {
      throw new Error("openMailbox: name must be a non-empty string");
    }
    let info;
    try {
      info = await c.mailboxOpen(name);
    } catch (err) {
      const msg = (err && err.message ? err.message : "").toLowerCase();
      if (msg.includes("doesn't exist") || msg.includes("nonexistent") || msg.includes("not found")) {
        const notFound = new ImapMailboxNotFoundError(name);
        // Keep the server's original error reachable for diagnostics.
        notFound.cause = err;
        throw notFound;
      }
      throw err;
    }
    return {
      uidValidity: info.uidValidity,
      uidNext: info.uidNext,
      exists: info.exists,
    };
  }

  /**
   * List mailboxes as plain `{ name, path, specialUse, flags }` rows.
   *
   * @returns {Promise<Array<object>>} empty array when the client returns a non-array
   */
  async listMailboxes() {
    const c = this._requireConnected();
    const items = await c.list();
    if (!Array.isArray(items)) return [];
    return items.map((m) => ({
      name: m.name,
      path: m.path,
      specialUse: m.specialUse || null,
      flags: Array.isArray(m.flags) ? m.flags : [],
    }));
  }

  /**
   * Shared UID-range fetch used by both public generators.
   *
   * IMAP resolves `*` to the highest existing UID and treats ranges as
   * unordered, so `N:*` still returns the last message when N is beyond
   * the highest UID (RFC 3501 §6.4.8). Rows at or below the watermark are
   * therefore dropped here, otherwise the newest message would be
   * re-delivered on every sync that finds nothing new.
   *
   * @param {number} sinceUid exclusive lower bound; non-finite or <= 0 means "from the start"
   * @param {object} fields fetch query handed to the client
   * @returns {AsyncGenerator<object>} raw client messages with uid > sinceUid
   */
  async *_fetchSince(sinceUid, fields) {
    const c = this._requireConnected();
    // Floor so a fractional watermark cannot produce an invalid sequence set.
    const baseUid = Number.isFinite(sinceUid) && sinceUid > 0 ? Math.floor(sinceUid) : 0;
    const range = `${baseUid + 1}:*`;
    for await (const msg of c.fetch(range, fields, { uid: true })) {
      if (msg && typeof msg.uid === "number" && msg.uid <= baseUid) continue;
      yield msg;
    }
  }

  /**
   * Stream envelope rows for every message whose UID is greater than `sinceUid`.
   *
   * @param {number} [sinceUid=0]
   * @returns {AsyncGenerator<object>} rows shaped by _toEnvelopeRow
   */
  async *fetchEnvelopesSince(sinceUid = 0) {
    const fields = { envelope: true, internalDate: true, flags: true, size: true, uid: true };
    for await (const msg of this._fetchSince(sinceUid, fields)) {
      yield this._toEnvelopeRow(msg);
    }
  }

  /**
   * Like fetchEnvelopesSince but also pulls the full RFC822 source of
   * each message (`source: true`). Phase 5.2 — the EmailAdapter feeds
   * these into the mailparser-based email-parser to extract body text,
   * HTML, attachments metadata, etc.
   *
   * Memory note: source bytes pile up in memory until each generator
   * consumer awaits the next iteration. For huge mailboxes the registry's
   * batchSize (default 100) acts as the natural back-pressure — every
   * `batchSize` messages get committed to vault before the next batch
   * pulls. Phase 5.5 PDF-decryption work will switch to per-attachment
   * download for emails > N MB.
   *
   * Yields `{ ...envelopeRow, source: Buffer }`.
   *
   * @param {number} sinceUid
   * @returns {AsyncGenerator}
   */
  async *fetchFullSince(sinceUid = 0) {
    const fields = {
      envelope: true,
      internalDate: true,
      flags: true,
      size: true,
      uid: true,
      source: true, // raw RFC822 bytes
    };
    for await (const msg of this._fetchSince(sinceUid, fields)) {
      const row = this._toEnvelopeRow(msg);
      // imapflow returns source as a Buffer; defensively coerce.
      row.source = Buffer.isBuffer(msg.source)
        ? msg.source
        : msg.source
          ? Buffer.from(msg.source)
          : Buffer.alloc(0);
      yield row;
    }
  }

  /**
   * Log out and close the client. Both steps are best effort: a dead
   * connection must not prevent the session from being reset.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (!this._client) return;
    try {
      await this._client.logout();
    } catch (_err) {}
    try {
      await this._client.close();
    } catch (_err) {}
    this._client = null;
  }

  /**
   * Flatten a client message into a plain row with safe defaults.
   *
   * @param {object} msg client message (`uid`, `envelope`, `flags`, `internalDate`, `size`)
   * @returns {object} `{ uid, internalDate, flags, messageId, subject, from, to, cc, date, size }`
   */
  _toEnvelopeRow(msg) {
    const env = msg.envelope || {};
    // imapflow returns flags as a Set; older shapes use Array. Any iterable is accepted.
    let flags = [];
    if (msg.flags && typeof msg.flags[Symbol.iterator] === "function") {
      flags = Array.from(msg.flags);
    }
    return {
      uid: msg.uid,
      internalDate: msg.internalDate instanceof Date ? msg.internalDate : new Date(msg.internalDate || 0),
      flags,
      messageId: typeof env.messageId === "string" ? env.messageId : "",
      subject: typeof env.subject === "string" ? env.subject : "",
      from: this._addrs(env.from),
      to: this._addrs(env.to),
      cc: this._addrs(env.cc),
      date: env.date instanceof Date ? env.date : env.date ? new Date(env.date) : null,
      size: typeof msg.size === "number" ? msg.size : 0,
    };
  }

  /**
   * Normalize an envelope address list.
   *
   * @param {*} list address array from the envelope; anything else yields []
   * @returns {Array<{name: (string|undefined), address: string}>}
   */
  _addrs(list) {
    if (!Array.isArray(list)) return [];
    return list.map((a) => ({
      name: a && a.name ? String(a.name) : undefined,
      address: a && a.address ? String(a.address) : "",
    }));
  }
}
288
+
289
+ module.exports = {
290
+ ImapSession,
291
+ ImapAuthFailedError,
292
+ ImapConnectionFailedError,
293
+ ImapMailboxNotFoundError,
294
+ };
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+
3
+ const { EmailAdapter, parseWatermark, formatWatermark, NAME, VERSION } = require("./email-adapter");
4
+ const { PROVIDERS, resolveProvider } = require("./providers");
5
+ const {
6
+ ImapSession,
7
+ ImapAuthFailedError,
8
+ ImapConnectionFailedError,
9
+ ImapMailboxNotFoundError,
10
+ } = require("./imap-session");
11
+ const { parseRawEmail } = require("./email-parser");
12
+
13
+ module.exports = {
14
+ EmailAdapter,
15
+ EMAIL_ADAPTER_NAME: NAME,
16
+ EMAIL_ADAPTER_VERSION: VERSION,
17
+ parseWatermark,
18
+ formatWatermark,
19
+ EMAIL_PROVIDERS: PROVIDERS,
20
+ resolveEmailProvider: resolveProvider,
21
+ ImapSession,
22
+ ImapAuthFailedError,
23
+ ImapConnectionFailedError,
24
+ ImapMailboxNotFoundError,
25
+ parseRawEmail,
26
+ };
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Email provider presets — mainland-China-first.
3
+ *
4
+ * Each entry tells the adapter where to connect + which folders are
5
+ * worth syncing by default. Users can override host/port/tls via the
6
+ * adapter constructor (provider="custom").
7
+ *
8
+ * Authentication is consistently the per-provider "authorization code"
9
+ * pattern (per design doc §3 OQ-1). The user goes to their email
10
+ * web console, enables IMAP/SMTP, copies the auth code, pastes it
11
+ * into the adapter config. The adapter never sees the user's actual
12
+ * login password.
13
+ */
14
+
15
+ "use strict";
16
+
17
/**
 * Built-in provider presets, keyed by provider id.
 *
 * Every preset uses port 993 with `secure: true`; only the identity,
 * host, setup link, default folders and auth hint differ. The outer
 * table is frozen (shallowly).
 */
const PROVIDERS = (() => {
  // Assemble one preset; keys are emitted in the canonical order
  // id → displayName → host → port → secure → setupUrl → defaultFolders → authNote.
  const preset = (id, displayName, host, setupUrl, defaultFolders, authNote) => ({
    id,
    displayName,
    host,
    port: 993,
    secure: true,
    setupUrl,
    defaultFolders,
    authNote,
  });

  return Object.freeze({
    qq: preset(
      "qq",
      "QQ 邮箱",
      "imap.qq.com",
      "https://mail.qq.com/cgi-bin/frame_html?sid=&r=&t=client",
      ["INBOX", "Sent Messages"],
      "Use 授权码 (设置 → 账户 → 开启 IMAP/SMTP), NOT your QQ login password."
    ),
    "189": preset(
      "189",
      "189 邮箱",
      "imap.189.cn",
      "https://mail.189.cn/",
      ["INBOX", "已发送"],
      "Use 授权码 (设置 → 第三方客户端授权码)."
    ),
    "163": preset(
      "163",
      "网易邮箱 (163/126)",
      "imap.163.com",
      "https://mail.163.com/",
      ["INBOX", "已发送"],
      "Use 授权码 (设置 → POP3/SMTP/IMAP)."
    ),
    outlook: preset(
      "outlook",
      "Outlook / Hotmail",
      "outlook.office365.com",
      "https://outlook.live.com/mail/0/options/mail/forwarding",
      ["INBOX", "Sent"],
      "App password (account.microsoft.com/security) — basic-auth deprecation pending; v1 will switch to OAuth2."
    ),
    gmail: preset(
      "gmail",
      "Gmail",
      "imap.gmail.com",
      "https://myaccount.google.com/apppasswords",
      ["INBOX", "[Gmail]/Sent Mail"],
      "App password (myaccount.google.com/apppasswords). OAuth2 in v2."
    ),
  });
})();
69
+
70
/**
 * Resolve an account config into concrete connection settings.
 *
 * `provider: "custom"` requires `account.host`; any other provider id must
 * be an own key of PROVIDERS, and per-account host/port/secure/folders/
 * displayName override the preset.
 *
 * The returned `folders` is always a fresh array, so callers may mutate it
 * without affecting the shared preset table or the caller's own config.
 *
 * @param {object} account
 * @param {string} account.provider preset id or "custom"
 * @param {string} [account.host]
 * @param {number} [account.port] used only when an integer
 * @param {boolean} [account.secure]
 * @param {string[]} [account.folders] used only when a non-empty array
 * @param {string} [account.displayName]
 * @returns {{host: string, port: number, secure: boolean, folders: string[], displayName: string, providerId: string}}
 * @throws {Error} when account is missing, a custom account lacks host, or the provider is unknown
 */
function resolveProvider(account) {
  if (!account || typeof account !== "object") {
    throw new Error("resolveProvider: account required");
  }
  const id = account.provider;
  const ownFolders =
    Array.isArray(account.folders) && account.folders.length > 0
      ? [...account.folders]
      : null;

  if (id === "custom") {
    if (typeof account.host !== "string" || !account.host) {
      throw new Error("resolveProvider: custom provider requires host");
    }
    return {
      host: account.host,
      port: Number.isInteger(account.port) ? account.port : 993,
      secure: account.secure !== false,
      folders: ownFolders || ["INBOX"],
      displayName: account.displayName || account.host,
      providerId: "custom",
    };
  }

  // Own-property lookup only: a bare PROVIDERS[id] would resolve inherited
  // keys such as "toString" or "__proto__" to truthy non-preset values.
  const preset = Object.prototype.hasOwnProperty.call(PROVIDERS, id)
    ? PROVIDERS[id]
    : undefined;
  if (!preset) {
    throw new Error(
      `resolveProvider: unknown provider "${id}". Known: ${Object.keys(PROVIDERS).join(", ")}, or use "custom".`
    );
  }
  return {
    host: account.host || preset.host,
    port: Number.isInteger(account.port) ? account.port : preset.port,
    secure: typeof account.secure === "boolean" ? account.secure : preset.secure,
    // Copy the default list so a caller mutation cannot leak into the preset.
    folders: ownFolders || [...preset.defaultFolders],
    displayName: account.displayName || preset.displayName,
    providerId: preset.id,
  };
}
107
+
108
+ module.exports = {
109
+ PROVIDERS,
110
+ resolveProvider,
111
+ };
@@ -0,0 +1,226 @@
1
+ /**
2
+ * AnalysisEngine — natural-language Q&A skeleton for Personal Data Hub.
3
+ *
4
+ * Mirrors §8 of docs/design/Personal_Data_Hub_Architecture.md. The flow:
5
+ *
6
+ * ask(question) →
7
+ * 1. parseQuery(question) — time window + filters + intent
8
+ * 2. gatherFacts(parsed) — vault.queryEvents with filters
9
+ * + optional ragRetriever(question) for additional context
10
+ * 3. buildPrompt(question, facts) → messages
11
+ * 4. llm.chat(messages) → text
12
+ * 5. parseCitations(text) — extract bracketed ids
13
+ * 6. validateCitations(...) — known vs hallucinated
14
+ * 7. vault.audit(...) — record query + facts cited
15
+ * 8. return { answer, citations, facts, hallucinatedCitations, ... }
16
+ *
17
+ * Privacy invariant (§11.2): the engine refuses to call a non-local LLM
18
+ * unless the caller passes acceptNonLocal: true. This is a hard runtime
19
+ * gate — every layer downstream of the engine assumes locality.
20
+ */
21
+
22
+ "use strict";
23
+
24
+ const { parseQuery } = require("./query-parser");
25
+ const {
26
+ buildPrompt,
27
+ parseCitations,
28
+ validateCitations,
29
+ DEFAULT_SYSTEM_PROMPT,
30
+ } = require("./prompt-builder");
31
+ const { toError } = require("./adapter-spec");
32
+
33
+ const DEFAULT_MAX_FACTS = 80;
34
+ const DEFAULT_MAX_QUERY_LIMIT = 200;
35
+
36
/**
 * Natural-language Q&A over vault events.
 *
 * ask() parses the question, gathers candidate events from the vault
 * (optionally extended by a RAG retriever), builds a prompt, calls the
 * LLM, validates the ids the answer cites against the facts that were
 * actually supplied, and records an audit entry.
 */
class AnalysisEngine {
  /**
   * @param {object} opts
   * @param {import("./vault").LocalVault} opts.vault
   * @param {{chat: Function, isLocal: boolean, name?: string}} opts.llm
   * @param {(question: string, parsed: object) => Promise<Array<{id?: string, text: string, metadata?: object}>>} [opts.ragRetriever]
   * @param {number} [opts.maxFacts=80]
   * @param {number} [opts.maxQueryLimit=200]
   * @param {string} [opts.systemPrompt]
   * @throws {Error} when vault or llm is missing, or llm.isLocal is not a boolean
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") throw new Error("AnalysisEngine: opts required");
    if (!opts.vault) throw new Error("AnalysisEngine: opts.vault required");
    if (!opts.llm || typeof opts.llm.chat !== "function") {
      throw new Error("AnalysisEngine: opts.llm with .chat() required");
    }
    if (typeof opts.llm.isLocal !== "boolean") {
      throw new Error("AnalysisEngine: opts.llm.isLocal must be declared (true/false)");
    }

    this.vault = opts.vault;
    this.llm = opts.llm;
    this.ragRetriever = typeof opts.ragRetriever === "function" ? opts.ragRetriever : null;
    this.maxFacts = Number.isInteger(opts.maxFacts) && opts.maxFacts > 0 ? opts.maxFacts : DEFAULT_MAX_FACTS;
    this.maxQueryLimit =
      Number.isInteger(opts.maxQueryLimit) && opts.maxQueryLimit > 0
        ? opts.maxQueryLimit
        : DEFAULT_MAX_QUERY_LIMIT;
    this.systemPrompt = opts.systemPrompt || DEFAULT_SYSTEM_PROMPT;
  }

  /**
   * Ask a natural-language question.
   *
   * @param {string} question
   * @param {object} [options]
   * @param {boolean} [options.acceptNonLocal=false] required true for cloud LLMs
   * @param {number} [options.now]
   * @param {boolean} [options.skipAudit=false]
   * @returns {Promise<AskResult>}
   * @throws {Error} on an empty question, on a non-local LLM without opt-in,
   *   or when llm.chat fails (the failure is audited first, then rethrown)
   *
   * @typedef {object} AskResult
   * @property {string} answer
   * @property {string[]} citations event ids cited AND known
   * @property {string[]} hallucinatedCitations event ids cited but not in facts
   * @property {Array<object>} facts facts handed to the LLM
   * @property {string[]} ragContextIds ids of facts contributed by the RAG retriever
   * @property {object} parsed parseQuery output
   * @property {object} usage { promptTokens, completionTokens, totalTokens }
   * @property {string} model
   * @property {number} durationMs
   * @property {string|null} warning "no-facts" | "hallucinated-citations" | null
   */
  async ask(question, options = {}) {
    if (typeof question !== "string" || question.length === 0) {
      throw new Error("AnalysisEngine.ask: question must be a non-empty string");
    }
    // The default parameter only covers `undefined`; tolerate an explicit null too.
    const opts = options || {};
    if (!this.llm.isLocal && !opts.acceptNonLocal) {
      throw new Error(
        "AnalysisEngine.ask: LLM declared non-local; pass acceptNonLocal: true to opt in. " +
          "(Personal Data Hub default policy: all inference stays on-device.)"
      );
    }

    const startedAt = Date.now();
    const parsed = parseQuery(question, { now: opts.now });

    // Gather facts from the vault. Work on a copy: RAG hits are appended
    // below and the vault's own result array must not be modified.
    const gathered = this._gatherFacts(parsed);
    const facts = Array.isArray(gathered) ? [...gathered] : [];

    // Optional RAG augmentation (never aborts the question).
    const ragContext = await this._augmentWithRag(question, parsed, facts);

    // Build prompt.
    const { messages, factIds, factCount, truncated } = buildPrompt({
      question,
      facts,
      systemPrompt: this.systemPrompt,
      intent: parsed.intent,
      timeWindow: parsed.timeWindow,
      maxFacts: this.maxFacts,
    });

    // Call LLM.
    let llmResp;
    try {
      llmResp = await this.llm.chat(messages, {
        temperature: 0.2,
        purpose: "personal-data-hub.analysis.ask",
      });
    } catch (err) {
      const e = toError(err, "llm.chat");
      try {
        this.vault.audit("analysis.llm_failed", question, { error: e.message });
      } catch (_e) {}
      throw e;
    }

    // A client may resolve with nothing; treat that as an empty answer.
    const answer = (llmResp && typeof llmResp.text === "string") ? llmResp.text : "";
    const usage = (llmResp && llmResp.usage) || {};

    // Parse + validate citations.
    const cited = parseCitations(answer);
    const { known, unknown } = validateCitations(cited, factIds);

    // Warnings.
    let warning = null;
    if (factCount === 0) warning = "no-facts";
    else if (unknown.length > 0) warning = "hallucinated-citations";

    const durationMs = Date.now() - startedAt;

    if (!opts.skipAudit) {
      // Auditing is best effort: an audit failure must not lose the answer.
      try {
        this.vault.audit("analysis.ask", question, {
          factCount,
          truncated,
          citationsKnown: known.length,
          citationsUnknown: unknown.length,
          warning,
          durationMs,
          model: this.llm.name || (llmResp && llmResp.model),
        });
      } catch (_e) {}
    }

    return {
      answer,
      citations: known,
      hallucinatedCitations: unknown,
      facts,
      ragContextIds: ragContext,
      parsed,
      usage,
      model: this.llm.name || (llmResp && llmResp.model) || "unknown",
      durationMs,
      warning,
    };
  }

  // ─── Internals ─────────────────────────────────────────────────────

  /**
   * Append vault events referenced by RAG documents to `facts`.
   *
   * A document's event id is read from `doc.id` or, failing that, from
   * `doc.metadata.id`. Events already present in `facts` are skipped.
   * A retriever failure is audited as "analysis.rag_failed" and swallowed.
   *
   * @param {string} question
   * @param {object} parsed parseQuery output, forwarded to the retriever
   * @param {Array<object>} facts mutated in place with newly found events
   * @returns {Promise<string[]>} ids contributed by the retriever
   */
  async _augmentWithRag(question, parsed, facts) {
    const added = [];
    if (!this.ragRetriever) return added;
    try {
      const docs = await this.ragRetriever(question, parsed);
      if (!Array.isArray(docs)) return added;
      const seen = new Set(facts.map((f) => f && f.id));
      for (const doc of docs) {
        const id = doc && (doc.id || (doc.metadata && doc.metadata.id));
        if (!id || seen.has(id)) continue;
        const e = this.vault.getEvent(id);
        if (e && !seen.has(e.id)) {
          facts.push(e);
          seen.add(e.id);
          added.push(id);
        }
      }
    } catch (err) {
      // RAG failure shouldn't abort Q&A — log and continue with direct facts.
      const e = toError(err, "ragRetriever");
      try {
        this.vault.audit("analysis.rag_failed", question, { error: e.message });
      } catch (_e) {}
    }
    return added;
  }

  /**
   * Query the vault for candidate events.
   *
   * @param {object} parsed parseQuery output (`filters.adapter`, `timeWindow.since/until`)
   * @returns {*} whatever vault.queryEvents returns for the derived query
   */
  _gatherFacts(parsed) {
    // Deliberately do NOT pass parsed.filters.subtype as a vault filter:
    // the keyword heuristic (`order` vs `payment` vs `transfer`) is too
    // crude to reliably narrow without false negatives. E.g. a user
    // asking "在淘宝花了多少" wants taobao-adapter ORDER events; the
    // keyword parser picks `payment` and would over-filter to zero rows.
    // Instead we filter by adapter + time window (both reliable) and
    // pass the subtype/intent into the prompt as a HINT for the LLM to
    // apply on prose. The LLM is good at filtering; SQL keyword guessing
    // is brittle.
    const q = {
      limit: this.maxQueryLimit,
    };
    if (parsed.filters && parsed.filters.adapter) q.adapter = parsed.filters.adapter;
    if (parsed.timeWindow) {
      if (Number.isFinite(parsed.timeWindow.since)) q.since = parsed.timeWindow.since;
      if (Number.isFinite(parsed.timeWindow.until)) q.until = parsed.timeWindow.until;
    }
    return this.vault.queryEvents(q);
  }
}
221
+
222
+ module.exports = {
223
+ AnalysisEngine,
224
+ DEFAULT_MAX_FACTS,
225
+ DEFAULT_MAX_QUERY_LIMIT,
226
+ };